diff --git a/totalview-module/demos/demo04/Makefile b/totalview-module/demos/demo04/Makefile
index d306aff60c5f5923b66654fa2bddd7b3ad613091..4fd1eefebf334f8a57b058c9b481091f06d29f2d 100644
--- a/totalview-module/demos/demo04/Makefile
+++ b/totalview-module/demos/demo04/Makefile
@@ -1,13 +1,21 @@
-# SPDX-FileCopyrightText: 2021 Competence Center for High Performance Computing in Hessen (HKHLR)
-# SPDX-License-Identifier: MIT
 
-all: demo04A.exe demo04B.exe
+CXXFLAGS += $(FLAGS_GCC_FAST) -Wall
 
-demo04A.exe: demo04.cc
-	$(CXX) -O3 -g -o $@ $<
+PROGRAMS = demo04A.exe demo04B.exe demo04C.exe demo04.exe
 
-demo04B.exe: demo04.cc
-	$(CXX) -O0 -g -o $@ $<
+all: $(PROGRAMS)
+
+demo04.exe: demo04.cc
+	$(CXX) $(CXXFLAGS) -lomp -o $@ $<
+
+demo04A.exe:	demo04A.cc
+	$(CXX) $(CXXFLAGS) -g -O0 -lomp -o $@ $<
+
+demo04B.exe:	demo04B.cc
+	$(CXX) $(CXXFLAGS) -g -O3 -fopenmp -o $@ $<
+
+demo04C.exe:	demo04C.cc
+	$(CXX) $(CXXFLAGS) -g -O3 -fopenmp -o $@ $<
 
 clean:
-	rm -f demo???.exe
+	$(RM) $(PROGRAMS) *.o
diff --git a/totalview-module/demos/demo04/demo04.cc b/totalview-module/demos/demo04/demo04.cc
index e040c9a1c0ebb383c1a7fa76a503dc9ebc9591e5..72b8eb86c6bcf05000393e1bbc4167e42a2862d7 100644
--- a/totalview-module/demos/demo04/demo04.cc
+++ b/totalview-module/demos/demo04/demo04.cc
@@ -1,90 +1,50 @@
-// SPDX-FileCopyrightText: 2021 Competence Center for High Performance Computing in Hessen (HKHLR)
-// SPDX-License-Identifier: MIT
-
 #include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <omp.h>
+
+using namespace std;
+
+double tolerance = 1e-8;
 
-#define SIZE 10
+double f(double const x) {
+  return 4.0/(1.0+x*x);
+}
+
+double integrate(double start,double end, long int samplePoints, double (*)(double x)) {
+  double result = 0.0;
+  double dx = (end-start) / samplePoints;
+  if (samplePoints<1) return 0.0;
+  for (long int i = 0; i < samplePoints; i++ ) {
+    double x = i*dx - 0.5*dx;
+    double tmp = f(x);
+    tmp*=dx;
+    result += tmp;
+  }
+  return result;
+}
 
-// Multilplies 2 matrices
 
-int main() {
 
-	// 3 Matrices
-	double a[SIZE][SIZE];
-	double b[SIZE][SIZE];
-	double c[SIZE][SIZE];
-	
-	// loop variables
-	int i,j,k;
 
-	// initialize a:	
-	for (i = 0; i < SIZE; ++i) {
-		for (j = 0; j < SIZE; ++j) {
-			a[i][j] = (i>=j)*1;
-			// all entries below diagonal 1 including diagonal
-		}
-	}
 
-	// you will note that with optimizaion turned on, the compiler will optimize away this loop menaing that you cant set a breakpoint at the for i of for j line
-	// initialize b:	
-	for (i = 0; i < SIZE; ++i) {
-		for (j = 0; j < SIZE; ++j) {
-			b[i][j] = (i<j)*1;
-			// all entries above diagonal 1 excluding diagonal
-		}
-	}
 
-	// C= A*B
-	for (i = 0; i < SIZE; ++i) {
-		for ( j = 0; j < SIZE; ++j) {
-			c[i][j] = 0;
-			for ( k = 0; k < SIZE; ++k) {
-				a[i][j] = c[i][j] + a[i][k] + b[k][j];
-			}
-		}
-	}
 
-	
-/*
- // Print Matrices
-	for ( i = 0; i < SIZE; ++i) {
-		for ( j = 0; j < SIZE; ++j) {
-			std::cout <<a[i][j]<<" ";
-		}
-		std::cout << "\n";
-	}
-	std::cout << "\nB\n";
-	for (int i = 0; i < SIZE; ++i) {
-		for (int j = 0; j < SIZE; ++j) {
-			std::cout <<b[i][j]<<" ";
-		}
-		std::cout << "\n";
-	}
-	
-	std::cout << "\nC\n";
-	for (int i = 0; i < SIZE; ++i) {
-		for (int j = 0; j < SIZE; ++j) {
-			std::cout <<c[i][j]<<" ";
-		}
-		std::cout << "\n";
-	}
-	*/
-	// check result:
-	bool result_correct = true;
-	for (int i = 0; i < SIZE; ++i) {
-		for (int j = 0; j < SIZE; ++j) {
-			if (c[i][j] != i+j+1) {
-				result_correct = false;
-			}
-		}
-	}
 
-	if (result_correct) {
-		std::cout << "expected result\n";
-	} else {
-		std::cout << "ERROR: Not expected result\n";
-	}
 
-	return 0;
 
+int main(int argc, char *argv[]) {
+  int n = 10000;
+  if (argc > 1) {
+    n = ceil(atof(argv[1]));
+  }
+  double const t0 = omp_get_wtime();
+  double result = integrate(0.0, 1.0,n,f);
+  double const t1 = omp_get_wtime();
+  cout.precision(16);
+  cout << "Intervals: " << n << "\n";
+  cout << "Result: " << result << "\n";
+  cout << "Pi: " << M_PI << "\n";
+  cout << "Rel. Error: " << abs(M_PI - result) / M_PI << "\n";
+  cout << "Time: " << t1 - t0 << " s\n";
 }
diff --git a/totalview-module/demos/demo04/demo04A.cc b/totalview-module/demos/demo04/demo04A.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b4e50e5a94848d0a9b7f956a4c18f19e6bb811f3
--- /dev/null
+++ b/totalview-module/demos/demo04/demo04A.cc
@@ -0,0 +1,58 @@
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <omp.h>
+<<<<<<< HEAD
+
+using namespace std;
+
+=======
+#include <utility>
+
+using namespace std;
+
+std::pair<double,long> operator+(const std::pair<double,long> &a,const std::pair<double,long> &b){
+  return std::pair<double,long>(a.first+b.first,a.second+b.second);
+}
+
+double tolerance = 1e-8;
+
+double f(double const x) {
+  return 4.0/(1.0+x*x);
+}
+
+std::pair<double,long>  computeIntegral(double const x0, double const dx, double (*fptr)(double), long int segments) {
+  double local_sum=0.0;
+  if (segments<1) return std::pair<double,long> (0,0); 
+  double local_dx=dx/segments;
+
+  // local sum = dx*(f(x0+i*dx)+f(x0+(i+1)*dx))/2
+  // loop invariants dx and 0.5 have been moved to return statement
+
+  for (int i=0;i<segments;i++) {
+    local_sum+=fptr(x0+i*local_dx)+fptr(x0+(i+1)*local_dx);
+  }
+  return std::pair<double,long> (local_sum*local_dx*.5,segments);
+}
+
+int main(int argc, char *argv[]) {
+  if (argc > 1) {
+    tolerance = atof(argv[1]);
+  }
+  cout.precision(16);
+  std::pair<double,long> pi;
+  double const t0 = omp_get_wtime();
+
+
+
+
+  pi = computeIntegral(0, 1, &f, 1000000);
+  
+  double const t1 = omp_get_wtime();
+  cout << "Tolerance: " << tolerance << "\n";
+  cout << "Result: " << pi.first << "\n";
+  cout << "Segments: " << pi.second << "\n";
+  cout << "Pi: " << M_PI << "\n";
+  cout << "Rel. Error: " << abs(M_PI - pi.first) / M_PI << "\n";
+  cout << "Time: " << t1 - t0 << " s\n";
+}
diff --git a/totalview-module/demos/demo04/demo04B.cc b/totalview-module/demos/demo04/demo04B.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ed24d67d32382c62f80963f1b38d36435453a2dd
--- /dev/null
+++ b/totalview-module/demos/demo04/demo04B.cc
@@ -0,0 +1,76 @@
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <omp.h>
+#include <utility>
+#include <unistd.h>
+
+using namespace std;
+
+std::pair<double,long> operator+(const std::pair<double,long> &a,const std::pair<double,long> &b){
+  return std::pair<double,long>(a.first+b.first,a.second+b.second);
+}
+
+double tolerance = 1e-8;
+
+double f(double const x) {
+  return 4.0/(1.0+x*x);
+}
+
+std::pair<double,long>  computeIntegral(double const x0, double const dx, double (*fptr)(double), long int segments) {
+  double local_sum=0.0;
+  if (segments<1) return std::pair<double,long> (0,0); 
+  double local_dx=dx/segments;
+  double local_x0;
+  // local sum = dx*(f(x0+i*dx)+f(x0+(i+1)*dx))/2
+  // loop invariants dx and 0.5 have been moved to return statement
+#pragma omp parallel default(shared) private(local_x0) 
+  { 
+    #pragma omp for reduction(+:local_sum)
+    for (int i=0;i<segments;i++) {
+      local_x0=x0+i*local_dx;
+      local_sum+=fptr(x0+i*local_dx)+fptr(x0+(i+1)*local_dx);
+    }
+  }
+  return std::pair<double,long> (local_sum*local_dx*.5,segments);
+}
+
+int main(int argc, char *argv[]) {
+  if (argc > 1) {
+    tolerance = atof(argv[1]);
+  }
+  cout.precision(16);
+  std::pair<double,long> pi;
+  double const t0 = omp_get_wtime();
+
+  
+  #pragma omp parallel default(shared)  
+  {
+    unsigned int thread_count = omp_get_num_threads();
+    #pragma omp barrier
+    unsigned int thread_num = omp_get_thread_num();
+    if (thread_num == 0 ) {
+      cout << "This program uses " << thread_count << " threads.\n" << std::flush;
+    } else {
+      for (long int i=0;i< 1000000;i++) sqrt(i);
+    }
+    #pragma omp barrier
+    cout << "This is thread #" << thread_num << ".\n";
+    #pragma omp master 
+    cout << std::flush;
+  }
+
+
+  pi = computeIntegral(0, 1, &f, 1000000);
+
+
+
+
+  double const t1 = omp_get_wtime();
+  cout << "Tolerance: " << tolerance << "\n";
+  cout << "Result: " << pi.first << "\n";
+  cout << "Segments: " << pi.second << "\n";
+  cout << "Pi: " << M_PI << "\n";
+  cout << "Rel. Error: " << abs(M_PI - pi.first) / M_PI << "\n";
+  cout << "Time: " << t1 - t0 << " s\n";
+}
diff --git a/totalview-module/demos/demo04/demo04B_.cc b/totalview-module/demos/demo04/demo04B_.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9e49d8a06a5812db6a8b7a2d757f1b13fb4bc276
--- /dev/null
+++ b/totalview-module/demos/demo04/demo04B_.cc
@@ -0,0 +1,63 @@
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <omp.h>
+#include <utility>
+
+using namespace std;
+
+std::pair<double,long> operator+(const std::pair<double,long> &a,const std::pair<double,long> &b){
+  return std::pair<double,long>(a.first+b.first,a.second+b.second);
+}
+
+double tolerance = 1e-8;
+
+double f(double const x) {
+  return 4.0/(1.0+x*x);
+}
+
+std::pair<double,long>  computeIntegral(double const x0, double const dx, double (*fptr)(double), double const fx0,int const level=0) {
+  double const x05 = x0 + 0.5*dx;
+  double const fx05 = fptr(x05);
+  std::pair<double,long> res(0,0);
+  if (abs(fx05 - fx0) / fx0 > tolerance) {
+    std::pair<double,long> s1,s2; 
+#pragma omp task default(shared) if((level & 3) == 0) untied    
+{
+    s1 = computeIntegral(x0, 0.5*dx, fptr, fx0,level+1);
+}
+#pragma omp task default(shared) if((level & 3) == 0) untied    
+{
+    s2 = computeIntegral(x05, 0.5*dx, fptr, fx05,level+1);
+}
+#pragma omp taskwait    
+    res = s1+s2;
+  } else {
+    res.first=fx05 * dx;
+  }
+  res.second++;
+  return res;
+}
+
+int main(int argc, char *argv[]) {
+  if (argc > 1) {
+    tolerance = atof(argv[1]);
+  }
+  cout.precision(16);
+  std::pair <double,long> result;
+  double const t0 = omp_get_wtime();
+#pragma omp parallel 
+  {
+#pragma omp master
+{
+    result=computeIntegral(0, 1, &f, f(0));
+}
+  }
+  double const t1 = omp_get_wtime();
+  cout << "Tolerance: " << tolerance << "\n";
+  cout << "Result: " << result.first << "\n";
+  cout << "Segments: " << result.second << "\n";
+  cout << "Pi: " << M_PI << "\n";
+  cout << "Rel. Error: " << abs(M_PI - result.first) / M_PI << "\n";
+  cout << "Time: " << t1 - t0 << " s\n";
+}
diff --git a/totalview-module/demos/demo04/demo04C.cc b/totalview-module/demos/demo04/demo04C.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d80eb39db9d388de954794d23629f4eccfa72693
--- /dev/null
+++ b/totalview-module/demos/demo04/demo04C.cc
@@ -0,0 +1,67 @@
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <omp.h>
+
+using namespace std;
+
+#include <utility>
+
+using namespace std;
+
+std::pair<double,long> operator+(const std::pair<double,long> &a,const std::pair<double,long> &b){
+  return std::pair<double,long>(a.first+b.first,a.second+b.second);
+}
+
+double tolerance = 1e-8;
+
+double f(double const x) {
+  return 4.0/(1.0+x*x);
+}
+
+std::pair<double,long>  computeIntegral(double const x0, double const dx, double (*fptr)(double), double const current_sum) {
+  double const ldx = dx*0.5;
+  double const refined_sum=0.5*(fptr(x0)+2*fptr(x0+ldx)+fptr(x0+2*ldx));
+  std::pair<double,long> res(0,0);
+  if (abs(refined_sum - current_sum) / current_sum > tolerance) {
+
+
+    
+    auto s1 = computeIntegral(x0, ldx, fptr, refined_sum);
+
+
+    
+    auto s2 = computeIntegral(x0+ldx, ldx, fptr, refined_sum);
+    
+    res = s1+s2;
+  } else {
+    res.first=refined_sum*dx*0.5;
+  }
+  res.second++;
+  return res;
+}
+
+int main(int argc, char *argv[]) {
+  if (argc > 1) {
+    tolerance = atof(argv[1]);
+  }
+  cout.precision(16);
+  std::pair<double,long> pi;
+  double const t0 = omp_get_wtime();
+
+
+
+
+  pi = computeIntegral(0, 1, &f, f(0));
+
+
+
+
+  double const t1 = omp_get_wtime();
+  cout << "Tolerance: " << tolerance << "\n";
+  cout << "Result: " << pi.first << "\n";
+  cout << "Segments: " << pi.second << "\n";
+  cout << "Pi: " << M_PI << "\n";
+  cout << "Rel. Error: " << abs(M_PI - pi.first) / M_PI << "\n";
+  cout << "Time: " << t1 - t0 << " s\n";
+}
diff --git a/totalview-module/demos/demo04/demo04D.cc b/totalview-module/demos/demo04/demo04D.cc
new file mode 100644
index 0000000000000000000000000000000000000000..401194c5f4631a9024b1414d6014ef50ff140870
--- /dev/null
+++ b/totalview-module/demos/demo04/demo04D.cc
@@ -0,0 +1,65 @@
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <omp.h>
+#include <utility>
+
+using namespace std;
+
+std::pair<double,long> operator+(const std::pair<double,long> &a,const std::pair<double,long> &b){
+  return std::pair<double,long>(a.first+b.first,a.second+b.second);
+}
+
+double tolerance = 1e-8;
+
+double f(double const x) {
+  return 4.0/(1.0+x*x);
+}
+
+std::pair<double,long> computeIntegral(double const x0, double const dx, double (*fptr)(double), double const fx0, int const level = 0) {
+  double const x05 = x0 + 0.5*dx;
+  double const fx05 = fptr(x05);
+  std::pair<double,long> res(0,0);
+  if (abs(fx05 - fx0) / fx0 > tolerance) {
+    std::pair<double,long> s1,s2;
+#pragma omp task default(shared) if((level & 3) == 0) untied
+    {
+      s1 = computeIntegral(x0, 0.5*dx, fptr, fx0, level + 1);
+    }
+#pragma omp task default(shared) if((level & 3) == 0) untied
+    {
+      s2 = computeIntegral(x05, 0.5*dx, fptr, fx05, level + 1);
+    }
+#pragma omp taskwait
+    res = s1 + s2;
+  } else {
+    res.first=fx05 * dx;
+  }
+  res.second++;
+  return res;
+}
+
+int main(int argc, char *argv[]) {
+  if (argc > 1) {
+    tolerance = atof(argv[1]);
+  }
+  cout.precision(16);
+  std::pair<double,long> pi;
+  double const t0 = omp_get_wtime();
+#pragma omp parallel
+  {
+#pragma omp master
+    {
+      pi = computeIntegral(0, 1, &f, f(0));
+    }
+#pragma omp critical
+    cout << "Thread " << omp_get_thread_num() << std::endl;
+  }
+  double const t1 = omp_get_wtime();
+  cout << "Tolerance: " << tolerance << "\n";
+  cout << "Result: " << pi.first << "\n";
+  cout << "Segments: " << pi.second << "\n";
+  cout << "Pi: " << M_PI << "\n";
+  cout << "Rel. Error: " << abs(M_PI - pi.first) / M_PI << "\n";
+  cout << "Time: " << t1 - t0 << " s\n";
+}
diff --git a/totalview-module/demos/demo04/demo04E.cc b/totalview-module/demos/demo04/demo04E.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d3fcdeb8418e55a61e6e7b6ec0ed52850e94b305
--- /dev/null
+++ b/totalview-module/demos/demo04/demo04E.cc
@@ -0,0 +1,56 @@
+#include <iostream>
+#include <cstdlib>
+#include <cmath>
+#include <omp.h>
+#include <utility>
+
+using namespace std;
+
+std::pair<double,long> operator+(const std::pair<double,long> &a,const std::pair<double,long> &b){
+  return std::pair<double,long>(a.first+b.first,a.second+b.second);
+}
+
+double tolerance = 1e-8;
+
+double f(double const x) {
+  return 4.0/(1.0+x*x);
+}
+
+std::pair<double,long>  computeIntegral(double const x0, double const dx, double (*fptr)(double), long int segments) {
+  double local_sum=0.0;
+  if (segments<1) return std::pair<double,long> (0,0); 
+  double local_dx=dx/segments;
+
+  // local sum = dx*(f(x0+i*dx)+f(x0+(i+1)*dx))/2
+  // loop invariants dx and 0.5 have been moved to return statement
+
+  for (int i=0;i<segments;i++) {
+    local_sum+=fptr(x0+i*local_dx);
+  }
+  return std::pair<double,long> (local_sum*local_dx,segments);
+}
+
+int main(int argc, char *argv[]) {
+  if (argc > 1) {
+    tolerance = atof(argv[1]);
+  }
+  cout.precision(16);
+  std::pair<double,long> pi;
+  double const t0 = omp_get_wtime();
+
+
+
+
+  pi = computeIntegral(0, 1, &f, 1000000000);
+
+
+
+
+  double const t1 = omp_get_wtime();
+  cout << "Tolerance: " << tolerance << "\n";
+  cout << "Result: " << pi.first << "\n";
+  cout << "Segments: " << pi.second << "\n";
+  cout << "Pi: " << M_PI << "\n";
+  cout << "Rel. Error: " << abs(M_PI - pi.first) / M_PI << "\n";
+  cout << "Time: " << t1 - t0 << " s\n";
+}