From 06bdb378c93e620c018ff99f817051e0792ed621 Mon Sep 17 00:00:00 2001
From: Niklas Eiling <niklas.eiling@eonerc.rwth-aachen.de>
Date: Tue, 1 Aug 2023 15:57:04 +0200
Subject: [PATCH] add warmup runs to overhead

Signed-off-by: Niklas Eiling <niklas.eiling@eonerc.rwth-aachen.de>
---
 tests/test_apps/overhead.cu | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/tests/test_apps/overhead.cu b/tests/test_apps/overhead.cu
index ac5a290..6928422 100644
--- a/tests/test_apps/overhead.cu
+++ b/tests/test_apps/overhead.cu
@@ -6,7 +6,8 @@
 
 #include <cuda_runtime.h>
 
-#define ITERATIONS 10
+#define ITERATIONS 100000
+#define WARMUPS 10
 #define MEMSIZE 1024*1024
 const int blocksize = 32;
 
@@ -31,7 +32,10 @@ int main()
 
     printf("init CUDA\n");
     cudaGetDeviceCount(&cnt);
-    printf("cudaGetDeviceCount (%d iterations)\n", iterations);
+    printf("1. cudaGetDeviceCount (%d iterations)\n", iterations);
+    for (int i=0; i != WARMUPS; i++) {
+        cudaGetDeviceCount(&cnt);
+    }
     gettimeofday(&begin, NULL);
     for (int i=0; i != iterations; i++) {
         cudaGetDeviceCount(&cnt);
@@ -41,7 +45,11 @@ int main()
 
     uint16_t *dev_A;
     size_t A_size = MEMSIZE;
-    printf("cudaMalloc/cudaFree (%d iterations)\n", iterations);
+    printf("2. cudaMalloc/cudaFree (%d iterations)\n", iterations);
+    for (int i=0; i != WARMUPS; i++) {
+        cudaMalloc( (void**)&dev_A, A_size );
+        cudaFree( dev_A );
+    }
     gettimeofday(&begin, NULL);
     for (int i=0; i != iterations; i++) {
         cudaMalloc( (void**)&dev_A, A_size );
@@ -53,7 +61,10 @@ int main()
 
     dim3 dimBlock( blocksize, 1 );
     dim3 dimGrid( 1, 1);
-    printf("kernel launch w/o parameteter (%d iterations)\n", iterations);
+    printf("3. kernel launch w/o parameteter (%d iterations)\n", iterations);
+    for (int i=0; i != WARMUPS; i++) {
+        kernel_no_param<<<dimGrid, dimBlock>>>();
+    }
     gettimeofday(&begin, NULL);
     for (int i=0; i != iterations; i++) {
         kernel_no_param<<<dimGrid, dimBlock>>>();
@@ -71,7 +82,10 @@ int main()
     cudaMalloc( (void**)&dev_A, MEMSIZE );
     cudaMemset( dev_A, 1, MEMSIZE);
     cudaMemset( dev_x, 2, MEMSIZE);
-    printf("kernel launch w/ parameteters (%d iterations)\n", iterations);
+    printf("4. kernel launch w/ parameteters (%d iterations)\n", iterations);
+    for (int i=0; i != WARMUPS; i++) {
+        kernel<<<dimGrid, dimBlock>>>(dev_A, dev_x, dev_res, 0, 0, 0, 0);
+    }
     gettimeofday(&begin, NULL);
     for (int i=0; i != iterations; i++) {
         kernel<<<dimGrid, dimBlock>>>(dev_A, dev_x, dev_res, 0, 0, 0, 0);
-- 
GitLab