From f3f4be2d90acb6e690b4b75b5de478d58b99330a Mon Sep 17 00:00:00 2001 From: Fritz Stracke <fritz.stracke@rwth-aachen.de> Date: Thu, 25 May 2023 11:53:59 +0200 Subject: [PATCH] Add debugging comments helpful for refactoring current errors. Signed-off-by: Fritz Stracke <fritz.stracke@rwth-aachen.de> --- gpu/src/cricket-cr.c | 3 +++ gpu/src/main.c | 11 +++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/gpu/src/cricket-cr.c b/gpu/src/cricket-cr.c index fcbfd0d..9069bee 100644 --- a/gpu/src/cricket-cr.c +++ b/gpu/src/cricket-cr.c @@ -1170,6 +1170,8 @@ bool cricket_cr_ckp_params(CUDBGAPI cudbgAPI, const char *ckp_dir, /* Parameters are the same for all warps so just use warp 0 * TODO: use first valid warp, because warp 0 may not be in use (is that * possible?) + * + * This seems to cause issues right now. Needs a solution. */ if ((param_mem = (uint8_t*)malloc(elf_info->param_size)) == NULL) return false; @@ -1482,6 +1484,7 @@ bool cricket_cr_ckp_globals(CUDBGAPI cudbgAPI, const char *ckp_dir) if (res != CUDBG_SUCCESS) { LOGE(LOG_ERROR, "cuda error: %s", cudbgGetErrorString(res)); + LOGE(LOG_DEBUG, "encountered in iteration %d of %d\n", i, globals_num); goto cleanup; } offset += globals[i].size; diff --git a/gpu/src/main.c b/gpu/src/main.c index b3dee57..98b6b4a 100644 --- a/gpu/src/main.c +++ b/gpu/src/main.c @@ -836,10 +836,11 @@ int cricket_checkpoint(int argc, char *argv[]) return -1; } + printf("Initializing GDB!\n\n"); gdb_init(argc, argv, NULL, argv[2]); /* attach to process (both CPU and GPU) */ - // printf("attaching...\n"); + printf("attaching...\n"); // attach_command(argv[2], !batch_flag); if (cuda_api_get_state() != CUDA_API_STATE_INITIALIZED) { @@ -854,6 +855,9 @@ int cricket_checkpoint(int argc, char *argv[]) #ifdef CRICKET_PROFILE gettimeofday(&b, NULL); #endif + + printf("attached!\n\n"); + printf("trying to get CUDA debugger API\n"); /* get CUDA debugger API */ res = cudbgGetAPI(CUDBG_API_VERSION_MAJOR, CUDBG_API_VERSION_MINOR, 
@@ -863,7 +867,7 @@ int cricket_checkpoint(int argc, char *argv[]) goto cuda_error; } printf("got API\n"); - + printf("enumerating devices...\n"); if (!cricket_device_get_num(cudbgAPI, &numDev)) { printf("error getting device num\n"); @@ -1046,6 +1050,9 @@ int cricket_checkpoint(int argc, char *argv[]) //cricket_focus_kernel(!batch_flag); + /// TODO: Logic Error: first_warp might always be invalid! In line 889 first_warp is looped over + /// the number of available warps. As far as I can tell it is never reset afterwards. + /// That would explain why this line throws an invalid warp exception. Needs discussion. if (!cricket_cr_ckp_params(cudbgAPI, ckp_dir, &elf_info, 0, 0, first_warp)) { printf("cricket_cr_ckp_params unsuccessful\n"); -- GitLab