From cdd27689fde2eae0cb6e022d18400f7a696b2f58 Mon Sep 17 00:00:00 2001 From: Sebastian Nickels Date: Fri, 20 Dec 2019 09:20:52 +0100 Subject: [PATCH] Implemented parameters checkpoint_period, log_period and eval_train --- .../templates/gluon/CNNDataLoader.ftl | 12 +-- .../templates/gluon/CNNSupervisedTrainer.ftl | 48 +++++----- .../resources/templates/gluon/CNNTrainer.ftl | 9 ++ .../templates/gluon/pythonExecuteTest.ftl | 8 +- .../templates/gluon/pythonExecuteTrain.ftl | 8 +- .../gluon/saveAttentionImageTest.ftl | 19 +++- .../gluon/saveAttentionImageTrain.ftl | 14 +-- .../target_code/CNNDataLoader_Alexnet.py | 12 +-- .../CNNDataLoader_CifarClassifierNetwork.py | 12 +-- .../target_code/CNNDataLoader_VGG16.py | 12 +-- .../CNNSupervisedTrainer_Alexnet.py | 91 ++++++++++++------- ...upervisedTrainer_CifarClassifierNetwork.py | 91 ++++++++++++------- .../target_code/CNNSupervisedTrainer_VGG16.py | 91 ++++++++++++------- 13 files changed, 251 insertions(+), 176 deletions(-) diff --git a/src/main/resources/templates/gluon/CNNDataLoader.ftl b/src/main/resources/templates/gluon/CNNDataLoader.ftl index 9223e70..3e7b1cf 100644 --- a/src/main/resources/templates/gluon/CNNDataLoader.ftl +++ b/src/main/resources/templates/gluon/CNNDataLoader.ftl @@ -15,7 +15,7 @@ class ${tc.fileNameWithoutEnding}: def __init__(self): self._data_dir = "${tc.dataPath}/" - def load_data(self, train_batch_size, test_batch_size): + def load_data(self, batch_size): train_h5, test_h5 = self.load_h5_files() train_data = {} @@ -39,11 +39,7 @@ class ${tc.fileNameWithoutEnding}: train_iter = mx.io.NDArrayIter(data=train_data, label=train_label, - batch_size=train_batch_size) - - train_test_iter = mx.io.NDArrayIter(data=train_data, - label=train_label, - batch_size=test_batch_size) + batch_size=batch_size) test_iter = None @@ -64,9 +60,9 @@ class ${tc.fileNameWithoutEnding}: test_iter = mx.io.NDArrayIter(data=test_data, label=test_label, - batch_size=test_batch_size) + batch_size=batch_size) - return train_iter, train_test_iter, test_iter, data_mean, data_std, train_images, test_images + return train_iter, test_iter, data_mean, data_std, train_images, test_images def load_data_img(self, batch_size, img_size): train_h5, test_h5 = self.load_h5_files() diff --git a/src/main/resources/templates/gluon/CNNSupervisedTrainer.ftl b/src/main/resources/templates/gluon/CNNSupervisedTrainer.ftl index 4833f2f..ab8d634 100644 --- a/src/main/resources/templates/gluon/CNNSupervisedTrainer.ftl +++ b/src/main/resources/templates/gluon/CNNSupervisedTrainer.ftl @@ -181,13 +181,15 @@ class ${tc.fileNameWithoutEnding}: num_epoch=10, eval_metric='acc', eval_metric_params={}, + eval_train=False, loss ='softmax_cross_entropy', loss_params={}, optimizer='adam', optimizer_params=(('learning_rate', 0.001),), load_checkpoint=True, - context='gpu', checkpoint_period=5, + log_period=50, + context='gpu', save_attention_image=False, use_teacher_forcing=False, normalize=True): @@ -213,10 +215,7 @@ class ${tc.fileNameWithoutEnding}: del optimizer_params['step_size'] del optimizer_params['learning_rate_decay'] - train_batch_size = batch_size - test_batch_size = batch_size - - train_iter, train_test_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(train_batch_size, test_batch_size) + train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size) if normalize: self._net_creator.construct(context=mx_context, data_mean=data_mean, data_std=data_std) @@ -275,11 +274,11 @@ class ${tc.fileNameWithoutEnding}: else: logging.error("Invalid loss parameter.") - speed_period = 50 tic = None for epoch in range(begin_epoch, begin_epoch + num_epoch): + loss_total = 0 train_iter.reset() for batch_i, batch in enumerate(train_iter): with autograd.record(): @@ -291,44 +290,51 @@ class ${tc.fileNameWithoutEnding}: loss.backward() + loss_total += loss.sum().asscalar() + for trainer in trainers: trainer.step(batch_size) if tic is None: tic = time.time() else: - if batch_i % speed_period == 0: + if batch_i % log_period == 0: try: - speed = speed_period * batch_size / (time.time() - tic) + speed = log_period * batch_size / (time.time() - tic) except ZeroDivisionError: speed = float("inf") - logging.info("Epoch[%d] Batch[%d] Speed: %.2f samples/sec" % (epoch, batch_i, speed)) + loss_avg = loss_total / (batch_size * log_period) + loss_total = 0 + + logging.info("Epoch[%d] Batch[%d] Speed: %.2f samples/sec Loss: %.5f" % (epoch, batch_i, speed, loss_avg)) tic = time.time() tic = None - train_test_iter.reset() - metric = mx.metric.create(eval_metric, **eval_metric_params) - for batch_i, batch in enumerate(train_test_iter): - if True: <#-- Fix indentation --> + if eval_train: + train_iter.reset() + metric = mx.metric.create(eval_metric, **eval_metric_params) + for batch_i, batch in enumerate(train_iter): <#include "pythonExecuteTest.ftl"> <#include "saveAttentionImageTrain.ftl"> - predictions = [] - for output_name in outputs: - if mx.nd.shape_array(mx.nd.squeeze(output_name)).size > 1: - predictions.append(mx.nd.argmax(output_name, axis=1)) - else: - predictions.append(output_name) + predictions = [] + for output_name in outputs: + if mx.nd.shape_array(mx.nd.squeeze(output_name)).size > 1: + predictions.append(mx.nd.argmax(output_name, axis=1)) + else: + predictions.append(output_name) - metric.update(preds=predictions, labels=labels) - train_metric_score = metric.get()[1] + metric.update(preds=predictions, labels=labels) + train_metric_score = metric.get()[1] + else: + train_metric_score = 0 test_iter.reset() metric = mx.metric.create(eval_metric, **eval_metric_params) diff --git a/src/main/resources/templates/gluon/CNNTrainer.ftl b/src/main/resources/templates/gluon/CNNTrainer.ftl index 9bbdb58..b0f4210 100644 --- a/src/main/resources/templates/gluon/CNNTrainer.ftl +++ b/src/main/resources/templates/gluon/CNNTrainer.ftl @@ -31,6 +31,12 @@ if __name__ == "__main__": <#if (config.loadCheckpoint)??> load_checkpoint=${config.loadCheckpoint?string("True","False")}, +<#if (config.checkpointPeriod)??> + checkpoint_period=${config.checkpointPeriod}, + +<#if (config.logPeriod)??> + log_period=${config.logPeriod}, + <#if (config.context)??> context='${config.context}', @@ -51,6 +57,9 @@ if __name__ == "__main__": }, +<#if (config.evalTrain)??> + eval_train=${config.evalTrain?string("True","False")}, + <#if (config.configuration.loss)??> loss='${config.lossName}', <#if (config.lossParams)??> diff --git a/src/main/resources/templates/gluon/pythonExecuteTest.ftl b/src/main/resources/templates/gluon/pythonExecuteTest.ftl index 7bd43b3..a60efd4 100644 --- a/src/main/resources/templates/gluon/pythonExecuteTest.ftl +++ b/src/main/resources/templates/gluon/pythonExecuteTest.ftl @@ -6,19 +6,19 @@ <#if tc.architectureOutputSymbols?size gt 1> <#assign outputName = tc.getNameWithoutIndex(tc.getName(tc.architectureOutputSymbols[0]))> - ${outputName} = [mx.nd.zeros((test_batch_size, ${tc.join(tc.architectureOutputSymbols[0].ioDeclaration.type.dimensions, ", ")},), ctx=mx_context) for i in range(${tc.architectureOutputs?size?c})] + ${outputName} = [mx.nd.zeros((batch_size, ${tc.join(tc.architectureOutputSymbols[0].ioDeclaration.type.dimensions, ", ")},), ctx=mx_context) for i in range(${tc.architectureOutputs?size?c})] <#else> <#list tc.architectureOutputSymbols as output> - ${tc.getName(output)} = mx.nd.zeros((test_batch_size, ${tc.join(output.ioDeclaration.type.dimensions, ", ")},), ctx=mx_context)<#sep>, + ${tc.getName(output)} = mx.nd.zeros((batch_size, ${tc.join(output.ioDeclaration.type.dimensions, ", ")},), ctx=mx_context)<#sep>, <#list tc.getLayerVariableMembers()?keys as member> - ${member} = mx.nd.zeros((test_batch_size, ${tc.join(tc.cutDimensions(tc.getLayerVariableMembers()[member]), ", ")},), ctx=mx_context) + ${member} = mx.nd.zeros((batch_size, ${tc.join(tc.cutDimensions(tc.getLayerVariableMembers()[member]), ", ")},), ctx=mx_context) <#list tc.architecture.constants as constant> - ${tc.getName(constant)} = mx.nd.full((test_batch_size, 1,), ${constant.intValue?c}, ctx=mx_context) + ${tc.getName(constant)} = mx.nd.full((batch_size, 1,), ${constant.intValue?c}, ctx=mx_context) nd.waitall() diff --git a/src/main/resources/templates/gluon/pythonExecuteTrain.ftl b/src/main/resources/templates/gluon/pythonExecuteTrain.ftl index d1cfe61..fcc6a71 100644 --- a/src/main/resources/templates/gluon/pythonExecuteTrain.ftl +++ b/src/main/resources/templates/gluon/pythonExecuteTrain.ftl @@ -6,19 +6,19 @@ <#if tc.architectureOutputSymbols?size gt 1> <#assign outputName = tc.getNameWithoutIndex(tc.getName(tc.architectureOutputSymbols[0]))> - ${outputName} = [mx.nd.zeros((train_batch_size, ${tc.join(tc.architectureOutputSymbols[0].ioDeclaration.type.dimensions, ", ")},), ctx=mx_context) for i in range(${tc.architectureOutputs?size?c})] + ${outputName} = [mx.nd.zeros((batch_size, ${tc.join(tc.architectureOutputSymbols[0].ioDeclaration.type.dimensions, ", ")},), ctx=mx_context) for i in range(${tc.architectureOutputs?size?c})] <#else> <#list tc.architectureOutputSymbols as output> - ${tc.getName(output)} = mx.nd.zeros((train_batch_size, ${tc.join(output.ioDeclaration.type.dimensions, ", ")},), ctx=mx_context)<#sep>, + ${tc.getName(output)} = mx.nd.zeros((batch_size, ${tc.join(output.ioDeclaration.type.dimensions, ", ")},), ctx=mx_context)<#sep>, <#list tc.getLayerVariableMembers()?keys as member> - ${member} = mx.nd.zeros((train_batch_size, ${tc.join(tc.cutDimensions(tc.getLayerVariableMembers()[member]), ", ")},), ctx=mx_context) + ${member} = mx.nd.zeros((batch_size, ${tc.join(tc.cutDimensions(tc.getLayerVariableMembers()[member]), ", ")},), ctx=mx_context) <#list tc.architecture.constants as constant> - ${tc.getName(constant)} = mx.nd.full((train_batch_size, 1,), ${constant.intValue?c}, ctx=mx_context) + ${tc.getName(constant)} = mx.nd.full((batch_size, 1,), ${constant.intValue?c}, ctx=mx_context) nd.waitall() diff --git a/src/main/resources/templates/gluon/saveAttentionImageTest.ftl b/src/main/resources/templates/gluon/saveAttentionImageTest.ftl index 9905d9a..4936b1e 100644 --- a/src/main/resources/templates/gluon/saveAttentionImageTest.ftl +++ b/src/main/resources/templates/gluon/saveAttentionImageTest.ftl @@ -1,10 +1,20 @@ if save_attention_image == "True": + if not eval_train: + import matplotlib + matplotlib.use('Agg') + import matplotlib.pyplot as plt + logging.getLogger('matplotlib').setLevel(logging.ERROR) + + if(os.path.isfile('src/test/resources/training_data/Show_attend_tell/dict.pkl')): + with open('src/test/resources/training_data/Show_attend_tell/dict.pkl', 'rb') as f: + dict = pickle.load(f) + plt.clf() fig = plt.figure(figsize=(15,15)) max_length = len(labels)-1 ax = fig.add_subplot(max_length//3, max_length//4, 1) - ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) for l in range(max_length): attention = attentionList[l] @@ -15,14 +25,17 @@ ax.set_title("") elif dict[int(mx.nd.slice_axis(outputs[l+1], axis=0, begin=0, end=1).squeeze().asscalar())] == "": ax.set_title(".") - img = ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) break else: ax.set_title(dict[int(mx.nd.slice_axis(outputs[l+1], axis=0, begin=0, end=1).squeeze().asscalar())]) - img = ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) plt.tight_layout() + target_dir = 'target/attention_images' + if not os.path.exists(target_dir): + os.makedirs(target_dir) plt.savefig(target_dir + '/attention_test.png') plt.close() \ No newline at end of file diff --git a/src/main/resources/templates/gluon/saveAttentionImageTrain.ftl b/src/main/resources/templates/gluon/saveAttentionImageTrain.ftl index 03a6b06..4cb2666 100644 --- a/src/main/resources/templates/gluon/saveAttentionImageTrain.ftl +++ b/src/main/resources/templates/gluon/saveAttentionImageTrain.ftl @@ -4,16 +4,16 @@ import matplotlib.pyplot as plt logging.getLogger('matplotlib').setLevel(logging.ERROR) - plt.clf() - fig = plt.figure(figsize=(15,15)) - max_length = len(labels)-1 - if(os.path.isfile('src/test/resources/training_data/Show_attend_tell/dict.pkl')): with open('src/test/resources/training_data/Show_attend_tell/dict.pkl', 'rb') as f: dict = pickle.load(f) + plt.clf() + fig = plt.figure(figsize=(15,15)) + max_length = len(labels)-1 + ax = fig.add_subplot(max_length//3, max_length//4, 1) - ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) for l in range(max_length): attention = attentionList[l] @@ -24,12 +24,12 @@ ax.set_title("") elif dict[int(labels[l+1][0].asscalar())] == "": ax.set_title(".") - img = ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) break else: ax.set_title(dict[int(labels[l+1][0].asscalar())]) - img = ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) plt.tight_layout() diff --git a/src/test/resources/target_code/CNNDataLoader_Alexnet.py b/src/test/resources/target_code/CNNDataLoader_Alexnet.py index 3d54479..e764abe 100644 --- a/src/test/resources/target_code/CNNDataLoader_Alexnet.py +++ b/src/test/resources/target_code/CNNDataLoader_Alexnet.py @@ -14,7 +14,7 @@ class CNNDataLoader_Alexnet: def __init__(self): self._data_dir = "data/Alexnet/" - def load_data(self, train_batch_size, test_batch_size): + def load_data(self, batch_size): train_h5, test_h5 = self.load_h5_files() train_data = {} @@ -38,11 +38,7 @@ class CNNDataLoader_Alexnet: train_iter = mx.io.NDArrayIter(data=train_data, label=train_label, - batch_size=train_batch_size) - - train_test_iter = mx.io.NDArrayIter(data=train_data, - label=train_label, - batch_size=test_batch_size) + batch_size=batch_size) test_iter = None @@ -63,9 +59,9 @@ class CNNDataLoader_Alexnet: test_iter = mx.io.NDArrayIter(data=test_data, label=test_label, - batch_size=test_batch_size) + batch_size=batch_size) - return train_iter, train_test_iter, test_iter, data_mean, data_std, train_images, test_images + return train_iter, test_iter, data_mean, data_std, train_images, test_images def load_data_img(self, batch_size, img_size): train_h5, test_h5 = self.load_h5_files() diff --git a/src/test/resources/target_code/CNNDataLoader_CifarClassifierNetwork.py b/src/test/resources/target_code/CNNDataLoader_CifarClassifierNetwork.py index 934184b..c474907 100644 --- a/src/test/resources/target_code/CNNDataLoader_CifarClassifierNetwork.py +++ b/src/test/resources/target_code/CNNDataLoader_CifarClassifierNetwork.py @@ -14,7 +14,7 @@ class CNNDataLoader_CifarClassifierNetwork: def __init__(self): self._data_dir = "data/CifarClassifierNetwork/" - def load_data(self, train_batch_size, test_batch_size): + def load_data(self, batch_size): train_h5, test_h5 = self.load_h5_files() train_data = {} @@ -38,11 +38,7 @@ class CNNDataLoader_CifarClassifierNetwork: train_iter = mx.io.NDArrayIter(data=train_data, label=train_label, - batch_size=train_batch_size) - - train_test_iter = mx.io.NDArrayIter(data=train_data, - label=train_label, - batch_size=test_batch_size) + batch_size=batch_size) test_iter = None @@ -63,9 +59,9 @@ class CNNDataLoader_CifarClassifierNetwork: test_iter = mx.io.NDArrayIter(data=test_data, label=test_label, - batch_size=test_batch_size) + batch_size=batch_size) - return train_iter, train_test_iter, test_iter, data_mean, data_std, train_images, test_images + return train_iter, test_iter, data_mean, data_std, train_images, test_images def load_data_img(self, batch_size, img_size): train_h5, test_h5 = self.load_h5_files() diff --git a/src/test/resources/target_code/CNNDataLoader_VGG16.py b/src/test/resources/target_code/CNNDataLoader_VGG16.py index 7f434af..1e10265 100644 --- a/src/test/resources/target_code/CNNDataLoader_VGG16.py +++ b/src/test/resources/target_code/CNNDataLoader_VGG16.py @@ -14,7 +14,7 @@ class CNNDataLoader_VGG16: def __init__(self): self._data_dir = "data/VGG16/" - def load_data(self, train_batch_size, test_batch_size): + def load_data(self, batch_size): train_h5, test_h5 = self.load_h5_files() train_data = {} @@ -38,11 +38,7 @@ class CNNDataLoader_VGG16: train_iter = mx.io.NDArrayIter(data=train_data, label=train_label, - batch_size=train_batch_size) - - train_test_iter = mx.io.NDArrayIter(data=train_data, - label=train_label, - batch_size=test_batch_size) + batch_size=batch_size) test_iter = None @@ -63,9 +59,9 @@ class CNNDataLoader_VGG16: test_iter = mx.io.NDArrayIter(data=test_data, label=test_label, - batch_size=test_batch_size) + batch_size=batch_size) - return train_iter, train_test_iter, test_iter, data_mean, data_std, train_images, test_images + return train_iter, test_iter, data_mean, data_std, train_images, test_images def load_data_img(self, batch_size, img_size): train_h5, test_h5 = self.load_h5_files() diff --git a/src/test/resources/target_code/CNNSupervisedTrainer_Alexnet.py b/src/test/resources/target_code/CNNSupervisedTrainer_Alexnet.py index 24973d5..76eebb5 100644 --- a/src/test/resources/target_code/CNNSupervisedTrainer_Alexnet.py +++ b/src/test/resources/target_code/CNNSupervisedTrainer_Alexnet.py @@ -180,13 +180,15 @@ class CNNSupervisedTrainer_Alexnet: num_epoch=10, eval_metric='acc', eval_metric_params={}, + eval_train=False, loss ='softmax_cross_entropy', loss_params={}, optimizer='adam', optimizer_params=(('learning_rate', 0.001),), load_checkpoint=True, - context='gpu', checkpoint_period=5, + log_period=50, + context='gpu', save_attention_image=False, use_teacher_forcing=False, normalize=True): @@ -212,10 +214,7 @@ class CNNSupervisedTrainer_Alexnet: del optimizer_params['step_size'] del optimizer_params['learning_rate_decay'] - train_batch_size = batch_size - test_batch_size = batch_size - - train_iter, train_test_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(train_batch_size, test_batch_size) + train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size) if normalize: self._net_creator.construct(context=mx_context, data_mean=data_mean, data_std=data_std) @@ -274,10 +273,11 @@ class CNNSupervisedTrainer_Alexnet: else: logging.error("Invalid loss parameter.") - speed_period = 50 tic = None for epoch in range(begin_epoch, begin_epoch + num_epoch): + + loss_total = 0 train_iter.reset() for batch_i, batch in enumerate(train_iter): with autograd.record(): @@ -285,7 +285,7 @@ class CNNSupervisedTrainer_Alexnet: data_ = batch.data[0].as_in_context(mx_context) - predictions_ = mx.nd.zeros((train_batch_size, 10,), ctx=mx_context) + predictions_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context) nd.waitall() @@ -302,33 +302,39 @@ class CNNSupervisedTrainer_Alexnet: loss.backward() + loss_total += loss.sum().asscalar() + for trainer in trainers: trainer.step(batch_size) if tic is None: tic = time.time() else: - if batch_i % speed_period == 0: + if batch_i % log_period == 0: try: - speed = speed_period * batch_size / (time.time() - tic) + speed = log_period * batch_size / (time.time() - tic) except ZeroDivisionError: speed = float("inf") - logging.info("Epoch[%d] Batch[%d] Speed: %.2f samples/sec" % (epoch, batch_i, speed)) + loss_avg = loss_total / (batch_size * log_period) + loss_total = 0 + + logging.info("Epoch[%d] Batch[%d] Speed: %.2f samples/sec Loss: %.5f" % (epoch, batch_i, speed, loss_avg)) tic = time.time() tic = None - train_test_iter.reset() - metric = mx.metric.create(eval_metric, **eval_metric_params) - for batch_i, batch in enumerate(train_test_iter): - if True: + + if eval_train: + train_iter.reset() + metric = mx.metric.create(eval_metric, **eval_metric_params) + for batch_i, batch in enumerate(train_iter): labels = [batch.label[i].as_in_context(mx_context) for i in range(1)] data_ = batch.data[0].as_in_context(mx_context) - predictions_ = mx.nd.zeros((test_batch_size, 10,), ctx=mx_context) + predictions_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context) nd.waitall() @@ -346,16 +352,16 @@ class CNNSupervisedTrainer_Alexnet: import matplotlib.pyplot as plt logging.getLogger('matplotlib').setLevel(logging.ERROR) - plt.clf() - fig = plt.figure(figsize=(15,15)) - max_length = len(labels)-1 - if(os.path.isfile('src/test/resources/training_data/Show_attend_tell/dict.pkl')): with open('src/test/resources/training_data/Show_attend_tell/dict.pkl', 'rb') as f: dict = pickle.load(f) + plt.clf() + fig = plt.figure(figsize=(15,15)) + max_length = len(labels)-1 + ax = fig.add_subplot(max_length//3, max_length//4, 1) - ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) for l in range(max_length): attention = attentionList[l] @@ -366,12 +372,12 @@ class CNNSupervisedTrainer_Alexnet: ax.set_title("") elif dict[int(labels[l+1][0].asscalar())] == "": ax.set_title(".") - img = ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) break else: ax.set_title(dict[int(labels[l+1][0].asscalar())]) - img = ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) plt.tight_layout() @@ -381,25 +387,27 @@ class CNNSupervisedTrainer_Alexnet: plt.savefig(target_dir + '/attention_train.png') plt.close() - predictions = [] - for output_name in outputs: - if mx.nd.shape_array(mx.nd.squeeze(output_name)).size > 1: - predictions.append(mx.nd.argmax(output_name, axis=1)) - else: - predictions.append(output_name) + predictions = [] + for output_name in outputs: + if mx.nd.shape_array(mx.nd.squeeze(output_name)).size > 1: + predictions.append(mx.nd.argmax(output_name, axis=1)) + else: + predictions.append(output_name) - metric.update(preds=predictions, labels=labels) - train_metric_score = metric.get()[1] + metric.update(preds=predictions, labels=labels) + train_metric_score = metric.get()[1] + else: + train_metric_score = 0 test_iter.reset() metric = mx.metric.create(eval_metric, **eval_metric_params) for batch_i, batch in enumerate(test_iter): - if True: + if True: labels = [batch.label[i].as_in_context(mx_context) for i in range(1)] data_ = batch.data[0].as_in_context(mx_context) - predictions_ = mx.nd.zeros((test_batch_size, 10,), ctx=mx_context) + predictions_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context) nd.waitall() @@ -412,12 +420,22 @@ class CNNSupervisedTrainer_Alexnet: if save_attention_image == "True": + if not eval_train: + import matplotlib + matplotlib.use('Agg') + import matplotlib.pyplot as plt + logging.getLogger('matplotlib').setLevel(logging.ERROR) + + if(os.path.isfile('src/test/resources/training_data/Show_attend_tell/dict.pkl')): + with open('src/test/resources/training_data/Show_attend_tell/dict.pkl', 'rb') as f: + dict = pickle.load(f) + plt.clf() fig = plt.figure(figsize=(15,15)) max_length = len(labels)-1 ax = fig.add_subplot(max_length//3, max_length//4, 1) - ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) for l in range(max_length): attention = attentionList[l] @@ -428,15 +446,18 @@ class CNNSupervisedTrainer_Alexnet: ax.set_title("") elif dict[int(mx.nd.slice_axis(outputs[l+1], axis=0, begin=0, end=1).squeeze().asscalar())] == "": ax.set_title(".") - img = ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) break else: ax.set_title(dict[int(mx.nd.slice_axis(outputs[l+1], axis=0, begin=0, end=1).squeeze().asscalar())]) - img = ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) plt.tight_layout() + target_dir = 'target/attention_images' + if not os.path.exists(target_dir): + os.makedirs(target_dir) plt.savefig(target_dir + '/attention_test.png') plt.close() diff --git a/src/test/resources/target_code/CNNSupervisedTrainer_CifarClassifierNetwork.py b/src/test/resources/target_code/CNNSupervisedTrainer_CifarClassifierNetwork.py index f0400ab..3bd65bd 100644 --- a/src/test/resources/target_code/CNNSupervisedTrainer_CifarClassifierNetwork.py +++ b/src/test/resources/target_code/CNNSupervisedTrainer_CifarClassifierNetwork.py @@ -180,13 +180,15 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: num_epoch=10, eval_metric='acc', eval_metric_params={}, + eval_train=False, loss ='softmax_cross_entropy', loss_params={}, optimizer='adam', optimizer_params=(('learning_rate', 0.001),), load_checkpoint=True, - context='gpu', checkpoint_period=5, + log_period=50, + context='gpu', save_attention_image=False, use_teacher_forcing=False, normalize=True): @@ -212,10 +214,7 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: del optimizer_params['step_size'] del optimizer_params['learning_rate_decay'] - train_batch_size = batch_size - test_batch_size = batch_size - - train_iter, train_test_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(train_batch_size, test_batch_size) + train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size) if normalize: self._net_creator.construct(context=mx_context, data_mean=data_mean, data_std=data_std) @@ -274,10 +273,11 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: else: logging.error("Invalid loss parameter.") - speed_period = 50 tic = None for epoch in range(begin_epoch, begin_epoch + num_epoch): + + loss_total = 0 train_iter.reset() for batch_i, batch in enumerate(train_iter): with autograd.record(): @@ -285,7 +285,7 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: data_ = batch.data[0].as_in_context(mx_context) - softmax_ = mx.nd.zeros((train_batch_size, 10,), ctx=mx_context) + softmax_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context) nd.waitall() @@ -302,33 +302,39 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: loss.backward() + loss_total += loss.sum().asscalar() + for trainer in trainers: trainer.step(batch_size) if tic is None: tic = time.time() else: - if batch_i % speed_period == 0: + if batch_i % log_period == 0: try: - speed = speed_period * batch_size / (time.time() - tic) + speed = log_period * batch_size / (time.time() - tic) except ZeroDivisionError: speed = float("inf") - logging.info("Epoch[%d] Batch[%d] Speed: %.2f samples/sec" % (epoch, batch_i, speed)) + loss_avg = loss_total / (batch_size * log_period) + loss_total = 0 + + logging.info("Epoch[%d] Batch[%d] Speed: %.2f samples/sec Loss: %.5f" % (epoch, batch_i, speed, loss_avg)) tic = time.time() tic = None - train_test_iter.reset() - metric = mx.metric.create(eval_metric, **eval_metric_params) - for batch_i, batch in enumerate(train_test_iter): - if True: + + if eval_train: + train_iter.reset() + metric = mx.metric.create(eval_metric, **eval_metric_params) + for batch_i, batch in enumerate(train_iter): labels = [batch.label[i].as_in_context(mx_context) for i in range(1)] data_ = batch.data[0].as_in_context(mx_context) - softmax_ = mx.nd.zeros((test_batch_size, 10,), ctx=mx_context) + softmax_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context) nd.waitall() @@ -346,16 +352,16 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: import matplotlib.pyplot as plt logging.getLogger('matplotlib').setLevel(logging.ERROR) - plt.clf() - fig = plt.figure(figsize=(15,15)) - max_length = len(labels)-1 - if(os.path.isfile('src/test/resources/training_data/Show_attend_tell/dict.pkl')): with open('src/test/resources/training_data/Show_attend_tell/dict.pkl', 'rb') as f: dict = pickle.load(f) + plt.clf() + fig = plt.figure(figsize=(15,15)) + max_length = len(labels)-1 + ax = fig.add_subplot(max_length//3, max_length//4, 1) - ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) for l in range(max_length): attention = attentionList[l] @@ -366,12 +372,12 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ax.set_title("") elif dict[int(labels[l+1][0].asscalar())] == "": ax.set_title(".") - img = ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) break else: ax.set_title(dict[int(labels[l+1][0].asscalar())]) - img = ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) plt.tight_layout() @@ -381,25 +387,27 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: plt.savefig(target_dir + '/attention_train.png') plt.close() - predictions = [] - for output_name in outputs: - if mx.nd.shape_array(mx.nd.squeeze(output_name)).size > 1: - predictions.append(mx.nd.argmax(output_name, axis=1)) - else: - predictions.append(output_name) + predictions = [] + for output_name in outputs: + if mx.nd.shape_array(mx.nd.squeeze(output_name)).size > 1: + predictions.append(mx.nd.argmax(output_name, axis=1)) + else: + predictions.append(output_name) - metric.update(preds=predictions, labels=labels) - train_metric_score = metric.get()[1] + metric.update(preds=predictions, labels=labels) + train_metric_score = metric.get()[1] + else: + train_metric_score = 0 test_iter.reset() metric = mx.metric.create(eval_metric, **eval_metric_params) for batch_i, batch in enumerate(test_iter): - if True: + if True: labels = [batch.label[i].as_in_context(mx_context) for i in range(1)] data_ = batch.data[0].as_in_context(mx_context) - softmax_ = mx.nd.zeros((test_batch_size, 10,), ctx=mx_context) + softmax_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context) nd.waitall() @@ -412,12 +420,22 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: if save_attention_image == "True": + if not eval_train: + import matplotlib + matplotlib.use('Agg') + import matplotlib.pyplot as plt + logging.getLogger('matplotlib').setLevel(logging.ERROR) + + if(os.path.isfile('src/test/resources/training_data/Show_attend_tell/dict.pkl')): + with open('src/test/resources/training_data/Show_attend_tell/dict.pkl', 'rb') as f: + dict = pickle.load(f) + plt.clf() fig = plt.figure(figsize=(15,15)) max_length = len(labels)-1 ax = fig.add_subplot(max_length//3, max_length//4, 1) - ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) for l in range(max_length): attention = attentionList[l] @@ -428,15 +446,18 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ax.set_title("") elif dict[int(mx.nd.slice_axis(outputs[l+1], axis=0, begin=0, end=1).squeeze().asscalar())] == "": ax.set_title(".") - img = ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) break else: ax.set_title(dict[int(mx.nd.slice_axis(outputs[l+1], axis=0, begin=0, end=1).squeeze().asscalar())]) - img = ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) plt.tight_layout() + target_dir = 'target/attention_images' + if not os.path.exists(target_dir): + os.makedirs(target_dir) plt.savefig(target_dir + '/attention_test.png') plt.close() diff --git a/src/test/resources/target_code/CNNSupervisedTrainer_VGG16.py b/src/test/resources/target_code/CNNSupervisedTrainer_VGG16.py index 9c27d6c..67f6f85 100644 --- a/src/test/resources/target_code/CNNSupervisedTrainer_VGG16.py +++ b/src/test/resources/target_code/CNNSupervisedTrainer_VGG16.py @@ -180,13 +180,15 @@ class CNNSupervisedTrainer_VGG16: num_epoch=10, eval_metric='acc', eval_metric_params={}, + eval_train=False, loss ='softmax_cross_entropy', loss_params={}, optimizer='adam', optimizer_params=(('learning_rate', 0.001),), load_checkpoint=True, - context='gpu', checkpoint_period=5, + log_period=50, + context='gpu', save_attention_image=False, use_teacher_forcing=False, normalize=True): @@ -212,10 +214,7 @@ class CNNSupervisedTrainer_VGG16: del optimizer_params['step_size'] del optimizer_params['learning_rate_decay'] - train_batch_size = batch_size - test_batch_size = batch_size - - train_iter, train_test_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(train_batch_size, test_batch_size) + train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size) if normalize: self._net_creator.construct(context=mx_context, data_mean=data_mean, data_std=data_std) @@ -274,10 +273,11 @@ class CNNSupervisedTrainer_VGG16: else: logging.error("Invalid loss parameter.") - speed_period = 50 tic = None for epoch in range(begin_epoch, begin_epoch + num_epoch): + + loss_total = 0 train_iter.reset() for batch_i, batch in enumerate(train_iter): with autograd.record(): @@ -285,7 +285,7 @@ class CNNSupervisedTrainer_VGG16: data_ = batch.data[0].as_in_context(mx_context) - predictions_ = mx.nd.zeros((train_batch_size, 1000,), ctx=mx_context) + predictions_ = mx.nd.zeros((batch_size, 1000,), ctx=mx_context) nd.waitall() @@ -302,33 +302,39 @@ class CNNSupervisedTrainer_VGG16: loss.backward() + loss_total += loss.sum().asscalar() + for trainer in trainers: trainer.step(batch_size) if tic is None: tic = time.time() else: - if batch_i % speed_period == 0: + if batch_i % log_period == 0: try: - speed = speed_period * batch_size / (time.time() - tic) + speed = log_period * batch_size / (time.time() - tic) except ZeroDivisionError: speed = float("inf") - logging.info("Epoch[%d] Batch[%d] Speed: %.2f samples/sec" % (epoch, batch_i, speed)) + loss_avg = loss_total / (batch_size * log_period) + loss_total = 0 + + logging.info("Epoch[%d] Batch[%d] Speed: %.2f samples/sec Loss: %.5f" % (epoch, batch_i, speed, loss_avg)) tic = time.time() tic = None - train_test_iter.reset() - metric = mx.metric.create(eval_metric, **eval_metric_params) - for batch_i, batch in enumerate(train_test_iter): - if True: + + if eval_train: + train_iter.reset() + metric = mx.metric.create(eval_metric, **eval_metric_params) + for batch_i, batch in enumerate(train_iter): labels = [batch.label[i].as_in_context(mx_context) for i in range(1)] data_ = batch.data[0].as_in_context(mx_context) - predictions_ = mx.nd.zeros((test_batch_size, 1000,), ctx=mx_context) + predictions_ = mx.nd.zeros((batch_size, 1000,), ctx=mx_context) nd.waitall() @@ -346,16 +352,16 @@ class CNNSupervisedTrainer_VGG16: import matplotlib.pyplot as plt logging.getLogger('matplotlib').setLevel(logging.ERROR) - plt.clf() - fig = plt.figure(figsize=(15,15)) - max_length = len(labels)-1 - if(os.path.isfile('src/test/resources/training_data/Show_attend_tell/dict.pkl')): with open('src/test/resources/training_data/Show_attend_tell/dict.pkl', 'rb') as f: dict = pickle.load(f) + plt.clf() + fig = plt.figure(figsize=(15,15)) + max_length = len(labels)-1 + ax = fig.add_subplot(max_length//3, max_length//4, 1) - ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) for l in range(max_length): attention = attentionList[l] @@ -366,12 +372,12 @@ class CNNSupervisedTrainer_VGG16: ax.set_title("") elif dict[int(labels[l+1][0].asscalar())] == "": ax.set_title(".") - img = ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) break else: ax.set_title(dict[int(labels[l+1][0].asscalar())]) - img = ax.imshow(train_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(train_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) plt.tight_layout() @@ -381,25 +387,27 @@ class CNNSupervisedTrainer_VGG16: plt.savefig(target_dir + '/attention_train.png') plt.close() - predictions = [] - for output_name in outputs: - if mx.nd.shape_array(mx.nd.squeeze(output_name)).size > 1: - predictions.append(mx.nd.argmax(output_name, axis=1)) - else: - predictions.append(output_name) + predictions = [] + for output_name in outputs: + if mx.nd.shape_array(mx.nd.squeeze(output_name)).size > 1: + predictions.append(mx.nd.argmax(output_name, axis=1)) + else: + predictions.append(output_name) - metric.update(preds=predictions, labels=labels) - train_metric_score = metric.get()[1] + metric.update(preds=predictions, labels=labels) + train_metric_score = metric.get()[1] + else: + train_metric_score = 0 test_iter.reset() metric = mx.metric.create(eval_metric, **eval_metric_params) for batch_i, batch in enumerate(test_iter): - if True: + if True: labels = [batch.label[i].as_in_context(mx_context) for i in range(1)] data_ = batch.data[0].as_in_context(mx_context) - predictions_ = mx.nd.zeros((test_batch_size, 1000,), ctx=mx_context) + predictions_ = mx.nd.zeros((batch_size, 1000,), ctx=mx_context) nd.waitall() @@ -412,12 +420,22 @@ class CNNSupervisedTrainer_VGG16: if save_attention_image == "True": + if not eval_train: + import matplotlib + matplotlib.use('Agg') + import matplotlib.pyplot as plt + logging.getLogger('matplotlib').setLevel(logging.ERROR) + + if(os.path.isfile('src/test/resources/training_data/Show_attend_tell/dict.pkl')): + with open('src/test/resources/training_data/Show_attend_tell/dict.pkl', 'rb') as f: + dict = pickle.load(f) + plt.clf() fig = plt.figure(figsize=(15,15)) max_length = len(labels)-1 ax = fig.add_subplot(max_length//3, max_length//4, 1) - ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) for l in range(max_length): attention = attentionList[l] @@ -428,15 +446,18 @@ class CNNSupervisedTrainer_VGG16: ax.set_title("") elif dict[int(mx.nd.slice_axis(outputs[l+1], axis=0, begin=0, end=1).squeeze().asscalar())] == "": ax.set_title(".") - img = ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) break else: ax.set_title(dict[int(mx.nd.slice_axis(outputs[l+1], axis=0, begin=0, end=1).squeeze().asscalar())]) - img = ax.imshow(test_images[0+test_batch_size*(batch_i)].transpose(1,2,0)) + img = ax.imshow(test_images[0+batch_size*(batch_i)].transpose(1,2,0)) ax.imshow(attention_resized, cmap='gray', alpha=0.6, extent=img.get_extent()) plt.tight_layout() + target_dir = 'target/attention_images' + if not os.path.exists(target_dir): + os.makedirs(target_dir) plt.savefig(target_dir + '/attention_test.png') plt.close() -- GitLab