Commit 58436d17 authored by Sebastian N.

Added output of train and test/validation loss at the end of each epoch

parent bf56b53c
Pipeline #226535 failed with stages in 33 seconds
...@@ -291,6 +291,9 @@ class ${tc.fileNameWithoutEnding}: ...@@ -291,6 +291,9 @@ class ${tc.fileNameWithoutEnding}:
else: else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data) train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
global_loss_train = 0.0
train_batches = 0
loss_total = 0 loss_total = 0
train_iter.reset() train_iter.reset()
for batch_i, batch in enumerate(train_iter): for batch_i, batch in enumerate(train_iter):
...@@ -305,6 +308,9 @@ class ${tc.fileNameWithoutEnding}: ...@@ -305,6 +308,9 @@ class ${tc.fileNameWithoutEnding}:
loss_total += loss.sum().asscalar() loss_total += loss.sum().asscalar()
global_loss_train += float(loss.mean().asscalar())
train_batches += 1
if clip_global_grad_norm: if clip_global_grad_norm:
grads = [] grads = []
...@@ -332,6 +338,9 @@ class ${tc.fileNameWithoutEnding}: ...@@ -332,6 +338,9 @@ class ${tc.fileNameWithoutEnding}:
tic = time.time() tic = time.time()
if train_batches > 0:
global_loss_train /= train_batches
tic = None tic = None
...@@ -357,6 +366,9 @@ class ${tc.fileNameWithoutEnding}: ...@@ -357,6 +366,9 @@ class ${tc.fileNameWithoutEnding}:
else: else:
train_metric_score = 0 train_metric_score = 0
global_loss_test = 0.0
test_batches = 0
test_iter.reset() test_iter.reset()
metric = mx.metric.create(eval_metric, **eval_metric_params) metric = mx.metric.create(eval_metric, **eval_metric_params)
for batch_i, batch in enumerate(test_iter): for batch_i, batch in enumerate(test_iter):
...@@ -366,6 +378,12 @@ class ${tc.fileNameWithoutEnding}: ...@@ -366,6 +378,12 @@ class ${tc.fileNameWithoutEnding}:
<#include "saveAttentionImageTest.ftl"> <#include "saveAttentionImageTest.ftl">
loss = 0
for element in lossList:
loss = loss + element
global_loss_test += float(loss.mean().asscalar())
test_batches += 1
predictions = [] predictions = []
for output_name in outputs: for output_name in outputs:
...@@ -378,8 +396,10 @@ class ${tc.fileNameWithoutEnding}: ...@@ -378,8 +396,10 @@ class ${tc.fileNameWithoutEnding}:
metric.update(preds=predictions, labels=labels) metric.update(preds=predictions, labels=labels)
test_metric_score = metric.get()[1] test_metric_score = metric.get()[1]
logging.info("Epoch[%d] Train: %f, Test: %f" % (epoch, train_metric_score, test_metric_score)) if test_batches > 0:
global_loss_test /= test_batches
logging.info("Epoch[%d] Train: %f, Test: %f, Train Loss: %f, Test Loss: %f" % (epoch, train_metric_score, test_metric_score, global_loss_train, global_loss_test))
if (epoch - begin_epoch) % checkpoint_period == 0: if (epoch - begin_epoch) % checkpoint_period == 0:
for i, network in self._networks.items(): for i, network in self._networks.items():
......
...@@ -24,7 +24,8 @@ ...@@ -24,7 +24,8 @@
nd.waitall() nd.waitall()
outputs = [] outputs = []
attentionList=[] lossList = []
attentionList = []
<#list tc.architecture.networkInstructions as networkInstruction> <#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.isUnroll()> <#if networkInstruction.isUnroll()>
k = ${tc.getBeamSearchWidth(networkInstruction)} k = ${tc.getBeamSearchWidth(networkInstruction)}
...@@ -92,6 +93,7 @@ ...@@ -92,6 +93,7 @@
<#if tc.getNameWithoutIndex(outputName) == tc.outputName> <#if tc.getNameWithoutIndex(outputName) == tc.outputName>
${outputName} = sequences[0][0][i] ${outputName} = sequences[0][0][i]
outputs.append(${outputName}) outputs.append(${outputName})
lossList.append(loss_function(${outputName}, labels[${tc.getIndex(outputName, true)}]))
<#if tc.isAttentionNetwork()> <#if tc.isAttentionNetwork()>
attentionList.append(sequences[0][2][i]) attentionList.append(sequences[0][2][i])
</#if> </#if>
...@@ -103,6 +105,7 @@ ...@@ -103,6 +105,7 @@
<#list tc.getStreamOutputNames(networkInstruction.body, true) as outputName> <#list tc.getStreamOutputNames(networkInstruction.body, true) as outputName>
<#if tc.getNameWithoutIndex(outputName) == tc.outputName> <#if tc.getNameWithoutIndex(outputName) == tc.outputName>
outputs.append(${outputName}) outputs.append(${outputName})
lossList.append(loss_function(${outputName}, labels[${tc.getIndex(outputName, true)}]))
<#if tc.endsWithArgmax(networkInstruction.body)> <#if tc.endsWithArgmax(networkInstruction.body)>
${outputName} = mx.nd.argmax(${outputName}, axis=1).expand_dims(1) ${outputName} = mx.nd.argmax(${outputName}, axis=1).expand_dims(1)
</#if> </#if>
......
...@@ -283,6 +283,15 @@ class CNNSupervisedTrainer_Alexnet: ...@@ -283,6 +283,15 @@ class CNNSupervisedTrainer_Alexnet:
tic = None tic = None
for epoch in range(begin_epoch, begin_epoch + num_epoch): for epoch in range(begin_epoch, begin_epoch + num_epoch):
if shuffle_data:
if preprocessing:
preproc_lib = "CNNPreprocessor_Alexnet_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
global_loss_train = 0.0
train_batches = 0
loss_total = 0 loss_total = 0
train_iter.reset() train_iter.reset()
...@@ -311,6 +320,9 @@ class CNNSupervisedTrainer_Alexnet: ...@@ -311,6 +320,9 @@ class CNNSupervisedTrainer_Alexnet:
loss_total += loss.sum().asscalar() loss_total += loss.sum().asscalar()
global_loss_train += float(loss.mean().asscalar())
train_batches += 1
if clip_global_grad_norm: if clip_global_grad_norm:
grads = [] grads = []
...@@ -338,6 +350,9 @@ class CNNSupervisedTrainer_Alexnet: ...@@ -338,6 +350,9 @@ class CNNSupervisedTrainer_Alexnet:
tic = time.time() tic = time.time()
if train_batches > 0:
global_loss_train /= train_batches
tic = None tic = None
...@@ -355,10 +370,12 @@ class CNNSupervisedTrainer_Alexnet: ...@@ -355,10 +370,12 @@ class CNNSupervisedTrainer_Alexnet:
nd.waitall() nd.waitall()
outputs = [] outputs = []
attentionList=[] lossList = []
attentionList = []
predictions_ = self._networks[0](data_) predictions_ = self._networks[0](data_)
outputs.append(predictions_) outputs.append(predictions_)
lossList.append(loss_function(predictions_, labels[0]))
if save_attention_image == "True": if save_attention_image == "True":
...@@ -414,10 +431,13 @@ class CNNSupervisedTrainer_Alexnet: ...@@ -414,10 +431,13 @@ class CNNSupervisedTrainer_Alexnet:
else: else:
train_metric_score = 0 train_metric_score = 0
global_loss_test = 0.0
test_batches = 0
test_iter.reset() test_iter.reset()
metric = mx.metric.create(eval_metric, **eval_metric_params) metric = mx.metric.create(eval_metric, **eval_metric_params)
for batch_i, batch in enumerate(test_iter): for batch_i, batch in enumerate(test_iter):
if True: if True:
labels = [batch.label[i].as_in_context(mx_context) for i in range(1)] labels = [batch.label[i].as_in_context(mx_context) for i in range(1)]
data_ = batch.data[0].as_in_context(mx_context) data_ = batch.data[0].as_in_context(mx_context)
...@@ -428,10 +448,12 @@ class CNNSupervisedTrainer_Alexnet: ...@@ -428,10 +448,12 @@ class CNNSupervisedTrainer_Alexnet:
nd.waitall() nd.waitall()
outputs = [] outputs = []
attentionList=[] lossList = []
attentionList = []
predictions_ = self._networks[0](data_) predictions_ = self._networks[0](data_)
outputs.append(predictions_) outputs.append(predictions_)
lossList.append(loss_function(predictions_, labels[0]))
if save_attention_image == "True": if save_attention_image == "True":
...@@ -475,6 +497,12 @@ class CNNSupervisedTrainer_Alexnet: ...@@ -475,6 +497,12 @@ class CNNSupervisedTrainer_Alexnet:
os.makedirs(target_dir) os.makedirs(target_dir)
plt.savefig(target_dir + '/attention_test.png') plt.savefig(target_dir + '/attention_test.png')
plt.close() plt.close()
loss = 0
for element in lossList:
loss = loss + element
global_loss_test += float(loss.mean().asscalar())
test_batches += 1
predictions = [] predictions = []
for output_name in outputs: for output_name in outputs:
...@@ -487,8 +515,10 @@ class CNNSupervisedTrainer_Alexnet: ...@@ -487,8 +515,10 @@ class CNNSupervisedTrainer_Alexnet:
metric.update(preds=predictions, labels=labels) metric.update(preds=predictions, labels=labels)
test_metric_score = metric.get()[1] test_metric_score = metric.get()[1]
logging.info("Epoch[%d] Train: %f, Test: %f" % (epoch, train_metric_score, test_metric_score)) if test_batches > 0:
global_loss_test /= test_batches
logging.info("Epoch[%d] Train: %f, Test: %f, Train Loss: %f, Test Loss: %f" % (epoch, train_metric_score, test_metric_score, global_loss_train, global_loss_test))
if (epoch - begin_epoch) % checkpoint_period == 0: if (epoch - begin_epoch) % checkpoint_period == 0:
for i, network in self._networks.items(): for i, network in self._networks.items():
......
...@@ -283,6 +283,15 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ...@@ -283,6 +283,15 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
tic = None tic = None
for epoch in range(begin_epoch, begin_epoch + num_epoch): for epoch in range(begin_epoch, begin_epoch + num_epoch):
if shuffle_data:
if preprocessing:
preproc_lib = "CNNPreprocessor_CifarClassifierNetwork_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
global_loss_train = 0.0
train_batches = 0
loss_total = 0 loss_total = 0
train_iter.reset() train_iter.reset()
...@@ -311,6 +320,9 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ...@@ -311,6 +320,9 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
loss_total += loss.sum().asscalar() loss_total += loss.sum().asscalar()
global_loss_train += float(loss.mean().asscalar())
train_batches += 1
if clip_global_grad_norm: if clip_global_grad_norm:
grads = [] grads = []
...@@ -338,6 +350,9 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ...@@ -338,6 +350,9 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
tic = time.time() tic = time.time()
if train_batches > 0:
global_loss_train /= train_batches
tic = None tic = None
...@@ -355,10 +370,12 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ...@@ -355,10 +370,12 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
nd.waitall() nd.waitall()
outputs = [] outputs = []
attentionList=[] lossList = []
attentionList = []
softmax_ = self._networks[0](data_) softmax_ = self._networks[0](data_)
outputs.append(softmax_) outputs.append(softmax_)
lossList.append(loss_function(softmax_, labels[0]))
if save_attention_image == "True": if save_attention_image == "True":
...@@ -414,10 +431,13 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ...@@ -414,10 +431,13 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
else: else:
train_metric_score = 0 train_metric_score = 0
global_loss_test = 0.0
test_batches = 0
test_iter.reset() test_iter.reset()
metric = mx.metric.create(eval_metric, **eval_metric_params) metric = mx.metric.create(eval_metric, **eval_metric_params)
for batch_i, batch in enumerate(test_iter): for batch_i, batch in enumerate(test_iter):
if True: if True:
labels = [batch.label[i].as_in_context(mx_context) for i in range(1)] labels = [batch.label[i].as_in_context(mx_context) for i in range(1)]
data_ = batch.data[0].as_in_context(mx_context) data_ = batch.data[0].as_in_context(mx_context)
...@@ -428,10 +448,12 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ...@@ -428,10 +448,12 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
nd.waitall() nd.waitall()
outputs = [] outputs = []
attentionList=[] lossList = []
attentionList = []
softmax_ = self._networks[0](data_) softmax_ = self._networks[0](data_)
outputs.append(softmax_) outputs.append(softmax_)
lossList.append(loss_function(softmax_, labels[0]))
if save_attention_image == "True": if save_attention_image == "True":
...@@ -475,6 +497,12 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ...@@ -475,6 +497,12 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
os.makedirs(target_dir) os.makedirs(target_dir)
plt.savefig(target_dir + '/attention_test.png') plt.savefig(target_dir + '/attention_test.png')
plt.close() plt.close()
loss = 0
for element in lossList:
loss = loss + element
global_loss_test += float(loss.mean().asscalar())
test_batches += 1
predictions = [] predictions = []
for output_name in outputs: for output_name in outputs:
...@@ -487,8 +515,10 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ...@@ -487,8 +515,10 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
metric.update(preds=predictions, labels=labels) metric.update(preds=predictions, labels=labels)
test_metric_score = metric.get()[1] test_metric_score = metric.get()[1]
logging.info("Epoch[%d] Train: %f, Test: %f" % (epoch, train_metric_score, test_metric_score)) if test_batches > 0:
global_loss_test /= test_batches
logging.info("Epoch[%d] Train: %f, Test: %f, Train Loss: %f, Test Loss: %f" % (epoch, train_metric_score, test_metric_score, global_loss_train, global_loss_test))
if (epoch - begin_epoch) % checkpoint_period == 0: if (epoch - begin_epoch) % checkpoint_period == 0:
for i, network in self._networks.items(): for i, network in self._networks.items():
......
...@@ -283,6 +283,15 @@ class CNNSupervisedTrainer_VGG16: ...@@ -283,6 +283,15 @@ class CNNSupervisedTrainer_VGG16:
tic = None tic = None
for epoch in range(begin_epoch, begin_epoch + num_epoch): for epoch in range(begin_epoch, begin_epoch + num_epoch):
if shuffle_data:
if preprocessing:
preproc_lib = "CNNPreprocessor_VGG16_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
global_loss_train = 0.0
train_batches = 0
loss_total = 0 loss_total = 0
train_iter.reset() train_iter.reset()
...@@ -311,6 +320,9 @@ class CNNSupervisedTrainer_VGG16: ...@@ -311,6 +320,9 @@ class CNNSupervisedTrainer_VGG16:
loss_total += loss.sum().asscalar() loss_total += loss.sum().asscalar()
global_loss_train += float(loss.mean().asscalar())
train_batches += 1
if clip_global_grad_norm: if clip_global_grad_norm:
grads = [] grads = []
...@@ -338,6 +350,9 @@ class CNNSupervisedTrainer_VGG16: ...@@ -338,6 +350,9 @@ class CNNSupervisedTrainer_VGG16:
tic = time.time() tic = time.time()
if train_batches > 0:
global_loss_train /= train_batches
tic = None tic = None
...@@ -355,10 +370,12 @@ class CNNSupervisedTrainer_VGG16: ...@@ -355,10 +370,12 @@ class CNNSupervisedTrainer_VGG16:
nd.waitall() nd.waitall()
outputs = [] outputs = []
attentionList=[] lossList = []
attentionList = []
predictions_ = self._networks[0](data_) predictions_ = self._networks[0](data_)
outputs.append(predictions_) outputs.append(predictions_)
lossList.append(loss_function(predictions_, labels[0]))
if save_attention_image == "True": if save_attention_image == "True":
...@@ -414,10 +431,13 @@ class CNNSupervisedTrainer_VGG16: ...@@ -414,10 +431,13 @@ class CNNSupervisedTrainer_VGG16:
else: else:
train_metric_score = 0 train_metric_score = 0
global_loss_test = 0.0
test_batches = 0
test_iter.reset() test_iter.reset()
metric = mx.metric.create(eval_metric, **eval_metric_params) metric = mx.metric.create(eval_metric, **eval_metric_params)
for batch_i, batch in enumerate(test_iter): for batch_i, batch in enumerate(test_iter):
if True: if True:
labels = [batch.label[i].as_in_context(mx_context) for i in range(1)] labels = [batch.label[i].as_in_context(mx_context) for i in range(1)]
data_ = batch.data[0].as_in_context(mx_context) data_ = batch.data[0].as_in_context(mx_context)
...@@ -428,10 +448,12 @@ class CNNSupervisedTrainer_VGG16: ...@@ -428,10 +448,12 @@ class CNNSupervisedTrainer_VGG16:
nd.waitall() nd.waitall()
outputs = [] outputs = []
attentionList=[] lossList = []
attentionList = []
predictions_ = self._networks[0](data_) predictions_ = self._networks[0](data_)
outputs.append(predictions_) outputs.append(predictions_)
lossList.append(loss_function(predictions_, labels[0]))
if save_attention_image == "True": if save_attention_image == "True":
...@@ -475,6 +497,12 @@ class CNNSupervisedTrainer_VGG16: ...@@ -475,6 +497,12 @@ class CNNSupervisedTrainer_VGG16:
os.makedirs(target_dir) os.makedirs(target_dir)
plt.savefig(target_dir + '/attention_test.png') plt.savefig(target_dir + '/attention_test.png')
plt.close() plt.close()
loss = 0
for element in lossList:
loss = loss + element
global_loss_test += float(loss.mean().asscalar())
test_batches += 1
predictions = [] predictions = []
for output_name in outputs: for output_name in outputs:
...@@ -487,8 +515,10 @@ class CNNSupervisedTrainer_VGG16: ...@@ -487,8 +515,10 @@ class CNNSupervisedTrainer_VGG16:
metric.update(preds=predictions, labels=labels) metric.update(preds=predictions, labels=labels)
test_metric_score = metric.get()[1] test_metric_score = metric.get()[1]
logging.info("Epoch[%d] Train: %f, Test: %f" % (epoch, train_metric_score, test_metric_score)) if test_batches > 0:
global_loss_test /= test_batches
logging.info("Epoch[%d] Train: %f, Test: %f, Train Loss: %f, Test Loss: %f" % (epoch, train_metric_score, test_metric_score, global_loss_train, global_loss_test))
if (epoch - begin_epoch) % checkpoint_period == 0: if (epoch - begin_epoch) % checkpoint_period == 0:
for i, network in self._networks.items(): for i, network in self._networks.items():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment