Commit bf56b53c authored by Sebastian Nickels

Added shuffle_data and clip_global_grad_norm params

parent fb939c7a
......@@ -16,7 +16,7 @@ class ${tc.fileNameWithoutEnding}:
def __init__(self):
self._data_dir = "${tc.dataPath}/"
def load_data(self, batch_size):
def load_data(self, batch_size, shuffle=False):
train_h5, test_h5 = self.load_h5_files()
train_data = {}
......@@ -40,7 +40,8 @@ class ${tc.fileNameWithoutEnding}:
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
batch_size=batch_size,
shuffle=shuffle)
test_iter = None
......@@ -65,7 +66,7 @@ class ${tc.fileNameWithoutEnding}:
return train_iter, test_iter, data_mean, data_std, train_images, test_images
def load_preprocessed_data(self, batch_size, preproc_lib):
def load_preprocessed_data(self, batch_size, preproc_lib, shuffle=False):
train_h5, test_h5 = self.load_h5_files()
wrapper = importlib.import_module(preproc_lib)
......@@ -111,7 +112,8 @@ class ${tc.fileNameWithoutEnding}:
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
batch_size=batch_size,
shuffle=shuffle)
test_data = {}
test_label = {}
......
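Note on the data loader hunks above: the new shuffle flag is simply forwarded to MXNet's NDArrayIter, which reorders the in-memory arrays when the iterator is constructed. A minimal, self-contained sketch of the same pattern (array shapes and names are made up for illustration):

import mxnet as mx
import numpy as np

# toy dataset: 100 samples with integer labels (shapes are illustrative only)
train_data = {'data': np.random.rand(100, 3, 32, 32).astype('float32')}
train_label = {'softmax_label': np.random.randint(0, 10, (100,)).astype('float32')}

train_iter = mx.io.NDArrayIter(data=train_data,
                               label=train_label,
                               batch_size=16,
                               shuffle=True)   # reorder samples before batching

for batch in train_iter:
    pass  # batch.data / batch.label come out in shuffled order

NDArrayIter shuffles when it is built, not on reset(); this is presumably why the trainer template below rebuilds the iterators at the start of every epoch when shuffle_data is enabled.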
......@@ -193,6 +193,8 @@ class ${tc.fileNameWithoutEnding}:
save_attention_image=False,
use_teacher_forcing=False,
normalize=True,
shuffle_data=False,
clip_global_grad_norm=None,
preprocessing = False):
if context == 'gpu':
mx_context = mx.gpu()
......@@ -203,9 +205,9 @@ class ${tc.fileNameWithoutEnding}:
if preprocessing:
preproc_lib = "CNNPreprocessor_${tc.fileNameWithoutEnding?keep_after("CNNSupervisedTrainer_")}_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib)
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size)
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay']
......@@ -282,6 +284,12 @@ class ${tc.fileNameWithoutEnding}:
tic = None
for epoch in range(begin_epoch, begin_epoch + num_epoch):
if shuffle_data:
if preprocessing:
preproc_lib = "CNNPreprocessor_${tc.fileNameWithoutEnding?keep_after("CNNSupervisedTrainer_")}_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
loss_total = 0
train_iter.reset()
......@@ -297,6 +305,14 @@ class ${tc.fileNameWithoutEnding}:
loss_total += loss.sum().asscalar()
if clip_global_grad_norm:
grads = []
for network in self._networks.values():
grads.extend([param.grad(mx_context) for param in network.collect_params().values()])
gluon.utils.clip_global_norm(grads, clip_global_grad_norm)
for trainer in trainers:
trainer.step(batch_size)
......
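Two things happen in the trainer hunks above: when shuffle_data is set, the data loader is invoked again at the beginning of each epoch so that every epoch sees a freshly shuffled NDArrayIter, and when clip_global_grad_norm is set, the gradients of all networks are rescaled jointly with gluon.utils.clip_global_norm before the optimizer step. A minimal single-network sketch of the clipping step (network, data, and threshold are made up for illustration, not taken from the generated code):

import mxnet as mx
from mxnet import autograd, gluon

ctx = mx.cpu()
net = gluon.nn.Dense(10)
net.initialize(ctx=ctx)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
loss_fn = gluon.loss.L2Loss()

x = mx.nd.random.uniform(shape=(8, 20), ctx=ctx)
y = mx.nd.random.uniform(shape=(8, 10), ctx=ctx)

with autograd.record():
    loss = loss_fn(net(x), y)
loss.backward()

# gather every parameter gradient and rescale them together so that their
# global L2 norm does not exceed the configured threshold
grads = [param.grad(ctx) for param in net.collect_params().values()]
gluon.utils.clip_global_norm(grads, 1.0)

trainer.step(8)

The generated trainer follows the same pattern, except that it collects gradients from every network in self._networks and then steps each trainer in trainers.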
......@@ -43,6 +43,12 @@ if __name__ == "__main__":
<#if (config.normalize)??>
normalize=${config.normalize?string("True","False")},
</#if>
<#if (config.shuffleData)??>
shuffle_data=${config.shuffleData?string("True","False")},
</#if>
<#if (config.clipGlobalGradNorm)??>
clip_global_grad_norm=${config.clipGlobalGradNorm},
</#if>
<#if (config.preprocessingName)??>
preprocessing=${config.preprocessingName???string("True","False")},
</#if>
......
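The template hunk above emits the two new keyword arguments only when the corresponding keys (shuffleData, clipGlobalGradNorm) are present in the training configuration, so previously generated trainer calls keep working unchanged. A Python stub mirroring the extended signature, with a hypothetical generated call (all names and values illustrative only):

# stub of the extended train() signature; the defaults preserve old behaviour
def train(batch_size=64,
          num_epoch=10,
          normalize=True,
          shuffle_data=False,          # emitted only if shuffleData is configured
          clip_global_grad_norm=None,  # emitted only if clipGlobalGradNorm is configured
          preprocessing=False):
    pass

# hypothetical call as a generated CNNTrainer might emit it
train(batch_size=64,
      num_epoch=10,
      shuffle_data=True,
      clip_global_grad_norm=5.0)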
......@@ -15,7 +15,7 @@ class CNNDataLoader_Alexnet:
def __init__(self):
self._data_dir = "data/Alexnet/"
def load_data(self, batch_size):
def load_data(self, batch_size, shuffle=False):
train_h5, test_h5 = self.load_h5_files()
train_data = {}
......@@ -39,7 +39,8 @@ class CNNDataLoader_Alexnet:
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
batch_size=batch_size,
shuffle=shuffle)
test_iter = None
......@@ -64,7 +65,7 @@ class CNNDataLoader_Alexnet:
return train_iter, test_iter, data_mean, data_std, train_images, test_images
def load_preprocessed_data(self, batch_size, preproc_lib):
def load_preprocessed_data(self, batch_size, preproc_lib, shuffle=False):
train_h5, test_h5 = self.load_h5_files()
wrapper = importlib.import_module(preproc_lib)
......@@ -110,7 +111,8 @@ class CNNDataLoader_Alexnet:
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
batch_size=batch_size,
shuffle=shuffle)
test_data = {}
test_label = {}
......
......@@ -15,7 +15,7 @@ class CNNDataLoader_CifarClassifierNetwork:
def __init__(self):
self._data_dir = "data/CifarClassifierNetwork/"
def load_data(self, batch_size):
def load_data(self, batch_size, shuffle=False):
train_h5, test_h5 = self.load_h5_files()
train_data = {}
......@@ -39,7 +39,8 @@ class CNNDataLoader_CifarClassifierNetwork:
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
batch_size=batch_size,
shuffle=shuffle)
test_iter = None
......@@ -64,7 +65,7 @@ class CNNDataLoader_CifarClassifierNetwork:
return train_iter, test_iter, data_mean, data_std, train_images, test_images
def load_preprocessed_data(self, batch_size, preproc_lib):
def load_preprocessed_data(self, batch_size, preproc_lib, shuffle=False):
train_h5, test_h5 = self.load_h5_files()
wrapper = importlib.import_module(preproc_lib)
......@@ -110,7 +111,8 @@ class CNNDataLoader_CifarClassifierNetwork:
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
batch_size=batch_size,
shuffle=shuffle)
test_data = {}
test_label = {}
......
......@@ -15,7 +15,7 @@ class CNNDataLoader_VGG16:
def __init__(self):
self._data_dir = "data/VGG16/"
def load_data(self, batch_size):
def load_data(self, batch_size, shuffle=False):
train_h5, test_h5 = self.load_h5_files()
train_data = {}
......@@ -39,7 +39,8 @@ class CNNDataLoader_VGG16:
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
batch_size=batch_size,
shuffle=shuffle)
test_iter = None
......@@ -64,7 +65,7 @@ class CNNDataLoader_VGG16:
return train_iter, test_iter, data_mean, data_std, train_images, test_images
def load_preprocessed_data(self, batch_size, preproc_lib):
def load_preprocessed_data(self, batch_size, preproc_lib, shuffle=False):
train_h5, test_h5 = self.load_h5_files()
wrapper = importlib.import_module(preproc_lib)
......@@ -110,7 +111,8 @@ class CNNDataLoader_VGG16:
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
batch_size=batch_size,
shuffle=shuffle)
test_data = {}
test_label = {}
......
......@@ -192,6 +192,8 @@ class CNNSupervisedTrainer_Alexnet:
save_attention_image=False,
use_teacher_forcing=False,
normalize=True,
shuffle_data=False,
clip_global_grad_norm=None,
preprocessing = False):
if context == 'gpu':
mx_context = mx.gpu()
......@@ -202,9 +204,9 @@ class CNNSupervisedTrainer_Alexnet:
if preprocessing:
preproc_lib = "CNNPreprocessor_Alexnet_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib)
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size)
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay']
......@@ -309,6 +311,14 @@ class CNNSupervisedTrainer_Alexnet:
loss_total += loss.sum().asscalar()
if clip_global_grad_norm:
grads = []
for network in self._networks.values():
grads.extend([param.grad(mx_context) for param in network.collect_params().values()])
gluon.utils.clip_global_norm(grads, clip_global_grad_norm)
for trainer in trainers:
trainer.step(batch_size)
......
......@@ -192,6 +192,8 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
save_attention_image=False,
use_teacher_forcing=False,
normalize=True,
shuffle_data=False,
clip_global_grad_norm=None,
preprocessing = False):
if context == 'gpu':
mx_context = mx.gpu()
......@@ -202,9 +204,9 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
if preprocessing:
preproc_lib = "CNNPreprocessor_CifarClassifierNetwork_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib)
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size)
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay']
......@@ -309,6 +311,14 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
loss_total += loss.sum().asscalar()
if clip_global_grad_norm:
grads = []
for network in self._networks.values():
grads.extend([param.grad(mx_context) for param in network.collect_params().values()])
gluon.utils.clip_global_norm(grads, clip_global_grad_norm)
for trainer in trainers:
trainer.step(batch_size)
......
......@@ -192,6 +192,8 @@ class CNNSupervisedTrainer_VGG16:
save_attention_image=False,
use_teacher_forcing=False,
normalize=True,
shuffle_data=False,
clip_global_grad_norm=None,
preprocessing = False):
if context == 'gpu':
mx_context = mx.gpu()
......@@ -202,9 +204,9 @@ class CNNSupervisedTrainer_VGG16:
if preprocessing:
preproc_lib = "CNNPreprocessor_VGG16_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib)
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size)
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay']
......@@ -309,6 +311,14 @@ class CNNSupervisedTrainer_VGG16:
loss_total += loss.sum().asscalar()
if clip_global_grad_norm:
grads = []
for network in self._networks.values():
grads.extend([param.grad(mx_context) for param in network.collect_params().values()])
gluon.utils.clip_global_norm(grads, clip_global_grad_norm)
for trainer in trainers:
trainer.step(batch_size)
......