Aufgrund eines Security Updates wird GitLab heute zwischen 14:30 und 15:00 Uhr kurzzeitig nicht zur Verfügung stehen. / Due to a security update, GitLab will be temporarily unavailable between 2:30 PM and 3:00 PM today.

Commit bf56b53c authored by Sebastian Nickels

Added shuffle_data and clip_global_grad_norm params

parent fb939c7a
...@@ -16,7 +16,7 @@ class ${tc.fileNameWithoutEnding}: ...@@ -16,7 +16,7 @@ class ${tc.fileNameWithoutEnding}:
def __init__(self): def __init__(self):
self._data_dir = "${tc.dataPath}/" self._data_dir = "${tc.dataPath}/"
def load_data(self, batch_size): def load_data(self, batch_size, shuffle=False):
train_h5, test_h5 = self.load_h5_files() train_h5, test_h5 = self.load_h5_files()
train_data = {} train_data = {}
...@@ -40,7 +40,8 @@ class ${tc.fileNameWithoutEnding}: ...@@ -40,7 +40,8 @@ class ${tc.fileNameWithoutEnding}:
train_iter = mx.io.NDArrayIter(data=train_data, train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label, label=train_label,
batch_size=batch_size) batch_size=batch_size,
shuffle=shuffle)
test_iter = None test_iter = None
...@@ -65,7 +66,7 @@ class ${tc.fileNameWithoutEnding}: ...@@ -65,7 +66,7 @@ class ${tc.fileNameWithoutEnding}:
return train_iter, test_iter, data_mean, data_std, train_images, test_images return train_iter, test_iter, data_mean, data_std, train_images, test_images
def load_preprocessed_data(self, batch_size, preproc_lib): def load_preprocessed_data(self, batch_size, preproc_lib, shuffle=False):
train_h5, test_h5 = self.load_h5_files() train_h5, test_h5 = self.load_h5_files()
wrapper = importlib.import_module(preproc_lib) wrapper = importlib.import_module(preproc_lib)
...@@ -111,7 +112,8 @@ class ${tc.fileNameWithoutEnding}: ...@@ -111,7 +112,8 @@ class ${tc.fileNameWithoutEnding}:
train_iter = mx.io.NDArrayIter(data=train_data, train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label, label=train_label,
batch_size=batch_size) batch_size=batch_size,
shuffle=shuffle)
test_data = {} test_data = {}
test_label = {} test_label = {}
......
...@@ -193,6 +193,8 @@ class ${tc.fileNameWithoutEnding}: ...@@ -193,6 +193,8 @@ class ${tc.fileNameWithoutEnding}:
save_attention_image=False, save_attention_image=False,
use_teacher_forcing=False, use_teacher_forcing=False,
normalize=True, normalize=True,
shuffle_data=False,
clip_global_grad_norm=None,
preprocessing = False): preprocessing = False):
if context == 'gpu': if context == 'gpu':
mx_context = mx.gpu() mx_context = mx.gpu()
...@@ -203,9 +205,9 @@ class ${tc.fileNameWithoutEnding}: ...@@ -203,9 +205,9 @@ class ${tc.fileNameWithoutEnding}:
if preprocessing: if preprocessing:
preproc_lib = "CNNPreprocessor_${tc.fileNameWithoutEnding?keep_after("CNNSupervisedTrainer_")}_executor" preproc_lib = "CNNPreprocessor_${tc.fileNameWithoutEnding?keep_after("CNNSupervisedTrainer_")}_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib) train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else: else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size) train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
if 'weight_decay' in optimizer_params: if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay'] optimizer_params['wd'] = optimizer_params['weight_decay']
...@@ -282,6 +284,12 @@ class ${tc.fileNameWithoutEnding}: ...@@ -282,6 +284,12 @@ class ${tc.fileNameWithoutEnding}:
tic = None tic = None
for epoch in range(begin_epoch, begin_epoch + num_epoch): for epoch in range(begin_epoch, begin_epoch + num_epoch):
if shuffle_data:
if preprocessing:
preproc_lib = "CNNPreprocessor_${tc.fileNameWithoutEnding?keep_after("CNNSupervisedTrainer_")}_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
loss_total = 0 loss_total = 0
train_iter.reset() train_iter.reset()
...@@ -297,6 +305,14 @@ class ${tc.fileNameWithoutEnding}: ...@@ -297,6 +305,14 @@ class ${tc.fileNameWithoutEnding}:
loss_total += loss.sum().asscalar() loss_total += loss.sum().asscalar()
if clip_global_grad_norm:
grads = []
for network in self._networks.values():
grads.extend([param.grad(mx_context) for param in network.collect_params().values()])
gluon.utils.clip_global_norm(grads, clip_global_grad_norm)
for trainer in trainers: for trainer in trainers:
trainer.step(batch_size) trainer.step(batch_size)
......
...@@ -43,6 +43,12 @@ if __name__ == "__main__": ...@@ -43,6 +43,12 @@ if __name__ == "__main__":
<#if (config.normalize)??> <#if (config.normalize)??>
normalize=${config.normalize?string("True","False")}, normalize=${config.normalize?string("True","False")},
</#if> </#if>
<#if (config.shuffleData)??>
shuffle_data=${config.shuffleData?string("True","False")},
</#if>
<#if (config.clipGlobalGradNorm)??>
clip_global_grad_norm=${config.clipGlobalGradNorm},
</#if>
<#if (config.preprocessingName)??> <#if (config.preprocessingName)??>
preprocessing=${config.preprocessingName???string("True","False")}, preprocessing=${config.preprocessingName???string("True","False")},
</#if> </#if>
......
...@@ -15,7 +15,7 @@ class CNNDataLoader_Alexnet: ...@@ -15,7 +15,7 @@ class CNNDataLoader_Alexnet:
def __init__(self): def __init__(self):
self._data_dir = "data/Alexnet/" self._data_dir = "data/Alexnet/"
def load_data(self, batch_size): def load_data(self, batch_size, shuffle=False):
train_h5, test_h5 = self.load_h5_files() train_h5, test_h5 = self.load_h5_files()
train_data = {} train_data = {}
...@@ -39,7 +39,8 @@ class CNNDataLoader_Alexnet: ...@@ -39,7 +39,8 @@ class CNNDataLoader_Alexnet:
train_iter = mx.io.NDArrayIter(data=train_data, train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label, label=train_label,
batch_size=batch_size) batch_size=batch_size,
shuffle=shuffle)
test_iter = None test_iter = None
...@@ -64,7 +65,7 @@ class CNNDataLoader_Alexnet: ...@@ -64,7 +65,7 @@ class CNNDataLoader_Alexnet:
return train_iter, test_iter, data_mean, data_std, train_images, test_images return train_iter, test_iter, data_mean, data_std, train_images, test_images
def load_preprocessed_data(self, batch_size, preproc_lib): def load_preprocessed_data(self, batch_size, preproc_lib, shuffle=False):
train_h5, test_h5 = self.load_h5_files() train_h5, test_h5 = self.load_h5_files()
wrapper = importlib.import_module(preproc_lib) wrapper = importlib.import_module(preproc_lib)
...@@ -110,7 +111,8 @@ class CNNDataLoader_Alexnet: ...@@ -110,7 +111,8 @@ class CNNDataLoader_Alexnet:
train_iter = mx.io.NDArrayIter(data=train_data, train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label, label=train_label,
batch_size=batch_size) batch_size=batch_size,
shuffle=shuffle)
test_data = {} test_data = {}
test_label = {} test_label = {}
......
...@@ -15,7 +15,7 @@ class CNNDataLoader_CifarClassifierNetwork: ...@@ -15,7 +15,7 @@ class CNNDataLoader_CifarClassifierNetwork:
def __init__(self): def __init__(self):
self._data_dir = "data/CifarClassifierNetwork/" self._data_dir = "data/CifarClassifierNetwork/"
def load_data(self, batch_size): def load_data(self, batch_size, shuffle=False):
train_h5, test_h5 = self.load_h5_files() train_h5, test_h5 = self.load_h5_files()
train_data = {} train_data = {}
...@@ -39,7 +39,8 @@ class CNNDataLoader_CifarClassifierNetwork: ...@@ -39,7 +39,8 @@ class CNNDataLoader_CifarClassifierNetwork:
train_iter = mx.io.NDArrayIter(data=train_data, train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label, label=train_label,
batch_size=batch_size) batch_size=batch_size,
shuffle=shuffle)
test_iter = None test_iter = None
...@@ -64,7 +65,7 @@ class CNNDataLoader_CifarClassifierNetwork: ...@@ -64,7 +65,7 @@ class CNNDataLoader_CifarClassifierNetwork:
return train_iter, test_iter, data_mean, data_std, train_images, test_images return train_iter, test_iter, data_mean, data_std, train_images, test_images
def load_preprocessed_data(self, batch_size, preproc_lib): def load_preprocessed_data(self, batch_size, preproc_lib, shuffle=False):
train_h5, test_h5 = self.load_h5_files() train_h5, test_h5 = self.load_h5_files()
wrapper = importlib.import_module(preproc_lib) wrapper = importlib.import_module(preproc_lib)
...@@ -110,7 +111,8 @@ class CNNDataLoader_CifarClassifierNetwork: ...@@ -110,7 +111,8 @@ class CNNDataLoader_CifarClassifierNetwork:
train_iter = mx.io.NDArrayIter(data=train_data, train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label, label=train_label,
batch_size=batch_size) batch_size=batch_size,
shuffle=shuffle)
test_data = {} test_data = {}
test_label = {} test_label = {}
......
...@@ -15,7 +15,7 @@ class CNNDataLoader_VGG16: ...@@ -15,7 +15,7 @@ class CNNDataLoader_VGG16:
def __init__(self): def __init__(self):
self._data_dir = "data/VGG16/" self._data_dir = "data/VGG16/"
def load_data(self, batch_size): def load_data(self, batch_size, shuffle=False):
train_h5, test_h5 = self.load_h5_files() train_h5, test_h5 = self.load_h5_files()
train_data = {} train_data = {}
...@@ -39,7 +39,8 @@ class CNNDataLoader_VGG16: ...@@ -39,7 +39,8 @@ class CNNDataLoader_VGG16:
train_iter = mx.io.NDArrayIter(data=train_data, train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label, label=train_label,
batch_size=batch_size) batch_size=batch_size,
shuffle=shuffle)
test_iter = None test_iter = None
...@@ -64,7 +65,7 @@ class CNNDataLoader_VGG16: ...@@ -64,7 +65,7 @@ class CNNDataLoader_VGG16:
return train_iter, test_iter, data_mean, data_std, train_images, test_images return train_iter, test_iter, data_mean, data_std, train_images, test_images
def load_preprocessed_data(self, batch_size, preproc_lib): def load_preprocessed_data(self, batch_size, preproc_lib, shuffle=False):
train_h5, test_h5 = self.load_h5_files() train_h5, test_h5 = self.load_h5_files()
wrapper = importlib.import_module(preproc_lib) wrapper = importlib.import_module(preproc_lib)
...@@ -110,7 +111,8 @@ class CNNDataLoader_VGG16: ...@@ -110,7 +111,8 @@ class CNNDataLoader_VGG16:
train_iter = mx.io.NDArrayIter(data=train_data, train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label, label=train_label,
batch_size=batch_size) batch_size=batch_size,
shuffle=shuffle)
test_data = {} test_data = {}
test_label = {} test_label = {}
......
...@@ -192,6 +192,8 @@ class CNNSupervisedTrainer_Alexnet: ...@@ -192,6 +192,8 @@ class CNNSupervisedTrainer_Alexnet:
save_attention_image=False, save_attention_image=False,
use_teacher_forcing=False, use_teacher_forcing=False,
normalize=True, normalize=True,
shuffle_data=False,
clip_global_grad_norm=None,
preprocessing = False): preprocessing = False):
if context == 'gpu': if context == 'gpu':
mx_context = mx.gpu() mx_context = mx.gpu()
...@@ -202,9 +204,9 @@ class CNNSupervisedTrainer_Alexnet: ...@@ -202,9 +204,9 @@ class CNNSupervisedTrainer_Alexnet:
if preprocessing: if preprocessing:
preproc_lib = "CNNPreprocessor_Alexnet_executor" preproc_lib = "CNNPreprocessor_Alexnet_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib) train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else: else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size) train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
if 'weight_decay' in optimizer_params: if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay'] optimizer_params['wd'] = optimizer_params['weight_decay']
...@@ -309,6 +311,14 @@ class CNNSupervisedTrainer_Alexnet: ...@@ -309,6 +311,14 @@ class CNNSupervisedTrainer_Alexnet:
loss_total += loss.sum().asscalar() loss_total += loss.sum().asscalar()
if clip_global_grad_norm:
grads = []
for network in self._networks.values():
grads.extend([param.grad(mx_context) for param in network.collect_params().values()])
gluon.utils.clip_global_norm(grads, clip_global_grad_norm)
for trainer in trainers: for trainer in trainers:
trainer.step(batch_size) trainer.step(batch_size)
......
...@@ -192,6 +192,8 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ...@@ -192,6 +192,8 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
save_attention_image=False, save_attention_image=False,
use_teacher_forcing=False, use_teacher_forcing=False,
normalize=True, normalize=True,
shuffle_data=False,
clip_global_grad_norm=None,
preprocessing = False): preprocessing = False):
if context == 'gpu': if context == 'gpu':
mx_context = mx.gpu() mx_context = mx.gpu()
...@@ -202,9 +204,9 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ...@@ -202,9 +204,9 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
if preprocessing: if preprocessing:
preproc_lib = "CNNPreprocessor_CifarClassifierNetwork_executor" preproc_lib = "CNNPreprocessor_CifarClassifierNetwork_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib) train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else: else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size) train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
if 'weight_decay' in optimizer_params: if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay'] optimizer_params['wd'] = optimizer_params['weight_decay']
...@@ -309,6 +311,14 @@ class CNNSupervisedTrainer_CifarClassifierNetwork: ...@@ -309,6 +311,14 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
loss_total += loss.sum().asscalar() loss_total += loss.sum().asscalar()
if clip_global_grad_norm:
grads = []
for network in self._networks.values():
grads.extend([param.grad(mx_context) for param in network.collect_params().values()])
gluon.utils.clip_global_norm(grads, clip_global_grad_norm)
for trainer in trainers: for trainer in trainers:
trainer.step(batch_size) trainer.step(batch_size)
......
...@@ -192,6 +192,8 @@ class CNNSupervisedTrainer_VGG16: ...@@ -192,6 +192,8 @@ class CNNSupervisedTrainer_VGG16:
save_attention_image=False, save_attention_image=False,
use_teacher_forcing=False, use_teacher_forcing=False,
normalize=True, normalize=True,
shuffle_data=False,
clip_global_grad_norm=None,
preprocessing = False): preprocessing = False):
if context == 'gpu': if context == 'gpu':
mx_context = mx.gpu() mx_context = mx.gpu()
...@@ -202,9 +204,9 @@ class CNNSupervisedTrainer_VGG16: ...@@ -202,9 +204,9 @@ class CNNSupervisedTrainer_VGG16:
if preprocessing: if preprocessing:
preproc_lib = "CNNPreprocessor_VGG16_executor" preproc_lib = "CNNPreprocessor_VGG16_executor"
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib) train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_preprocessed_data(batch_size, preproc_lib, shuffle_data)
else: else:
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size) train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size, shuffle_data)
if 'weight_decay' in optimizer_params: if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay'] optimizer_params['wd'] = optimizer_params['weight_decay']
...@@ -309,6 +311,14 @@ class CNNSupervisedTrainer_VGG16: ...@@ -309,6 +311,14 @@ class CNNSupervisedTrainer_VGG16:
loss_total += loss.sum().asscalar() loss_total += loss.sum().asscalar()
if clip_global_grad_norm:
grads = []
for network in self._networks.values():
grads.extend([param.grad(mx_context) for param in network.collect_params().values()])
gluon.utils.clip_global_norm(grads, clip_global_grad_norm)
for trainer in trainers: for trainer in trainers:
trainer.step(batch_size) trainer.step(batch_size)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment