Commit 0cd223e5 authored by Evgeny Kusmenko

Merge branch 'process_loss_parameter' into 'master'

Processed the loss parameter and set its default value based on the last layer of the network architecture

See merge request !22
parents f0801126 c4efe64f
Pipeline #105766 passed with stages in 7 minutes and 57 seconds
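
Note: this change threads a loss string through the generated Caffe2 trainer and picks a default from the network's last layer. Below is a minimal standalone sketch of the two loss branches the generator now emits, assuming a working Caffe2 installation; the blob names and values are illustrative only, not part of this merge request.

    import numpy as np
    from caffe2.python import model_helper, workspace

    # Fake data: softmax-style probabilities plus an integer class label,
    # which is the input layout LabelCrossEntropy expects.
    workspace.FeedBlob('output', np.array([[0.7, 0.2, 0.1]], dtype=np.float32))
    workspace.FeedBlob('label', np.array([0], dtype=np.int32))

    model = model_helper.ModelHelper(name='loss_demo')
    loss = 'cross_entropy'  # generated default for sigmoid/softmax outputs
    if loss == 'cross_entropy':
        xent = model.LabelCrossEntropy(['output', 'label'], 'xent')
        loss = model.AveragedLoss(xent, 'loss')
    elif loss == 'euclidean':
        # this branch needs a float label blob shaped like 'output'
        dist = model.net.SquaredL2Distance(['label', 'output'], 'dist')
        loss = dist.AveragedLoss([], ['loss'])

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(model.net)
    print(workspace.FetchBlob('loss'))  # approx. -ln(0.7) = 0.357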
@@ -16,7 +16,7 @@
     <!-- .. SE-Libraries .................................................. -->
     <CNNArch.version>0.2.7</CNNArch.version>
-    <CNNTrain.version>0.2.5</CNNTrain.version>
+    <CNNTrain.version>0.2.6-SNAPSHOT</CNNTrain.version>
     <embedded-montiarc-math-generator>0.1.2-SNAPSHOT</embedded-montiarc-math-generator>
     <!-- .. Libraries .................................................. -->
...
@@ -45,6 +45,10 @@ public class CNNArchTemplateController {
     private ArchitectureElementData dataElement;
 
+    public String loss;
+    public static final String CROSS_ENTROPY = "cross_entropy";
+    public static final String EUCLIDEAN = "euclidean";
+
     public CNNArchTemplateController(ArchitectureSymbol architecture) {
         setArchitecture(architecture);
     }
@@ -123,6 +127,10 @@ public class CNNArchTemplateController {
         return list;
     }
 
+    public String getArchitectureLoss(){
+        return this.loss;
+    }
+
     public void include(String relativePath, String templateWithoutFileEnding, Writer writer){
         String templatePath = relativePath + templateWithoutFileEnding + FTL_FILE_ENDING;
         Map<String, Object> ftlContext = new HashMap<>();
@@ -236,18 +244,30 @@ public class CNNArchTemplateController {
     public boolean isLogisticRegressionOutput(ArchitectureElementSymbol architectureElement){
-        return isTOutput(Sigmoid.class, architectureElement);
+        if (isTOutput(Sigmoid.class, architectureElement)){
+            this.loss = CROSS_ENTROPY;
+            return true;
+        }
+        return false;
     }
 
     public boolean isLinearRegressionOutput(ArchitectureElementSymbol architectureElement){
-        return architectureElement.isOutput()
-                && !isLogisticRegressionOutput(architectureElement)
-                && !isSoftmaxOutput(architectureElement);
+        if (architectureElement.isOutput()
+                && !isLogisticRegressionOutput(architectureElement)
+                && !isSoftmaxOutput(architectureElement)){
+            this.loss = EUCLIDEAN;
+            return true;
+        }
+        return false;
     }
 
     public boolean isSoftmaxOutput(ArchitectureElementSymbol architectureElement){
-        return isTOutput(Softmax.class, architectureElement);
+        if (isTOutput(Softmax.class, architectureElement)){
+            this.loss = CROSS_ENTROPY;
+            return true;
+        }
+        return false;
     }
 
     private boolean isTOutput(Class inputPredefinedLayerClass, ArchitectureElementSymbol architectureElement){
...
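
The three output checks above double as the default-loss selection. A hypothetical restatement in Python (the helper name and layer strings are illustrative, not from the generator):

    # Sigmoid and softmax outputs default to cross-entropy; any other
    # output layer falls back to the euclidean (squared L2) loss.
    def default_loss(last_layer_type):
        if last_layer_type in ('sigmoid', 'softmax'):
            return 'cross_entropy'
        return 'euclidean'

    assert default_loss('softmax') == 'cross_entropy'
    assert default_loss('sigmoid') == 'cross_entropy'
    assert default_loss('fullyConnected') == 'euclidean'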
@@ -67,6 +67,13 @@ public class ConfigurationData {
         return getConfiguration().getEntry("eval_metric").getValue().toString();
     }
 
+    public String getLoss() {
+        if (!getConfiguration().getEntryMap().containsKey("loss")) {
+            return null;
+        }
+        return getConfiguration().getEntry("loss").getValue().toString();
+    }
+
     public String getOptimizerName() {
         if (getConfiguration().getOptimizer() == null) {
             return null;
...
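
getLoss() returns null when the training configuration has no loss entry, so the generated trainer only overrides the architecture-derived default when the .cnnt file sets one explicitly. A hypothetical sketch of that precedence (the helper name is mine):

    # An explicit 'loss' entry in the .cnnt configuration wins; otherwise
    # the default derived from the architecture's last layer is used.
    def effective_loss(config_loss, architecture_loss):
        return config_loss if config_loss is not None else architecture_loss

    assert effective_loss(None, 'euclidean') == 'euclidean'
    assert effective_loss('cross_entropy', 'euclidean') == 'cross_entropy'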
@@ -65,10 +65,14 @@ class ${tc.fileNameWithoutEnding}:
 ${tc.include(tc.architecture.body)}
 
     # this adds the loss and optimizer
-    def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+    def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
         with core.DeviceScope(device_opts):
-            xent = model.LabelCrossEntropy([output, label], 'xent')
-            loss = model.AveragedLoss(xent, "loss")
+            if loss == 'cross_entropy':
+                xent = model.LabelCrossEntropy([output, label], 'xent')
+                loss = model.AveragedLoss(xent, "loss")
+            elif loss == 'euclidean':
+                dist = model.net.SquaredL2Distance([label, output], 'dist')
+                loss = dist.AveragedLoss([], ['loss'])
 
             model.AddGradientOperators([loss])
@@ -105,7 +109,7 @@ ${tc.include(tc.architecture.body)}
         accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
         return accuracy
 
-    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='${tc.architectureLoss}', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
         if context == 'cpu':
             device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
             print("CPU mode selected")
@@ -120,7 +124,7 @@ ${tc.include(tc.architecture.body)}
         train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
         data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
         ${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
-        self.add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
+        self.add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
         self.add_accuracy(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, eval_metric)
         with core.DeviceScope(device_opts):
             brew.add_weight_decay(train_model, weight_decay)
...
@@ -29,6 +29,9 @@ if __name__ == "__main__":
 <#if (config.evalMetric)??>
         eval_metric='${config.evalMetric}',
 </#if>
+<#if (config.loss)??>
+        loss='${config.loss}',
+</#if>
 <#if (config.configuration.optimizer)??>
         opt_type='${config.optimizerName}',
 <#list config.optimizerParams?keys as param>
...
@@ -82,10 +82,14 @@ class CNNCreator_LeNet:
         return predictions
 
     # this adds the loss and optimizer
-    def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+    def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
         with core.DeviceScope(device_opts):
-            xent = model.LabelCrossEntropy([output, label], 'xent')
-            loss = model.AveragedLoss(xent, "loss")
+            if loss == 'cross_entropy':
+                xent = model.LabelCrossEntropy([output, label], 'xent')
+                loss = model.AveragedLoss(xent, "loss")
+            elif loss == 'euclidean':
+                dist = model.net.SquaredL2Distance([label, output], 'dist')
+                loss = dist.AveragedLoss([], ['loss'])
 
             model.AddGradientOperators([loss])
@@ -122,7 +126,7 @@ class CNNCreator_LeNet:
         accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
         return accuracy
 
-    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='cross_entropy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
         if context == 'cpu':
             device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
             print("CPU mode selected")
@@ -137,7 +141,7 @@ class CNNCreator_LeNet:
         train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
         data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
         predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
-        self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
+        self.add_training_operators(train_model, predictions, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
         self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
         with core.DeviceScope(device_opts):
             brew.add_weight_decay(train_model, weight_decay)
...
@@ -128,10 +128,14 @@ class CNNCreator_VGG16:
         return predictions
 
     # this adds the loss and optimizer
-    def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+    def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
         with core.DeviceScope(device_opts):
-            xent = model.LabelCrossEntropy([output, label], 'xent')
-            loss = model.AveragedLoss(xent, "loss")
+            if loss == 'cross_entropy':
+                xent = model.LabelCrossEntropy([output, label], 'xent')
+                loss = model.AveragedLoss(xent, "loss")
+            elif loss == 'euclidean':
+                dist = model.net.SquaredL2Distance([label, output], 'dist')
+                loss = dist.AveragedLoss([], ['loss'])
 
             model.AddGradientOperators([loss])
@@ -168,7 +172,7 @@ class CNNCreator_VGG16:
         accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
         return accuracy
 
-    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='cross_entropy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
         if context == 'cpu':
             device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
             print("CPU mode selected")
@@ -183,7 +187,7 @@ class CNNCreator_VGG16:
         train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
         data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
         predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
-        self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
+        self.add_training_operators(train_model, predictions, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
         self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
         with core.DeviceScope(device_opts):
             brew.add_weight_decay(train_model, weight_decay)
...
@@ -18,6 +18,7 @@ if __name__ == "__main__":
         batch_size=100,
         context='gpu',
         eval_metric='mse',
+        loss='cross_entropy',
         opt_type='rmsprop',
         epsilon=1.0E-6,
         weight_decay=0.01,
...
@@ -3,6 +3,7 @@ configuration FullConfig{
     batch_size : 100
     load_checkpoint : true
     eval_metric : mse
+    loss : cross_entropy
     context : gpu
     normalize : true
     optimizer : rmsprop{
...
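
The generated add_training_operators branches only on 'cross_entropy' and 'euclidean'; any other configured value would match neither branch, so no loss operator would be built. A hypothetical guard, not part of this change:

    SUPPORTED_LOSSES = ('cross_entropy', 'euclidean')

    def check_loss(loss):
        # fail fast instead of silently building a net without a loss op
        if loss not in SUPPORTED_LOSSES:
            raise ValueError("unsupported loss '%s'; expected one of: %s"
                             % (loss, ', '.join(SUPPORTED_LOSSES)))

    check_loss('cross_entropy')  # ok; check_loss('l1') would raise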