diff --git a/pom.xml b/pom.xml
index a761f6ed2f9532cc112673b368d8244d930173df..da5d33471e33a9801395858bc3af6934902027ef 100644
--- a/pom.xml
+++ b/pom.xml
@@ -16,7 +16,7 @@
0.2.7
- 0.2.5
+ 0.2.6-SNAPSHOT
0.1.2-SNAPSHOT
diff --git a/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/CNNArchTemplateController.java b/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/CNNArchTemplateController.java
index 0ea41ea21587bc908026353f34386e92dfe5552a..aafc19efef40eddba2ec25828ba67ac59cf518b0 100644
--- a/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/CNNArchTemplateController.java
+++ b/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/CNNArchTemplateController.java
@@ -45,6 +45,10 @@ public class CNNArchTemplateController {
private ArchitectureElementData dataElement;
+ public String loss;
+ public static final String CROSS_ENTROPY = "cross_entropy";
+ public static final String EUCLIDEAN = "euclidean";
+
public CNNArchTemplateController(ArchitectureSymbol architecture) {
setArchitecture(architecture);
}
@@ -123,6 +127,10 @@ public class CNNArchTemplateController {
return list;
}
+ public String getArchitectureLoss(){
+ return this.loss;
+ }
+
public void include(String relativePath, String templateWithoutFileEnding, Writer writer){
String templatePath = relativePath + templateWithoutFileEnding + FTL_FILE_ENDING;
Map<String, Object> ftlContext = new HashMap<>();
@@ -236,18 +244,30 @@ public class CNNArchTemplateController {
public boolean isLogisticRegressionOutput(ArchitectureElementSymbol architectureElement){
- return isTOutput(Sigmoid.class, architectureElement);
+ if (isTOutput(Sigmoid.class, architectureElement)){
+ this.loss = CROSS_ENTROPY;
+ return true;
+ }
+ return false;
}
public boolean isLinearRegressionOutput(ArchitectureElementSymbol architectureElement){
- return architectureElement.isOutput()
+ if (architectureElement.isOutput()
&& !isLogisticRegressionOutput(architectureElement)
- && !isSoftmaxOutput(architectureElement);
+ && !isSoftmaxOutput(architectureElement)){
+ this.loss = EUCLIDEAN;
+ return true;
+ }
+ return false;
}
public boolean isSoftmaxOutput(ArchitectureElementSymbol architectureElement){
- return isTOutput(Softmax.class, architectureElement);
+ if (isTOutput(Softmax.class, architectureElement)){
+ this.loss = CROSS_ENTROPY;
+ return true;
+ }
+ return false;
}
private boolean isTOutput(Class inputPredefinedLayerClass, ArchitectureElementSymbol architectureElement){
diff --git a/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/ConfigurationData.java b/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/ConfigurationData.java
index 40a48404a0e1b3ca0bcc333597f73edd5678e67b..a9106645da57a82f0605b677e8eedc9fea6e355f 100644
--- a/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/ConfigurationData.java
+++ b/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/ConfigurationData.java
@@ -67,6 +67,13 @@ public class ConfigurationData {
return getConfiguration().getEntry("eval_metric").getValue().toString();
}
+ public String getLoss() {
+ if (!getConfiguration().getEntryMap().containsKey("loss")) {
+ return null;
+ }
+ return getConfiguration().getEntry("loss").getValue().toString();
+ }
+
public String getOptimizerName() {
if (getConfiguration().getOptimizer() == null) {
return null;
diff --git a/src/main/resources/templates/caffe2/CNNCreator.ftl b/src/main/resources/templates/caffe2/CNNCreator.ftl
index f90e4234590c59fd5bffbe3602926697c8d86562..5d76ac4bcbc6975397a39dc9ec8c9e5d99c19de5 100644
--- a/src/main/resources/templates/caffe2/CNNCreator.ftl
+++ b/src/main/resources/templates/caffe2/CNNCreator.ftl
@@ -65,10 +65,14 @@ class ${tc.fileNameWithoutEnding}:
${tc.include(tc.architecture.body)}
# this adds the loss and optimizer
- def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+ def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
with core.DeviceScope(device_opts):
- xent = model.LabelCrossEntropy([output, label], 'xent')
- loss = model.AveragedLoss(xent, "loss")
+ if loss == 'cross_entropy':
+ xent = model.LabelCrossEntropy([output, label], 'xent')
+ loss = model.AveragedLoss(xent, "loss")
+ elif loss == 'euclidean':
+ dist = model.net.SquaredL2Distance([label, output], 'dist')
+ loss = dist.AveragedLoss([], ['loss'])
model.AddGradientOperators([loss])
@@ -105,7 +109,7 @@ ${tc.include(tc.architecture.body)}
accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
return accuracy
- def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+ def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='${tc.architectureLoss}', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
if context == 'cpu':
device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
print("CPU mode selected")
@@ -120,7 +124,7 @@ ${tc.include(tc.architecture.body)}
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
- self.add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
+ self.add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
brew.add_weight_decay(train_model, weight_decay)
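
For context, the dispatch that add_training_operators now generates can be read as the following standalone sketch. The helper name add_loss is illustrative and not part of the template; model is a Caffe2 ModelHelper and output/label are the blobs produced by create_model and add_input, as in the diff above.

    def add_loss(model, output, label, loss='cross_entropy'):
        if loss == 'cross_entropy':
            # per-example label cross-entropy, then batch mean
            xent = model.LabelCrossEntropy([output, label], 'xent')
            return model.AveragedLoss(xent, 'loss')
        elif loss == 'euclidean':
            # 0.5 * ||label - output||^2 per example, then batch mean
            dist = model.net.SquaredL2Distance([label, output], 'dist')
            return dist.AveragedLoss([], ['loss'])
        raise ValueError("unsupported loss: %s" % loss)

A call mirroring the generated code would be add_loss(train_model, predictions, label, loss='euclidean').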
diff --git a/src/main/resources/templates/caffe2/CNNTrainer.ftl b/src/main/resources/templates/caffe2/CNNTrainer.ftl
index 8260a743461f160311681f04f2dd148f1d21dfb3..fbe73791f63bedf1d3a793f3a0a2fe2770988ee0 100644
--- a/src/main/resources/templates/caffe2/CNNTrainer.ftl
+++ b/src/main/resources/templates/caffe2/CNNTrainer.ftl
@@ -29,6 +29,9 @@ if __name__ == "__main__":
<#if (config.evalMetric)??>
eval_metric='${config.evalMetric}',
</#if>
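PLACEHOLDER-UNUSED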
+<#if (config.loss)??>
+ loss='${config.loss}',
+</#if>
<#if (config.configuration.optimizer)??>
opt_type='${config.optimizerName}',
<#list config.optimizerParams?keys as param>
diff --git a/src/test/resources/target_code/CNNCreator_LeNet.py b/src/test/resources/target_code/CNNCreator_LeNet.py
index cc749653eaa6ca532027854cd2c850a42e5c18e1..453ed8c8dd622fe36898b87c7b4f85ae05cbe315 100644
--- a/src/test/resources/target_code/CNNCreator_LeNet.py
+++ b/src/test/resources/target_code/CNNCreator_LeNet.py
@@ -82,10 +82,14 @@ class CNNCreator_LeNet:
return predictions
# this adds the loss and optimizer
- def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+ def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
with core.DeviceScope(device_opts):
- xent = model.LabelCrossEntropy([output, label], 'xent')
- loss = model.AveragedLoss(xent, "loss")
+ if loss == 'cross_entropy':
+ xent = model.LabelCrossEntropy([output, label], 'xent')
+ loss = model.AveragedLoss(xent, "loss")
+ elif loss == 'euclidean':
+ dist = model.net.SquaredL2Distance([label, output], 'dist')
+ loss = dist.AveragedLoss([], ['loss'])
model.AddGradientOperators([loss])
@@ -122,7 +126,7 @@ class CNNCreator_LeNet:
accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
return accuracy
- def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+ def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='cross_entropy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
if context == 'cpu':
device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
print("CPU mode selected")
@@ -137,7 +141,7 @@ class CNNCreator_LeNet:
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
- self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
+ self.add_training_operators(train_model, predictions, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
brew.add_weight_decay(train_model, weight_decay)
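
As a sanity check on the new 'euclidean' branch: Caffe2's SquaredL2Distance computes half the squared L2 distance per example, and AveragedLoss takes the batch mean, so the generated graph should be numerically equivalent to this numpy sketch (assuming 2-D batch-by-features blobs; the function name is illustrative).

    import numpy as np

    def euclidean_loss(output, label):
        # 0.5 * ||label - output||^2 per example (SquaredL2Distance),
        # then mean over the batch (AveragedLoss)
        per_example = 0.5 * np.sum((label - output) ** 2, axis=1)
        return per_example.mean()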
diff --git a/src/test/resources/target_code/CNNCreator_VGG16.py b/src/test/resources/target_code/CNNCreator_VGG16.py
index 58ca35f0fad018119eec6394f6eed1a4a472e31b..f7a72517a2aece0625d100aeefa2dd7dada9969c 100644
--- a/src/test/resources/target_code/CNNCreator_VGG16.py
+++ b/src/test/resources/target_code/CNNCreator_VGG16.py
@@ -128,10 +128,14 @@ class CNNCreator_VGG16:
return predictions
# this adds the loss and optimizer
- def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+ def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
with core.DeviceScope(device_opts):
- xent = model.LabelCrossEntropy([output, label], 'xent')
- loss = model.AveragedLoss(xent, "loss")
+ if loss == 'cross_entropy':
+ xent = model.LabelCrossEntropy([output, label], 'xent')
+ loss = model.AveragedLoss(xent, "loss")
+ elif loss == 'euclidean':
+ dist = model.net.SquaredL2Distance([label, output], 'dist')
+ loss = dist.AveragedLoss([], ['loss'])
model.AddGradientOperators([loss])
@@ -168,7 +172,7 @@ class CNNCreator_VGG16:
accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
return accuracy
- def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+ def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='cross_entropy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
if context == 'cpu':
device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
print("CPU mode selected")
@@ -183,7 +187,7 @@ class CNNCreator_VGG16:
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
- self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
+ self.add_training_operators(train_model, predictions, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
brew.add_weight_decay(train_model, weight_decay)
diff --git a/src/test/resources/target_code/CNNTrainer_fullConfig.py b/src/test/resources/target_code/CNNTrainer_fullConfig.py
index c914012e9693c77df984fb3d590658641d1ce79e..76ced4e81e2c84f8dcf02b0ca979c51e9ad6e507 100644
--- a/src/test/resources/target_code/CNNTrainer_fullConfig.py
+++ b/src/test/resources/target_code/CNNTrainer_fullConfig.py
@@ -18,6 +18,7 @@ if __name__ == "__main__":
batch_size=100,
context='gpu',
eval_metric='mse',
+ loss='cross_entropy',
opt_type='rmsprop',
epsilon=1.0E-6,
weight_decay=0.01,
diff --git a/src/test/resources/valid_tests/FullConfig.cnnt b/src/test/resources/valid_tests/FullConfig.cnnt
index df3313b7263ab850c4398c74d2e9e114f184f629..da584682a9743da4f389343f6fb11948b7cd6ba0 100644
--- a/src/test/resources/valid_tests/FullConfig.cnnt
+++ b/src/test/resources/valid_tests/FullConfig.cnnt
@@ -3,6 +3,7 @@ configuration FullConfig{
batch_size : 100
load_checkpoint : true
eval_metric : mse
+ loss : cross_entropy
context : gpu
normalize : true
optimizer : rmsprop{
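
End to end, a loss entry in the .cnnt configuration flows into the generated trainer as a keyword argument to train(). A hypothetical euclidean-regression run against the generated LeNet creator would look like the sketch below; the parameter values are illustrative, not taken from this diff.

    import CNNCreator_LeNet

    lenet = CNNCreator_LeNet.CNNCreator_LeNet()
    lenet.train(
        num_epoch = 10,
        batch_size = 64,
        context = 'cpu',
        loss = 'euclidean',   # routes to SquaredL2Distance + AveragedLoss
        opt_type = 'adam',
        base_learning_rate = 0.001,
    )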