diff --git a/pom.xml b/pom.xml
index a761f6ed2f9532cc112673b368d8244d930173df..da5d33471e33a9801395858bc3af6934902027ef 100644
--- a/pom.xml
+++ b/pom.xml
@@ -16,7 +16,7 @@
         0.2.7
-        0.2.5
+        0.2.6-SNAPSHOT
         0.1.2-SNAPSHOT
diff --git a/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/CNNArchTemplateController.java b/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/CNNArchTemplateController.java
index 0ea41ea21587bc908026353f34386e92dfe5552a..aafc19efef40eddba2ec25828ba67ac59cf518b0 100644
--- a/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/CNNArchTemplateController.java
+++ b/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/CNNArchTemplateController.java
@@ -45,6 +45,10 @@ public class CNNArchTemplateController {
 
     private ArchitectureElementData dataElement;
 
+    public String loss;
+    public static final String CROSS_ENTROPY = "cross_entropy";
+    public static final String EUCLIDEAN = "euclidean";
+
     public CNNArchTemplateController(ArchitectureSymbol architecture) {
         setArchitecture(architecture);
     }
@@ -123,6 +127,10 @@ public class CNNArchTemplateController {
         return list;
     }
 
+    public String getArchitectureLoss(){
+        return this.loss;
+    }
+
     public void include(String relativePath, String templateWithoutFileEnding, Writer writer){
         String templatePath = relativePath + templateWithoutFileEnding + FTL_FILE_ENDING;
         Map<String, Object> ftlContext = new HashMap<>();
@@ -236,18 +244,30 @@ public class CNNArchTemplateController {
 
     public boolean isLogisticRegressionOutput(ArchitectureElementSymbol architectureElement){
-        return isTOutput(Sigmoid.class, architectureElement);
+        if (isTOutput(Sigmoid.class, architectureElement)){
+            this.loss = CROSS_ENTROPY;
+            return true;
+        }
+        return false;
     }
 
     public boolean isLinearRegressionOutput(ArchitectureElementSymbol architectureElement){
-        return architectureElement.isOutput()
+        if (architectureElement.isOutput()
                 && !isLogisticRegressionOutput(architectureElement)
-                && !isSoftmaxOutput(architectureElement);
+                && !isSoftmaxOutput(architectureElement)){
+            this.loss = EUCLIDEAN;
+            return true;
+        }
+        return false;
     }
 
     public boolean isSoftmaxOutput(ArchitectureElementSymbol architectureElement){
-        return isTOutput(Softmax.class, architectureElement);
+        if (isTOutput(Softmax.class, architectureElement)){
+            this.loss = CROSS_ENTROPY;
+            return true;
+        }
+        return false;
     }
 
     private boolean isTOutput(Class inputPredefinedLayerClass, ArchitectureElementSymbol architectureElement){
diff --git a/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/ConfigurationData.java b/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/ConfigurationData.java
index 40a48404a0e1b3ca0bcc333597f73edd5678e67b..a9106645da57a82f0605b677e8eedc9fea6e355f 100644
--- a/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/ConfigurationData.java
+++ b/src/main/java/de/monticore/lang/monticar/cnnarch/caffe2generator/ConfigurationData.java
@@ -67,6 +67,13 @@ public class ConfigurationData {
         return getConfiguration().getEntry("eval_metric").getValue().toString();
     }
 
+    public String getLoss() {
+        if (!getConfiguration().getEntryMap().containsKey("loss")) {
+            return null;
+        }
+        return getConfiguration().getEntry("loss").getValue().toString();
+    }
+
     public String getOptimizerName() {
         if (getConfiguration().getOptimizer() == null) {
             return null;
diff --git a/src/main/resources/templates/caffe2/CNNCreator.ftl b/src/main/resources/templates/caffe2/CNNCreator.ftl
index f90e4234590c59fd5bffbe3602926697c8d86562..5d76ac4bcbc6975397a39dc9ec8c9e5d99c19de5 100644
--- a/src/main/resources/templates/caffe2/CNNCreator.ftl
+++ b/src/main/resources/templates/caffe2/CNNCreator.ftl
@@ -65,10 +65,14 @@ class ${tc.fileNameWithoutEnding}:
 ${tc.include(tc.architecture.body)}
 
     # this adds the loss and optimizer
-    def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+    def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
         with core.DeviceScope(device_opts):
-            xent = model.LabelCrossEntropy([output, label], 'xent')
-            loss = model.AveragedLoss(xent, "loss")
+            if loss == 'cross_entropy':
+                xent = model.LabelCrossEntropy([output, label], 'xent')
+                loss = model.AveragedLoss(xent, "loss")
+            elif loss == 'euclidean':
+                dist = model.net.SquaredL2Distance([label, output], 'dist')
+                loss = dist.AveragedLoss([], ['loss'])
 
             model.AddGradientOperators([loss])
@@ -105,7 +109,7 @@ ${tc.include(tc.architecture.body)}
             accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
         return accuracy
 
-    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='${tc.architectureLoss}', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
         if context == 'cpu':
             device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
             print("CPU mode selected")
@@ -120,7 +124,7 @@ ${tc.include(tc.architecture.body)}
         train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
         data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
         ${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
-        self.add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
+        self.add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
         self.add_accuracy(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, eval_metric)
         with core.DeviceScope(device_opts):
             brew.add_weight_decay(train_model, weight_decay)
diff --git a/src/main/resources/templates/caffe2/CNNTrainer.ftl b/src/main/resources/templates/caffe2/CNNTrainer.ftl
index 8260a743461f160311681f04f2dd148f1d21dfb3..fbe73791f63bedf1d3a793f3a0a2fe2770988ee0 100644
--- a/src/main/resources/templates/caffe2/CNNTrainer.ftl
+++ b/src/main/resources/templates/caffe2/CNNTrainer.ftl
@@ -29,6 +29,9 @@ if __name__ == "__main__":
 <#if (config.evalMetric)??>
         eval_metric='${config.evalMetric}',
 </#if>
+<#if (config.loss)??>
+        loss='${config.loss}',
+</#if>
 <#if (config.configuration.optimizer)??>
         opt_type='${config.optimizerName}',
 <#list config.optimizerParams?keys as param>
diff --git a/src/test/resources/target_code/CNNCreator_LeNet.py b/src/test/resources/target_code/CNNCreator_LeNet.py
index cc749653eaa6ca532027854cd2c850a42e5c18e1..453ed8c8dd622fe36898b87c7b4f85ae05cbe315 100644
--- a/src/test/resources/target_code/CNNCreator_LeNet.py
+++ b/src/test/resources/target_code/CNNCreator_LeNet.py
@@ -82,10 +82,14 @@ class CNNCreator_LeNet:
         return predictions
 
     # this adds the loss and optimizer
-    def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+    def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
         with core.DeviceScope(device_opts):
-            xent = model.LabelCrossEntropy([output, label], 'xent')
-            loss = model.AveragedLoss(xent, "loss")
+            if loss == 'cross_entropy':
+                xent = model.LabelCrossEntropy([output, label], 'xent')
+                loss = model.AveragedLoss(xent, "loss")
+            elif loss == 'euclidean':
+                dist = model.net.SquaredL2Distance([label, output], 'dist')
+                loss = dist.AveragedLoss([], ['loss'])
 
             model.AddGradientOperators([loss])
@@ -122,7 +126,7 @@ class CNNCreator_LeNet:
             accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
         return accuracy
 
-    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='cross_entropy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
         if context == 'cpu':
             device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
             print("CPU mode selected")
@@ -137,7 +141,7 @@ class CNNCreator_LeNet:
         train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
         data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
         predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
-        self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
+        self.add_training_operators(train_model, predictions, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
         self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
         with core.DeviceScope(device_opts):
             brew.add_weight_decay(train_model, weight_decay)
diff --git a/src/test/resources/target_code/CNNCreator_VGG16.py b/src/test/resources/target_code/CNNCreator_VGG16.py
index 58ca35f0fad018119eec6394f6eed1a4a472e31b..f7a72517a2aece0625d100aeefa2dd7dada9969c 100644
--- a/src/test/resources/target_code/CNNCreator_VGG16.py
+++ b/src/test/resources/target_code/CNNCreator_VGG16.py
@@ -128,10 +128,14 @@ class CNNCreator_VGG16:
         return predictions
 
     # this adds the loss and optimizer
-    def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+    def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
         with core.DeviceScope(device_opts):
-            xent = model.LabelCrossEntropy([output, label], 'xent')
-            loss = model.AveragedLoss(xent, "loss")
+            if loss == 'cross_entropy':
+                xent = model.LabelCrossEntropy([output, label], 'xent')
+                loss = model.AveragedLoss(xent, "loss")
+            elif loss == 'euclidean':
+                dist = model.net.SquaredL2Distance([label, output], 'dist')
+                loss = dist.AveragedLoss([], ['loss'])
 
             model.AddGradientOperators([loss])
@@ -168,7 +172,7 @@ class CNNCreator_VGG16:
             accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
         return accuracy
 
-    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='cross_entropy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
         if context == 'cpu':
             device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
             print("CPU mode selected")
@@ -183,7 +187,7 @@ class CNNCreator_VGG16:
         train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
         data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
         predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
-        self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
+        self.add_training_operators(train_model, predictions, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
         self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
         with core.DeviceScope(device_opts):
             brew.add_weight_decay(train_model, weight_decay)
diff --git a/src/test/resources/target_code/CNNTrainer_fullConfig.py b/src/test/resources/target_code/CNNTrainer_fullConfig.py
index c914012e9693c77df984fb3d590658641d1ce79e..76ced4e81e2c84f8dcf02b0ca979c51e9ad6e507 100644
--- a/src/test/resources/target_code/CNNTrainer_fullConfig.py
+++ b/src/test/resources/target_code/CNNTrainer_fullConfig.py
@@ -18,6 +18,7 @@ if __name__ == "__main__":
         batch_size=100,
         context='gpu',
         eval_metric='mse',
+        loss='cross_entropy',
         opt_type='rmsprop',
         epsilon=1.0E-6,
         weight_decay=0.01,
diff --git a/src/test/resources/valid_tests/FullConfig.cnnt b/src/test/resources/valid_tests/FullConfig.cnnt
index df3313b7263ab850c4398c74d2e9e114f184f629..da584682a9743da4f389343f6fb11948b7cd6ba0 100644
--- a/src/test/resources/valid_tests/FullConfig.cnnt
+++ b/src/test/resources/valid_tests/FullConfig.cnnt
@@ -3,6 +3,7 @@ configuration FullConfig{
     batch_size : 100
     load_checkpoint : true
     eval_metric : mse
+    loss : cross_entropy
     context : gpu
     normalize : true
     optimizer : rmsprop{
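Note on the new loss switch: the generated add_training_operators now selects between Caffe2's LabelCrossEntropy operator (cross_entropy, inferred for softmax and sigmoid outputs) and SquaredL2Distance (euclidean, inferred for linear regression outputs). The following is a minimal, self-contained sketch of that branch outside the generated class, assuming a working Caffe2 installation; the tiny fully connected net, blob names, and shapes are illustrative assumptions and not part of this patch.

# Minimal sketch (illustrative, not part of the patch): the same
# cross_entropy/euclidean branch as in the generated add_training_operators.
import numpy as np
from caffe2.python import brew, model_helper, workspace

model = model_helper.ModelHelper(name="loss_demo")
workspace.FeedBlob("data", np.random.rand(4, 8).astype(np.float32))
workspace.FeedBlob("label", np.zeros(4, dtype=np.int32))  # class indices

fc = brew.fc(model, "data", "fc", dim_in=8, dim_out=2)
output = brew.softmax(model, fc, "pred")

loss = 'cross_entropy'  # the generator fills this default into train() via ${tc.architectureLoss}
if loss == 'cross_entropy':
    # classification: cross-entropy of softmax probabilities against label indices
    xent = model.LabelCrossEntropy([output, "label"], 'xent')
    loss = model.AveragedLoss(xent, "loss")
elif loss == 'euclidean':
    # regression: squared L2 distance; 'label' must then match the output's shape
    dist = model.net.SquaredL2Distance(["label", output], 'dist')
    loss = dist.AveragedLoss([], ['loss'])

model.AddGradientOperators([loss])
workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)
print(workspace.FetchBlob("loss"))  # scalar averaged loss

Design-wise, the default is derived from the architecture's output layer (isSoftmaxOutput and isLogisticRegressionOutput set CROSS_ENTROPY, isLinearRegressionOutput sets EUCLIDEAN), and an explicit loss entry in the .cnnt training configuration, as in FullConfig.cnnt above, overrides the inferred value at generation time.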