From fa500d5b348d5588d23930a723aecf9e1aa8e0f5 Mon Sep 17 00:00:00 2001 From: eyuhar Date: Wed, 5 Jun 2019 12:20:43 +0200 Subject: [PATCH 1/4] added loss functions --- pom.xml | 2 +- .../ArchitectureElementData.java | 23 - .../CNNArch2MxNetTemplateController.java | 2 - .../CNNArchTemplateController.java | 41 +- .../mxnetgenerator/ConfigurationData.java | 27 + .../resources/templates/mxnet/CNNCreator.ftl | 83 ++- .../resources/templates/mxnet/CNNTrainer.ftl | 10 + .../templates/mxnet/elements/Output.ftl | 10 +- .../mxnetgenerator/GenerationTest.java | 2 +- .../target_code/CNNCreator_Alexnet.py | 397 +++++++---- .../CNNCreator_CifarClassifierNetwork.py | 673 ++++++++++-------- .../resources/target_code/CNNCreator_VGG16.py | 447 +++++++----- .../target_code/CNNTrainer_fullConfig.py | 4 + .../target_code/CNNTrainer_simpleConfig.py | 1 + .../resources/valid_tests/FullConfig.cnnt | 4 + .../resources/valid_tests/SimpleConfig.cnnt | 1 + 16 files changed, 1017 insertions(+), 710 deletions(-) diff --git a/pom.xml b/pom.xml index 8fcd494..5528e63 100644 --- a/pom.xml +++ b/pom.xml @@ -16,7 +16,7 @@ 0.3.0-SNAPSHOT - 0.2.6 + 0.3.1-SNAPSHOT 0.1.4 diff --git a/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/ArchitectureElementData.java b/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/ArchitectureElementData.java index 631ef2d..eb20cb4 100644 --- a/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/ArchitectureElementData.java +++ b/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/ArchitectureElementData.java @@ -70,29 +70,6 @@ public class ArchitectureElementData { return getTemplateController().getLayerInputs(getElement()); } - public boolean isLogisticRegressionOutput(){ - return getTemplateController().isLogisticRegressionOutput(getElement()); - } - - - public boolean isLinearRegressionOutput(){ - boolean result = getTemplateController().isLinearRegressionOutput(getElement()); - if (result){ - Log.warn("The Output '" + getElement().getName() + "' is a linear regression output (squared loss) during training" + - " because the previous architecture element is not a softmax (cross-entropy loss) or sigmoid (logistic regression loss) activation. " + - "Other loss functions are currently not supported. 
" - , getElement().getSourcePosition()); - } - return result; - } - - public boolean isSoftmaxOutput(){ - return getTemplateController().isSoftmaxOutput(getElement()); - } - - - - public List getKernel(){ return ((LayerSymbol) getElement()) .getIntTupleValue(AllPredefinedLayers.KERNEL_NAME).get(); diff --git a/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/CNNArch2MxNetTemplateController.java b/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/CNNArch2MxNetTemplateController.java index 7e9ffd0..53e5670 100644 --- a/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/CNNArch2MxNetTemplateController.java +++ b/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/CNNArch2MxNetTemplateController.java @@ -37,10 +37,8 @@ public class CNNArch2MxNetTemplateController extends CNNArchTemplateController { if (layer.isAtomic()){ ArchitectureElementSymbol nextElement = layer.getOutputElement().get(); - if (!isSoftmaxOutput(nextElement) && !isLogisticRegressionOutput(nextElement)){ String templateName = layer.getDeclaration().getName(); include(TEMPLATE_ELEMENTS_DIR_PATH, templateName, writer); - } } else { include(layer.getResolvedThis().get(), writer); } diff --git a/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/CNNArchTemplateController.java b/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/CNNArchTemplateController.java index 04ab544..18d6ed6 100644 --- a/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/CNNArchTemplateController.java +++ b/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/CNNArchTemplateController.java @@ -139,16 +139,12 @@ public abstract class CNNArchTemplateController { public List getLayerInputs(ArchitectureElementSymbol layer){ List inputNames = new ArrayList<>(); - if (isSoftmaxOutput(layer) || isLogisticRegressionOutput(layer)){ - inputNames = getLayerInputs(layer.getInputElement().get()); - } else { - for (ArchitectureElementSymbol input : layer.getPrevious()) { - if (input.getOutputTypes().size() == 1) { - inputNames.add(getName(input)); - } else { - for (int i = 0; i < input.getOutputTypes().size(); i++) { - inputNames.add(getName(input) + "[" + i + "]"); - } + for (ArchitectureElementSymbol input : layer.getPrevious()) { + if (input.getOutputTypes().size() == 1) { + inputNames.add(getName(input)); + } else { + for (int i = 0; i < input.getOutputTypes().size(); i++) { + inputNames.add(getName(input) + "[" + i + "]"); } } } @@ -220,29 +216,4 @@ public abstract class CNNArchTemplateController { return stringBuilder.toString(); } - - public boolean isLogisticRegressionOutput(ArchitectureElementSymbol architectureElement){ - return isTOutput(Sigmoid.class, architectureElement); - } - - public boolean isLinearRegressionOutput(ArchitectureElementSymbol architectureElement){ - return architectureElement.isOutput() - && !isLogisticRegressionOutput(architectureElement) - && !isSoftmaxOutput(architectureElement); - } - - - public boolean isSoftmaxOutput(ArchitectureElementSymbol architectureElement){ - return isTOutput(Softmax.class, architectureElement); - } - - private boolean isTOutput(Class inputPredefinedLayerClass, ArchitectureElementSymbol architectureElement){ - if (architectureElement.isOutput() - && architectureElement.getInputElement().isPresent() - && architectureElement.getInputElement().get() instanceof LayerSymbol){ - LayerSymbol inputLayer = (LayerSymbol) architectureElement.getInputElement().get(); - return 
inputPredefinedLayerClass.isInstance(inputLayer.getDeclaration()); - } - return false; - } } diff --git a/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/ConfigurationData.java b/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/ConfigurationData.java index 07adb62..5dc327f 100644 --- a/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/ConfigurationData.java +++ b/src/main/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/ConfigurationData.java @@ -67,6 +67,33 @@ public class ConfigurationData { return getConfiguration().getEntry("eval_metric").getValue().toString(); } + public String getLossName() { + if (getConfiguration().getLoss() == null) { + return null; + } + return getConfiguration().getLoss().getName(); + } + + public Map getLossParams() { + + Map mapToStrings = new HashMap<>(); + Map lossParams = getConfiguration().getLoss().getLossParamMap(); + for (Map.Entry entry : lossParams.entrySet()) { + String paramName = entry.getKey(); + String valueAsString = entry.getValue().toString(); + Class realClass = entry.getValue().getValue().getValue().getClass(); + if (realClass == Boolean.class) { + valueAsString = (Boolean) entry.getValue().getValue().getValue() ? "True" : "False"; + } + mapToStrings.put(paramName, valueAsString); + } + if (mapToStrings.isEmpty()){ + return null; + } else{ + return mapToStrings;} + } + + public String getOptimizerName() { if (getConfiguration().getOptimizer() == null) { return null; diff --git a/src/main/resources/templates/mxnet/CNNCreator.ftl b/src/main/resources/templates/mxnet/CNNCreator.ftl index 5a0c600..d8d51c7 100644 --- a/src/main/resources/templates/mxnet/CNNCreator.ftl +++ b/src/main/resources/templates/mxnet/CNNCreator.ftl @@ -104,10 +104,80 @@ class ${tc.fileNameWithoutEnding}: logging.error("Data loading failure. 
File '" + os.path.abspath(train_path) + "' does not exist.") sys.exit(1) + def loss_function(self, loss, params): + label = mx.symbol.var(name=self._output_names_[0], ) + prediction = self.module.symbol.get_children()[0] + + margin = params['margin'] if 'margin' in params else 1.0 + sparseLabel = params['sparse_label'] if 'sparse_label' in params else True + + if loss == 'softmax_cross_entropy': + fromLogits = params['from_logits'] if 'from_logits' in params else False + if not fromLogits: + prediction = mx.symbol.log_softmax(data=prediction, axis=1) + if sparseLabel: + loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True) + else: + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="softmax_cross_entropy") + elif loss == 'cross_entropy': + prediction = mx.symbol.log(prediction) + if sparseLabel: + loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True) + else: + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="cross_entropy") + elif loss == 'sigmoid_binary_cross_entropy': + loss_func = mx.symbol.LogisticRegressionOutput(data=prediction, name=self.module.symbol.name) + elif loss == 'l1': + loss_func = mx.symbol.MAERegressionOutput(data=prediction, name=self.module.symbol.name) + elif loss == 'l2': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.square((label - prediction) / 2), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="L2") + elif loss == 'huber': + rho = params['rho'] if 'rho' in params else 1 + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.abs(label - prediction) + loss_func = mx.symbol.where(loss_func > rho, loss_func - 0.5 * rho, (0.5 / rho) * mx.symbol.square(loss_func)) + loss_func = mx.symbol.mean(loss_func, axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="huber") + elif loss == 'hinge': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.relu(margin - prediction * label), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="hinge") + elif loss == 'squared_hinge': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.square(mx.symbol.relu(margin - prediction * label)), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="squared_hinge") + elif loss == 'logistic': + labelFormat = params['label_format'] if 'label_format' in params else 'signed' + if labelFormat not in ["binary", "signed"]: + logging.error("label_format can only be signed or binary") + label = mx.symbol.reshape_like(label, prediction) + if labelFormat == 'signed': + label = (label + 1.0)/2.0 + loss_func = mx.symbol.relu(prediction) - prediction * label + loss_func = loss_func + mx.symbol.Activation(-mx.symbol.abs(prediction), act_type="softrelu") + loss_func = mx.symbol.MakeLoss(mx.symbol.mean(loss_func, 0, exclude=True), name="logistic") + elif loss == 'kullback_leibler': + fromLogits = params['from_logits'] if 'from_logits' in params else True + if not fromLogits: + prediction = mx.symbol.log_softmax(prediction, axis=1) + loss_func = mx.symbol.mean(label * 
(mx.symbol.log(label) - prediction), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="kullback_leibler") + else: + logging.error("Invalid loss parameter.") + + return loss_func def train(self, batch_size=64, num_epoch=10, eval_metric='acc', + loss ='softmax_cross_entropy', + loss_params={}, optimizer='adam', optimizer_params=(('learning_rate', 0.001),), load_checkpoint=True, @@ -136,7 +206,6 @@ class ${tc.fileNameWithoutEnding}: del optimizer_params['step_size'] del optimizer_params['learning_rate_decay'] - train_iter, test_iter, data_mean, data_std = self.load_data(batch_size) if self.module == None: if normalize: @@ -144,6 +213,14 @@ class ${tc.fileNameWithoutEnding}: else: self.construct(mx_context) + loss_func = self.loss_function(loss=loss, params=loss_params) + + self.module = mx.mod.Module( + symbol=mx.symbol.Group([loss_func, mx.symbol.BlockGrad(self.module.symbol.get_children()[0], name="pred")]), + data_names=self._input_names_, + label_names=self._output_names_, + context=mx_context) + begin_epoch = 0 if load_checkpoint: begin_epoch = self.load(mx_context) @@ -157,9 +234,11 @@ class ${tc.fileNameWithoutEnding}: if not os.path.isdir(self._model_dir_): raise + metric = mx.metric.create(eval_metric, output_names=['pred_output']) + self.module.fit( train_data=train_iter, - eval_metric=eval_metric, + eval_metric=metric, eval_data=test_iter, optimizer=optimizer, optimizer_params=optimizer_params, diff --git a/src/main/resources/templates/mxnet/CNNTrainer.ftl b/src/main/resources/templates/mxnet/CNNTrainer.ftl index de2e1fe..c3c0a47 100644 --- a/src/main/resources/templates/mxnet/CNNTrainer.ftl +++ b/src/main/resources/templates/mxnet/CNNTrainer.ftl @@ -31,6 +31,16 @@ if __name__ == "__main__": <#if (config.evalMetric)??> eval_metric='${config.evalMetric}', +<#if (config.configuration.loss)??> + loss='${config.lossName}', +<#if (config.lossParams)??> + loss_params={ +<#list config.lossParams?keys as param> + '${param}': ${config.lossParams[param]}<#sep>, + +}, + + <#if (config.configuration.optimizer)??> optimizer='${config.optimizerName}', optimizer_params={ diff --git a/src/main/resources/templates/mxnet/elements/Output.ftl b/src/main/resources/templates/mxnet/elements/Output.ftl index cd80d10..69df8d8 100644 --- a/src/main/resources/templates/mxnet/elements/Output.ftl +++ b/src/main/resources/templates/mxnet/elements/Output.ftl @@ -1,11 +1,3 @@ - -<#if element.softmaxOutput> ${element.name} = mx.symbol.SoftmaxOutput(data=${element.inputs[0]}, name="${element.name}") -<#elseif element.logisticRegressionOutput> - ${element.name} = mx.symbol.LogisticRegressionOutput(data=${element.inputs[0]}, - name="${element.name}") -<#elseif element.linearRegressionOutput> - ${element.name} = mx.symbol.LinearRegressionOutput(data=${element.inputs[0]}, - name="${element.name}") - \ No newline at end of file + \ No newline at end of file diff --git a/src/test/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/GenerationTest.java b/src/test/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/GenerationTest.java index 527011b..c5a08bc 100644 --- a/src/test/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/GenerationTest.java +++ b/src/test/java/de/monticore/lang/monticar/cnnarch/mxnetgenerator/GenerationTest.java @@ -112,7 +112,7 @@ public class GenerationTest extends AbstractSymtabTest{ Log.getFindings().clear(); String[] args = {"-m", "src/test/resources/valid_tests", "-r", "MultipleOutputs"}; CNNArch2MxNetCli.main(args); - assertTrue(Log.getFindings().size() == 3); 
+ assertTrue(Log.getFindings().size() == 1); } @Test diff --git a/src/test/resources/target_code/CNNCreator_Alexnet.py b/src/test/resources/target_code/CNNCreator_Alexnet.py index 0da80ee..885d91d 100644 --- a/src/test/resources/target_code/CNNCreator_Alexnet.py +++ b/src/test/resources/target_code/CNNCreator_Alexnet.py @@ -52,10 +52,10 @@ class CNNCreator_Alexnet: else: logging.info("Loading checkpoint: " + param_file) self.module.load(prefix=self._model_dir_ + self._model_prefix_, - epoch=lastEpoch, - data_names=self._input_names_, - label_names=self._output_names_, - context=context) + epoch=lastEpoch, + data_names=self._input_names_, + label_names=self._output_names_, + context=context) return lastEpoch @@ -104,10 +104,80 @@ class CNNCreator_Alexnet: logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.") sys.exit(1) + def loss_function(self, loss, params): + label = mx.symbol.var(name=self._output_names_[0], ) + prediction = self.module.symbol.get_children()[0] + + margin = params['margin'] if 'margin' in params else 1.0 + sparseLabel = params['sparse_label'] if 'sparse_label' in params else True + + if loss == 'softmax_cross_entropy': + fromLogits = params['from_logits'] if 'from_logits' in params else False + if not fromLogits: + prediction = mx.symbol.log_softmax(data=prediction, axis=1) + if sparseLabel: + loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True) + else: + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="softmax_cross_entropy") + elif loss == 'cross_entropy': + prediction = mx.symbol.log(prediction) + if sparseLabel: + loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True) + else: + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="cross_entropy") + elif loss == 'sigmoid_binary_cross_entropy': + loss_func = mx.symbol.LogisticRegressionOutput(data=prediction, name=self.module.symbol.name) + elif loss == 'l1': + loss_func = mx.symbol.MAERegressionOutput(data=prediction, name=self.module.symbol.name) + elif loss == 'l2': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.square((label - prediction) / 2), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="L2") + elif loss == 'huber': + rho = params['rho'] if 'rho' in params else 1 + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.abs(label - prediction) + loss_func = mx.symbol.where(loss_func > rho, loss_func - 0.5 * rho, (0.5 / rho) * mx.symbol.square(loss_func)) + loss_func = mx.symbol.mean(loss_func, axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="huber") + elif loss == 'hinge': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.relu(margin - prediction * label), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="hinge") + elif loss == 'squared_hinge': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.square(mx.symbol.relu(margin - prediction * label)), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="squared_hinge") + 
elif loss == 'logistic': + labelFormat = params['label_format'] if 'label_format' in params else 'signed' + if labelFormat not in ["binary", "signed"]: + logging.error("label_format can only be signed or binary") + label = mx.symbol.reshape_like(label, prediction) + if labelFormat == 'signed': + label = (label + 1.0)/2.0 + loss_func = mx.symbol.relu(prediction) - prediction * label + loss_func = loss_func + mx.symbol.Activation(-mx.symbol.abs(prediction), act_type="softrelu") + loss_func = mx.symbol.MakeLoss(mx.symbol.mean(loss_func, 0, exclude=True), name="logistic") + elif loss == 'kullback_leibler': + fromLogits = params['from_logits'] if 'from_logits' in params else True + if not fromLogits: + prediction = mx.symbol.log_softmax(prediction, axis=1) + loss_func = mx.symbol.mean(label * (mx.symbol.log(label) - prediction), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="kullback_leibler") + else: + logging.error("Invalid loss parameter.") + + return loss_func def train(self, batch_size=64, num_epoch=10, eval_metric='acc', + loss ='softmax_cross_entropy', + loss_params={}, optimizer='adam', optimizer_params=(('learning_rate', 0.001),), load_checkpoint=True, @@ -130,13 +200,12 @@ class CNNCreator_Alexnet: min_learning_rate = optimizer_params['learning_rate_minimum'] del optimizer_params['learning_rate_minimum'] optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler( - optimizer_params['step_size'], - factor=optimizer_params['learning_rate_decay'], - stop_factor_lr=min_learning_rate) + optimizer_params['step_size'], + factor=optimizer_params['learning_rate_decay'], + stop_factor_lr=min_learning_rate) del optimizer_params['step_size'] del optimizer_params['learning_rate_decay'] - train_iter, test_iter, data_mean, data_std = self.load_data(batch_size) if self.module == None: if normalize: @@ -144,6 +213,14 @@ class CNNCreator_Alexnet: else: self.construct(mx_context) + loss_func = self.loss_function(loss=loss, params=loss_params) + + self.module = mx.mod.Module( + symbol=mx.symbol.Group([loss_func, mx.symbol.BlockGrad(self.module.symbol.get_children()[0], name="pred")]), + data_names=self._input_names_, + label_names=self._output_names_, + context=mx_context) + begin_epoch = 0 if load_checkpoint: begin_epoch = self.load(mx_context) @@ -157,9 +234,11 @@ class CNNCreator_Alexnet: if not os.path.isdir(self._model_dir_): raise + metric = mx.metric.create(eval_metric, output_names=['pred_output']) + self.module.fit( train_data=train_iter, - eval_metric=eval_metric, + eval_metric=metric, eval_data=test_iter, optimizer=optimizer, optimizer_params=optimizer_params, @@ -173,7 +252,7 @@ class CNNCreator_Alexnet: def construct(self, context, data_mean=None, data_std=None): data = mx.sym.var("data", - shape=(0,3,224,224)) + shape=(0,3,224,224)) # data, output shape: {[3,224,224]} if not data_mean is None: @@ -185,244 +264,246 @@ class CNNCreator_Alexnet: data = mx.symbol.broadcast_sub(data, _data_mean_) data = mx.symbol.broadcast_div(data, _data_std_) conv1_ = mx.symbol.pad(data=data, - mode='constant', - pad_width=(0,0,0,0,2,1,2,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,2,1,2,1), + constant_value=0) conv1_ = mx.symbol.Convolution(data=conv1_, - kernel=(11,11), - stride=(4,4), - num_filter=96, - no_bias=False, - name="conv1_") + kernel=(11,11), + stride=(4,4), + num_filter=96, + no_bias=False, + name="conv1_") # conv1_, output shape: {[96,55,55]} lrn1_ = mx.symbol.LRN(data=conv1_, - alpha=0.0001, - beta=0.75, - knorm=2, - nsize=5, - name="lrn1_") + 
alpha=0.0001, + beta=0.75, + knorm=2, + nsize=5, + name="lrn1_") pool1_ = mx.symbol.Pooling(data=lrn1_, - kernel=(3,3), - pool_type="max", - stride=(2,2), - name="pool1_") + kernel=(3,3), + pool_type="max", + stride=(2,2), + name="pool1_") # pool1_, output shape: {[96,27,27]} relu1_ = mx.symbol.Activation(data=pool1_, - act_type='relu', - name="relu1_") + act_type='relu', + name="relu1_") split1_ = mx.symbol.split(data=relu1_, - num_outputs=2, - axis=1, - name="split1_") + num_outputs=2, + axis=1, + name="split1_") # split1_, output shape: {[48,27,27][48,27,27]} get2_1_ = split1_[0] conv2_1_ = mx.symbol.pad(data=get2_1_, - mode='constant', - pad_width=(0,0,0,0,2,2,2,2), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,2,2,2,2), + constant_value=0) conv2_1_ = mx.symbol.Convolution(data=conv2_1_, - kernel=(5,5), - stride=(1,1), - num_filter=128, - no_bias=False, - name="conv2_1_") + kernel=(5,5), + stride=(1,1), + num_filter=128, + no_bias=False, + name="conv2_1_") # conv2_1_, output shape: {[128,27,27]} lrn2_1_ = mx.symbol.LRN(data=conv2_1_, - alpha=0.0001, - beta=0.75, - knorm=2, - nsize=5, - name="lrn2_1_") + alpha=0.0001, + beta=0.75, + knorm=2, + nsize=5, + name="lrn2_1_") pool2_1_ = mx.symbol.Pooling(data=lrn2_1_, - kernel=(3,3), - pool_type="max", - stride=(2,2), - name="pool2_1_") + kernel=(3,3), + pool_type="max", + stride=(2,2), + name="pool2_1_") # pool2_1_, output shape: {[128,13,13]} relu2_1_ = mx.symbol.Activation(data=pool2_1_, - act_type='relu', - name="relu2_1_") + act_type='relu', + name="relu2_1_") get2_2_ = split1_[1] conv2_2_ = mx.symbol.pad(data=get2_2_, - mode='constant', - pad_width=(0,0,0,0,2,2,2,2), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,2,2,2,2), + constant_value=0) conv2_2_ = mx.symbol.Convolution(data=conv2_2_, - kernel=(5,5), - stride=(1,1), - num_filter=128, - no_bias=False, - name="conv2_2_") + kernel=(5,5), + stride=(1,1), + num_filter=128, + no_bias=False, + name="conv2_2_") # conv2_2_, output shape: {[128,27,27]} lrn2_2_ = mx.symbol.LRN(data=conv2_2_, - alpha=0.0001, - beta=0.75, - knorm=2, - nsize=5, - name="lrn2_2_") + alpha=0.0001, + beta=0.75, + knorm=2, + nsize=5, + name="lrn2_2_") pool2_2_ = mx.symbol.Pooling(data=lrn2_2_, - kernel=(3,3), - pool_type="max", - stride=(2,2), - name="pool2_2_") + kernel=(3,3), + pool_type="max", + stride=(2,2), + name="pool2_2_") # pool2_2_, output shape: {[128,13,13]} relu2_2_ = mx.symbol.Activation(data=pool2_2_, - act_type='relu', - name="relu2_2_") + act_type='relu', + name="relu2_2_") concatenate3_ = mx.symbol.concat(relu2_1_, relu2_2_, - dim=1, - name="concatenate3_") + dim=1, + name="concatenate3_") # concatenate3_, output shape: {[256,13,13]} conv3_ = mx.symbol.pad(data=concatenate3_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv3_ = mx.symbol.Convolution(data=conv3_, - kernel=(3,3), - stride=(1,1), - num_filter=384, - no_bias=False, - name="conv3_") + kernel=(3,3), + stride=(1,1), + num_filter=384, + no_bias=False, + name="conv3_") # conv3_, output shape: {[384,13,13]} relu3_ = mx.symbol.Activation(data=conv3_, - act_type='relu', - name="relu3_") + act_type='relu', + name="relu3_") split3_ = mx.symbol.split(data=relu3_, - num_outputs=2, - axis=1, - name="split3_") + num_outputs=2, + axis=1, + name="split3_") # split3_, output shape: {[192,13,13][192,13,13]} get4_1_ = split3_[0] conv4_1_ = mx.symbol.pad(data=get4_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - 
constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv4_1_ = mx.symbol.Convolution(data=conv4_1_, - kernel=(3,3), - stride=(1,1), - num_filter=192, - no_bias=False, - name="conv4_1_") + kernel=(3,3), + stride=(1,1), + num_filter=192, + no_bias=False, + name="conv4_1_") # conv4_1_, output shape: {[192,13,13]} relu4_1_ = mx.symbol.Activation(data=conv4_1_, - act_type='relu', - name="relu4_1_") + act_type='relu', + name="relu4_1_") conv5_1_ = mx.symbol.pad(data=relu4_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv5_1_ = mx.symbol.Convolution(data=conv5_1_, - kernel=(3,3), - stride=(1,1), - num_filter=128, - no_bias=False, - name="conv5_1_") + kernel=(3,3), + stride=(1,1), + num_filter=128, + no_bias=False, + name="conv5_1_") # conv5_1_, output shape: {[128,13,13]} pool5_1_ = mx.symbol.Pooling(data=conv5_1_, - kernel=(3,3), - pool_type="max", - stride=(2,2), - name="pool5_1_") + kernel=(3,3), + pool_type="max", + stride=(2,2), + name="pool5_1_") # pool5_1_, output shape: {[128,6,6]} relu5_1_ = mx.symbol.Activation(data=pool5_1_, - act_type='relu', - name="relu5_1_") + act_type='relu', + name="relu5_1_") get4_2_ = split3_[1] conv4_2_ = mx.symbol.pad(data=get4_2_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv4_2_ = mx.symbol.Convolution(data=conv4_2_, - kernel=(3,3), - stride=(1,1), - num_filter=192, - no_bias=False, - name="conv4_2_") + kernel=(3,3), + stride=(1,1), + num_filter=192, + no_bias=False, + name="conv4_2_") # conv4_2_, output shape: {[192,13,13]} relu4_2_ = mx.symbol.Activation(data=conv4_2_, - act_type='relu', - name="relu4_2_") + act_type='relu', + name="relu4_2_") conv5_2_ = mx.symbol.pad(data=relu4_2_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv5_2_ = mx.symbol.Convolution(data=conv5_2_, - kernel=(3,3), - stride=(1,1), - num_filter=128, - no_bias=False, - name="conv5_2_") + kernel=(3,3), + stride=(1,1), + num_filter=128, + no_bias=False, + name="conv5_2_") # conv5_2_, output shape: {[128,13,13]} pool5_2_ = mx.symbol.Pooling(data=conv5_2_, - kernel=(3,3), - pool_type="max", - stride=(2,2), - name="pool5_2_") + kernel=(3,3), + pool_type="max", + stride=(2,2), + name="pool5_2_") # pool5_2_, output shape: {[128,6,6]} relu5_2_ = mx.symbol.Activation(data=pool5_2_, - act_type='relu', - name="relu5_2_") + act_type='relu', + name="relu5_2_") concatenate6_ = mx.symbol.concat(relu5_1_, relu5_2_, - dim=1, - name="concatenate6_") + dim=1, + name="concatenate6_") # concatenate6_, output shape: {[256,6,6]} fc6_ = mx.symbol.flatten(data=concatenate6_) fc6_ = mx.symbol.FullyConnected(data=fc6_, - num_hidden=4096, - no_bias=False, - name="fc6_") + num_hidden=4096, + no_bias=False, + name="fc6_") relu6_ = mx.symbol.Activation(data=fc6_, - act_type='relu', - name="relu6_") + act_type='relu', + name="relu6_") dropout6_ = mx.symbol.Dropout(data=relu6_, - p=0.5, - name="dropout6_") + p=0.5, + name="dropout6_") fc7_ = mx.symbol.FullyConnected(data=dropout6_, - num_hidden=4096, - no_bias=False, - name="fc7_") + num_hidden=4096, + no_bias=False, + name="fc7_") relu7_ = mx.symbol.Activation(data=fc7_, - act_type='relu', - name="relu7_") + act_type='relu', + name="relu7_") dropout7_ = mx.symbol.Dropout(data=relu7_, - p=0.5, - name="dropout7_") + p=0.5, + name="dropout7_") 
fc8_ = mx.symbol.FullyConnected(data=dropout7_, - num_hidden=10, - no_bias=False, - name="fc8_") - - predictions = mx.symbol.SoftmaxOutput(data=fc8_, - name="predictions") + num_hidden=10, + no_bias=False, + name="fc8_") + softmax8_ = mx.symbol.softmax(data=fc8_, + axis=1, + name="softmax8_") + predictions = mx.symbol.SoftmaxOutput(data=softmax8_, + name="predictions") self.module = mx.mod.Module(symbol=mx.symbol.Group([predictions]), - data_names=self._input_names_, - label_names=self._output_names_, - context=context) + data_names=self._input_names_, + label_names=self._output_names_, + context=context) diff --git a/src/test/resources/target_code/CNNCreator_CifarClassifierNetwork.py b/src/test/resources/target_code/CNNCreator_CifarClassifierNetwork.py index 961e732..e2ba49c 100644 --- a/src/test/resources/target_code/CNNCreator_CifarClassifierNetwork.py +++ b/src/test/resources/target_code/CNNCreator_CifarClassifierNetwork.py @@ -52,10 +52,10 @@ class CNNCreator_CifarClassifierNetwork: else: logging.info("Loading checkpoint: " + param_file) self.module.load(prefix=self._model_dir_ + self._model_prefix_, - epoch=lastEpoch, - data_names=self._input_names_, - label_names=self._output_names_, - context=context) + epoch=lastEpoch, + data_names=self._input_names_, + label_names=self._output_names_, + context=context) return lastEpoch @@ -104,10 +104,80 @@ class CNNCreator_CifarClassifierNetwork: logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.") sys.exit(1) + def loss_function(self, loss, params): + label = mx.symbol.var(name=self._output_names_[0], ) + prediction = self.module.symbol.get_children()[0] + + margin = params['margin'] if 'margin' in params else 1.0 + sparseLabel = params['sparse_label'] if 'sparse_label' in params else True + + if loss == 'softmax_cross_entropy': + fromLogits = params['from_logits'] if 'from_logits' in params else False + if not fromLogits: + prediction = mx.symbol.log_softmax(data=prediction, axis=1) + if sparseLabel: + loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True) + else: + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="softmax_cross_entropy") + elif loss == 'cross_entropy': + prediction = mx.symbol.log(prediction) + if sparseLabel: + loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True) + else: + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="cross_entropy") + elif loss == 'sigmoid_binary_cross_entropy': + loss_func = mx.symbol.LogisticRegressionOutput(data=prediction, name=self.module.symbol.name) + elif loss == 'l1': + loss_func = mx.symbol.MAERegressionOutput(data=prediction, name=self.module.symbol.name) + elif loss == 'l2': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.square((label - prediction) / 2), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="L2") + elif loss == 'huber': + rho = params['rho'] if 'rho' in params else 1 + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.abs(label - prediction) + loss_func = mx.symbol.where(loss_func > rho, loss_func - 0.5 * rho, (0.5 / rho) 
* mx.symbol.square(loss_func)) + loss_func = mx.symbol.mean(loss_func, axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="huber") + elif loss == 'hinge': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.relu(margin - prediction * label), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="hinge") + elif loss == 'squared_hinge': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.square(mx.symbol.relu(margin - prediction * label)), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="squared_hinge") + elif loss == 'logistic': + labelFormat = params['label_format'] if 'label_format' in params else 'signed' + if labelFormat not in ["binary", "signed"]: + logging.error("label_format can only be signed or binary") + label = mx.symbol.reshape_like(label, prediction) + if labelFormat == 'signed': + label = (label + 1.0)/2.0 + loss_func = mx.symbol.relu(prediction) - prediction * label + loss_func = loss_func + mx.symbol.Activation(-mx.symbol.abs(prediction), act_type="softrelu") + loss_func = mx.symbol.MakeLoss(mx.symbol.mean(loss_func, 0, exclude=True), name="logistic") + elif loss == 'kullback_leibler': + fromLogits = params['from_logits'] if 'from_logits' in params else True + if not fromLogits: + prediction = mx.symbol.log_softmax(prediction, axis=1) + loss_func = mx.symbol.mean(label * (mx.symbol.log(label) - prediction), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="kullback_leibler") + else: + logging.error("Invalid loss parameter.") + + return loss_func def train(self, batch_size=64, num_epoch=10, eval_metric='acc', + loss ='softmax_cross_entropy', + loss_params={}, optimizer='adam', optimizer_params=(('learning_rate', 0.001),), load_checkpoint=True, @@ -130,13 +200,12 @@ class CNNCreator_CifarClassifierNetwork: min_learning_rate = optimizer_params['learning_rate_minimum'] del optimizer_params['learning_rate_minimum'] optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler( - optimizer_params['step_size'], - factor=optimizer_params['learning_rate_decay'], - stop_factor_lr=min_learning_rate) + optimizer_params['step_size'], + factor=optimizer_params['learning_rate_decay'], + stop_factor_lr=min_learning_rate) del optimizer_params['step_size'] del optimizer_params['learning_rate_decay'] - train_iter, test_iter, data_mean, data_std = self.load_data(batch_size) if self.module == None: if normalize: @@ -144,6 +213,14 @@ class CNNCreator_CifarClassifierNetwork: else: self.construct(mx_context) + loss_func = self.loss_function(loss=loss, params=loss_params) + + self.module = mx.mod.Module( + symbol=mx.symbol.Group([loss_func, mx.symbol.BlockGrad(self.module.symbol.get_children()[0], name="pred")]), + data_names=self._input_names_, + label_names=self._output_names_, + context=mx_context) + begin_epoch = 0 if load_checkpoint: begin_epoch = self.load(mx_context) @@ -157,9 +234,11 @@ class CNNCreator_CifarClassifierNetwork: if not os.path.isdir(self._model_dir_): raise + metric = mx.metric.create(eval_metric, output_names=['pred_output']) + self.module.fit( train_data=train_iter, - eval_metric=eval_metric, + eval_metric=metric, eval_data=test_iter, optimizer=optimizer, optimizer_params=optimizer_params, @@ -173,7 +252,7 @@ class CNNCreator_CifarClassifierNetwork: def construct(self, context, data_mean=None, data_std=None): data = mx.sym.var("data", - shape=(0,3,32,32)) + shape=(0,3,32,32)) # data, output shape: {[3,32,32]} if 
not data_mean is None: @@ -185,482 +264,484 @@ class CNNCreator_CifarClassifierNetwork: data = mx.symbol.broadcast_sub(data, _data_mean_) data = mx.symbol.broadcast_div(data, _data_std_) conv2_1_ = mx.symbol.pad(data=data, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv2_1_ = mx.symbol.Convolution(data=conv2_1_, - kernel=(3,3), - stride=(1,1), - num_filter=8, - no_bias=False, - name="conv2_1_") + kernel=(3,3), + stride=(1,1), + num_filter=8, + no_bias=False, + name="conv2_1_") # conv2_1_, output shape: {[8,32,32]} batchnorm2_1_ = mx.symbol.BatchNorm(data=conv2_1_, - fix_gamma=True, - name="batchnorm2_1_") + fix_gamma=True, + name="batchnorm2_1_") relu2_1_ = mx.symbol.Activation(data=batchnorm2_1_, - act_type='relu', - name="relu2_1_") + act_type='relu', + name="relu2_1_") conv3_1_ = mx.symbol.pad(data=relu2_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv3_1_ = mx.symbol.Convolution(data=conv3_1_, - kernel=(3,3), - stride=(1,1), - num_filter=8, - no_bias=False, - name="conv3_1_") + kernel=(3,3), + stride=(1,1), + num_filter=8, + no_bias=False, + name="conv3_1_") # conv3_1_, output shape: {[8,32,32]} batchnorm3_1_ = mx.symbol.BatchNorm(data=conv3_1_, - fix_gamma=True, - name="batchnorm3_1_") + fix_gamma=True, + name="batchnorm3_1_") conv2_2_ = mx.symbol.Convolution(data=data, - kernel=(1,1), - stride=(1,1), - num_filter=8, - no_bias=False, - name="conv2_2_") + kernel=(1,1), + stride=(1,1), + num_filter=8, + no_bias=False, + name="conv2_2_") # conv2_2_, output shape: {[8,32,32]} batchnorm2_2_ = mx.symbol.BatchNorm(data=conv2_2_, - fix_gamma=True, - name="batchnorm2_2_") + fix_gamma=True, + name="batchnorm2_2_") add4_ = batchnorm3_1_ + batchnorm2_2_ # add4_, output shape: {[8,32,32]} relu4_ = mx.symbol.Activation(data=add4_, - act_type='relu', - name="relu4_") + act_type='relu', + name="relu4_") conv5_1_ = mx.symbol.pad(data=relu4_, - mode='constant', - pad_width=(0,0,0,0,1,0,1,0), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,0,1,0), + constant_value=0) conv5_1_ = mx.symbol.Convolution(data=conv5_1_, - kernel=(3,3), - stride=(2,2), - num_filter=16, - no_bias=False, - name="conv5_1_") + kernel=(3,3), + stride=(2,2), + num_filter=16, + no_bias=False, + name="conv5_1_") # conv5_1_, output shape: {[16,16,16]} batchnorm5_1_ = mx.symbol.BatchNorm(data=conv5_1_, - fix_gamma=True, - name="batchnorm5_1_") + fix_gamma=True, + name="batchnorm5_1_") relu5_1_ = mx.symbol.Activation(data=batchnorm5_1_, - act_type='relu', - name="relu5_1_") + act_type='relu', + name="relu5_1_") conv6_1_ = mx.symbol.pad(data=relu5_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv6_1_ = mx.symbol.Convolution(data=conv6_1_, - kernel=(3,3), - stride=(1,1), - num_filter=16, - no_bias=False, - name="conv6_1_") + kernel=(3,3), + stride=(1,1), + num_filter=16, + no_bias=False, + name="conv6_1_") # conv6_1_, output shape: {[16,16,16]} batchnorm6_1_ = mx.symbol.BatchNorm(data=conv6_1_, - fix_gamma=True, - name="batchnorm6_1_") + fix_gamma=True, + name="batchnorm6_1_") conv5_2_ = mx.symbol.Convolution(data=relu4_, - kernel=(1,1), - stride=(2,2), - num_filter=16, - no_bias=False, - name="conv5_2_") + kernel=(1,1), + stride=(2,2), + num_filter=16, + no_bias=False, + name="conv5_2_") # conv5_2_, output shape: {[16,16,16]} 
batchnorm5_2_ = mx.symbol.BatchNorm(data=conv5_2_, - fix_gamma=True, - name="batchnorm5_2_") + fix_gamma=True, + name="batchnorm5_2_") add7_ = batchnorm6_1_ + batchnorm5_2_ # add7_, output shape: {[16,16,16]} relu7_ = mx.symbol.Activation(data=add7_, - act_type='relu', - name="relu7_") + act_type='relu', + name="relu7_") conv8_1_ = mx.symbol.pad(data=relu7_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv8_1_ = mx.symbol.Convolution(data=conv8_1_, - kernel=(3,3), - stride=(1,1), - num_filter=16, - no_bias=False, - name="conv8_1_") + kernel=(3,3), + stride=(1,1), + num_filter=16, + no_bias=False, + name="conv8_1_") # conv8_1_, output shape: {[16,16,16]} batchnorm8_1_ = mx.symbol.BatchNorm(data=conv8_1_, - fix_gamma=True, - name="batchnorm8_1_") + fix_gamma=True, + name="batchnorm8_1_") relu8_1_ = mx.symbol.Activation(data=batchnorm8_1_, - act_type='relu', - name="relu8_1_") + act_type='relu', + name="relu8_1_") conv9_1_ = mx.symbol.pad(data=relu8_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv9_1_ = mx.symbol.Convolution(data=conv9_1_, - kernel=(3,3), - stride=(1,1), - num_filter=16, - no_bias=False, - name="conv9_1_") + kernel=(3,3), + stride=(1,1), + num_filter=16, + no_bias=False, + name="conv9_1_") # conv9_1_, output shape: {[16,16,16]} batchnorm9_1_ = mx.symbol.BatchNorm(data=conv9_1_, - fix_gamma=True, - name="batchnorm9_1_") + fix_gamma=True, + name="batchnorm9_1_") add10_ = batchnorm9_1_ + relu7_ # add10_, output shape: {[16,16,16]} relu10_ = mx.symbol.Activation(data=add10_, - act_type='relu', - name="relu10_") + act_type='relu', + name="relu10_") conv11_1_ = mx.symbol.pad(data=relu10_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv11_1_ = mx.symbol.Convolution(data=conv11_1_, - kernel=(3,3), - stride=(1,1), - num_filter=16, - no_bias=False, - name="conv11_1_") + kernel=(3,3), + stride=(1,1), + num_filter=16, + no_bias=False, + name="conv11_1_") # conv11_1_, output shape: {[16,16,16]} batchnorm11_1_ = mx.symbol.BatchNorm(data=conv11_1_, - fix_gamma=True, - name="batchnorm11_1_") + fix_gamma=True, + name="batchnorm11_1_") relu11_1_ = mx.symbol.Activation(data=batchnorm11_1_, - act_type='relu', - name="relu11_1_") + act_type='relu', + name="relu11_1_") conv12_1_ = mx.symbol.pad(data=relu11_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv12_1_ = mx.symbol.Convolution(data=conv12_1_, - kernel=(3,3), - stride=(1,1), - num_filter=16, - no_bias=False, - name="conv12_1_") + kernel=(3,3), + stride=(1,1), + num_filter=16, + no_bias=False, + name="conv12_1_") # conv12_1_, output shape: {[16,16,16]} batchnorm12_1_ = mx.symbol.BatchNorm(data=conv12_1_, - fix_gamma=True, - name="batchnorm12_1_") + fix_gamma=True, + name="batchnorm12_1_") add13_ = batchnorm12_1_ + relu10_ # add13_, output shape: {[16,16,16]} relu13_ = mx.symbol.Activation(data=add13_, - act_type='relu', - name="relu13_") + act_type='relu', + name="relu13_") conv14_1_ = mx.symbol.pad(data=relu13_, - mode='constant', - pad_width=(0,0,0,0,1,0,1,0), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,0,1,0), + constant_value=0) conv14_1_ = mx.symbol.Convolution(data=conv14_1_, - kernel=(3,3), - stride=(2,2), - num_filter=32, - 
no_bias=False, - name="conv14_1_") + kernel=(3,3), + stride=(2,2), + num_filter=32, + no_bias=False, + name="conv14_1_") # conv14_1_, output shape: {[32,8,8]} batchnorm14_1_ = mx.symbol.BatchNorm(data=conv14_1_, - fix_gamma=True, - name="batchnorm14_1_") + fix_gamma=True, + name="batchnorm14_1_") relu14_1_ = mx.symbol.Activation(data=batchnorm14_1_, - act_type='relu', - name="relu14_1_") + act_type='relu', + name="relu14_1_") conv15_1_ = mx.symbol.pad(data=relu14_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv15_1_ = mx.symbol.Convolution(data=conv15_1_, - kernel=(3,3), - stride=(1,1), - num_filter=32, - no_bias=False, - name="conv15_1_") + kernel=(3,3), + stride=(1,1), + num_filter=32, + no_bias=False, + name="conv15_1_") # conv15_1_, output shape: {[32,8,8]} batchnorm15_1_ = mx.symbol.BatchNorm(data=conv15_1_, - fix_gamma=True, - name="batchnorm15_1_") + fix_gamma=True, + name="batchnorm15_1_") conv14_2_ = mx.symbol.Convolution(data=relu13_, - kernel=(1,1), - stride=(2,2), - num_filter=32, - no_bias=False, - name="conv14_2_") + kernel=(1,1), + stride=(2,2), + num_filter=32, + no_bias=False, + name="conv14_2_") # conv14_2_, output shape: {[32,8,8]} batchnorm14_2_ = mx.symbol.BatchNorm(data=conv14_2_, - fix_gamma=True, - name="batchnorm14_2_") + fix_gamma=True, + name="batchnorm14_2_") add16_ = batchnorm15_1_ + batchnorm14_2_ # add16_, output shape: {[32,8,8]} relu16_ = mx.symbol.Activation(data=add16_, - act_type='relu', - name="relu16_") + act_type='relu', + name="relu16_") conv17_1_ = mx.symbol.pad(data=relu16_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv17_1_ = mx.symbol.Convolution(data=conv17_1_, - kernel=(3,3), - stride=(1,1), - num_filter=32, - no_bias=False, - name="conv17_1_") + kernel=(3,3), + stride=(1,1), + num_filter=32, + no_bias=False, + name="conv17_1_") # conv17_1_, output shape: {[32,8,8]} batchnorm17_1_ = mx.symbol.BatchNorm(data=conv17_1_, - fix_gamma=True, - name="batchnorm17_1_") + fix_gamma=True, + name="batchnorm17_1_") relu17_1_ = mx.symbol.Activation(data=batchnorm17_1_, - act_type='relu', - name="relu17_1_") + act_type='relu', + name="relu17_1_") conv18_1_ = mx.symbol.pad(data=relu17_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv18_1_ = mx.symbol.Convolution(data=conv18_1_, - kernel=(3,3), - stride=(1,1), - num_filter=32, - no_bias=False, - name="conv18_1_") + kernel=(3,3), + stride=(1,1), + num_filter=32, + no_bias=False, + name="conv18_1_") # conv18_1_, output shape: {[32,8,8]} batchnorm18_1_ = mx.symbol.BatchNorm(data=conv18_1_, - fix_gamma=True, - name="batchnorm18_1_") + fix_gamma=True, + name="batchnorm18_1_") add19_ = batchnorm18_1_ + relu16_ # add19_, output shape: {[32,8,8]} relu19_ = mx.symbol.Activation(data=add19_, - act_type='relu', - name="relu19_") + act_type='relu', + name="relu19_") conv20_1_ = mx.symbol.pad(data=relu19_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv20_1_ = mx.symbol.Convolution(data=conv20_1_, - kernel=(3,3), - stride=(1,1), - num_filter=32, - no_bias=False, - name="conv20_1_") + kernel=(3,3), + stride=(1,1), + num_filter=32, + no_bias=False, + name="conv20_1_") # conv20_1_, output shape: {[32,8,8]} batchnorm20_1_ = 
mx.symbol.BatchNorm(data=conv20_1_, - fix_gamma=True, - name="batchnorm20_1_") + fix_gamma=True, + name="batchnorm20_1_") relu20_1_ = mx.symbol.Activation(data=batchnorm20_1_, - act_type='relu', - name="relu20_1_") + act_type='relu', + name="relu20_1_") conv21_1_ = mx.symbol.pad(data=relu20_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv21_1_ = mx.symbol.Convolution(data=conv21_1_, - kernel=(3,3), - stride=(1,1), - num_filter=32, - no_bias=False, - name="conv21_1_") + kernel=(3,3), + stride=(1,1), + num_filter=32, + no_bias=False, + name="conv21_1_") # conv21_1_, output shape: {[32,8,8]} batchnorm21_1_ = mx.symbol.BatchNorm(data=conv21_1_, - fix_gamma=True, - name="batchnorm21_1_") + fix_gamma=True, + name="batchnorm21_1_") add22_ = batchnorm21_1_ + relu19_ # add22_, output shape: {[32,8,8]} relu22_ = mx.symbol.Activation(data=add22_, - act_type='relu', - name="relu22_") + act_type='relu', + name="relu22_") conv23_1_ = mx.symbol.pad(data=relu22_, - mode='constant', - pad_width=(0,0,0,0,1,0,1,0), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,0,1,0), + constant_value=0) conv23_1_ = mx.symbol.Convolution(data=conv23_1_, - kernel=(3,3), - stride=(2,2), - num_filter=64, - no_bias=False, - name="conv23_1_") + kernel=(3,3), + stride=(2,2), + num_filter=64, + no_bias=False, + name="conv23_1_") # conv23_1_, output shape: {[64,4,4]} batchnorm23_1_ = mx.symbol.BatchNorm(data=conv23_1_, - fix_gamma=True, - name="batchnorm23_1_") + fix_gamma=True, + name="batchnorm23_1_") relu23_1_ = mx.symbol.Activation(data=batchnorm23_1_, - act_type='relu', - name="relu23_1_") + act_type='relu', + name="relu23_1_") conv24_1_ = mx.symbol.pad(data=relu23_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv24_1_ = mx.symbol.Convolution(data=conv24_1_, - kernel=(3,3), - stride=(1,1), - num_filter=64, - no_bias=False, - name="conv24_1_") + kernel=(3,3), + stride=(1,1), + num_filter=64, + no_bias=False, + name="conv24_1_") # conv24_1_, output shape: {[64,4,4]} batchnorm24_1_ = mx.symbol.BatchNorm(data=conv24_1_, - fix_gamma=True, - name="batchnorm24_1_") + fix_gamma=True, + name="batchnorm24_1_") conv23_2_ = mx.symbol.Convolution(data=relu22_, - kernel=(1,1), - stride=(2,2), - num_filter=64, - no_bias=False, - name="conv23_2_") + kernel=(1,1), + stride=(2,2), + num_filter=64, + no_bias=False, + name="conv23_2_") # conv23_2_, output shape: {[64,4,4]} batchnorm23_2_ = mx.symbol.BatchNorm(data=conv23_2_, - fix_gamma=True, - name="batchnorm23_2_") + fix_gamma=True, + name="batchnorm23_2_") add25_ = batchnorm24_1_ + batchnorm23_2_ # add25_, output shape: {[64,4,4]} relu25_ = mx.symbol.Activation(data=add25_, - act_type='relu', - name="relu25_") + act_type='relu', + name="relu25_") conv26_1_ = mx.symbol.pad(data=relu25_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv26_1_ = mx.symbol.Convolution(data=conv26_1_, - kernel=(3,3), - stride=(1,1), - num_filter=64, - no_bias=False, - name="conv26_1_") + kernel=(3,3), + stride=(1,1), + num_filter=64, + no_bias=False, + name="conv26_1_") # conv26_1_, output shape: {[64,4,4]} batchnorm26_1_ = mx.symbol.BatchNorm(data=conv26_1_, - fix_gamma=True, - name="batchnorm26_1_") + fix_gamma=True, + name="batchnorm26_1_") relu26_1_ = mx.symbol.Activation(data=batchnorm26_1_, - 
act_type='relu', - name="relu26_1_") + act_type='relu', + name="relu26_1_") conv27_1_ = mx.symbol.pad(data=relu26_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv27_1_ = mx.symbol.Convolution(data=conv27_1_, - kernel=(3,3), - stride=(1,1), - num_filter=64, - no_bias=False, - name="conv27_1_") + kernel=(3,3), + stride=(1,1), + num_filter=64, + no_bias=False, + name="conv27_1_") # conv27_1_, output shape: {[64,4,4]} batchnorm27_1_ = mx.symbol.BatchNorm(data=conv27_1_, - fix_gamma=True, - name="batchnorm27_1_") + fix_gamma=True, + name="batchnorm27_1_") add28_ = batchnorm27_1_ + relu25_ # add28_, output shape: {[64,4,4]} relu28_ = mx.symbol.Activation(data=add28_, - act_type='relu', - name="relu28_") + act_type='relu', + name="relu28_") conv29_1_ = mx.symbol.pad(data=relu28_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv29_1_ = mx.symbol.Convolution(data=conv29_1_, - kernel=(3,3), - stride=(1,1), - num_filter=64, - no_bias=False, - name="conv29_1_") + kernel=(3,3), + stride=(1,1), + num_filter=64, + no_bias=False, + name="conv29_1_") # conv29_1_, output shape: {[64,4,4]} batchnorm29_1_ = mx.symbol.BatchNorm(data=conv29_1_, - fix_gamma=True, - name="batchnorm29_1_") + fix_gamma=True, + name="batchnorm29_1_") relu29_1_ = mx.symbol.Activation(data=batchnorm29_1_, - act_type='relu', - name="relu29_1_") + act_type='relu', + name="relu29_1_") conv30_1_ = mx.symbol.pad(data=relu29_1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv30_1_ = mx.symbol.Convolution(data=conv30_1_, - kernel=(3,3), - stride=(1,1), - num_filter=64, - no_bias=False, - name="conv30_1_") + kernel=(3,3), + stride=(1,1), + num_filter=64, + no_bias=False, + name="conv30_1_") # conv30_1_, output shape: {[64,4,4]} batchnorm30_1_ = mx.symbol.BatchNorm(data=conv30_1_, - fix_gamma=True, - name="batchnorm30_1_") + fix_gamma=True, + name="batchnorm30_1_") add31_ = batchnorm30_1_ + relu28_ # add31_, output shape: {[64,4,4]} relu31_ = mx.symbol.Activation(data=add31_, - act_type='relu', - name="relu31_") + act_type='relu', + name="relu31_") globalpooling31_ = mx.symbol.Pooling(data=relu31_, - global_pool=True, - kernel=(1,1), - pool_type="avg", - name="globalpooling31_") + global_pool=True, + kernel=(1,1), + pool_type="avg", + name="globalpooling31_") # globalpooling31_, output shape: {[64,1,1]} fc31_ = mx.symbol.FullyConnected(data=globalpooling31_, - num_hidden=128, - no_bias=False, - name="fc31_") + num_hidden=128, + no_bias=False, + name="fc31_") dropout31_ = mx.symbol.Dropout(data=fc31_, - p=0.5, - name="dropout31_") + p=0.5, + name="dropout31_") fc32_ = mx.symbol.FullyConnected(data=dropout31_, - num_hidden=10, - no_bias=False, - name="fc32_") - - softmax = mx.symbol.SoftmaxOutput(data=fc32_, - name="softmax") + num_hidden=10, + no_bias=False, + name="fc32_") + softmax32_ = mx.symbol.softmax(data=fc32_, + axis=1, + name="softmax32_") + softmax = mx.symbol.SoftmaxOutput(data=softmax32_, + name="softmax") self.module = mx.mod.Module(symbol=mx.symbol.Group([softmax]), - data_names=self._input_names_, - label_names=self._output_names_, - context=context) + data_names=self._input_names_, + label_names=self._output_names_, + context=context) diff --git a/src/test/resources/target_code/CNNCreator_VGG16.py 
b/src/test/resources/target_code/CNNCreator_VGG16.py index 3db6883..fba9b6c 100644 --- a/src/test/resources/target_code/CNNCreator_VGG16.py +++ b/src/test/resources/target_code/CNNCreator_VGG16.py @@ -52,10 +52,10 @@ class CNNCreator_VGG16: else: logging.info("Loading checkpoint: " + param_file) self.module.load(prefix=self._model_dir_ + self._model_prefix_, - epoch=lastEpoch, - data_names=self._input_names_, - label_names=self._output_names_, - context=context) + epoch=lastEpoch, + data_names=self._input_names_, + label_names=self._output_names_, + context=context) return lastEpoch @@ -104,10 +104,80 @@ class CNNCreator_VGG16: logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.") sys.exit(1) + def loss_function(self, loss, params): + label = mx.symbol.var(name=self._output_names_[0], ) + prediction = self.module.symbol.get_children()[0] + + margin = params['margin'] if 'margin' in params else 1.0 + sparseLabel = params['sparse_label'] if 'sparse_label' in params else True + + if loss == 'softmax_cross_entropy': + fromLogits = params['from_logits'] if 'from_logits' in params else False + if not fromLogits: + prediction = mx.symbol.log_softmax(data=prediction, axis=1) + if sparseLabel: + loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True) + else: + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="softmax_cross_entropy") + elif loss == 'cross_entropy': + prediction = mx.symbol.log(prediction) + if sparseLabel: + loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True) + else: + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="cross_entropy") + elif loss == 'sigmoid_binary_cross_entropy': + loss_func = mx.symbol.LogisticRegressionOutput(data=prediction, name=self.module.symbol.name) + elif loss == 'l1': + loss_func = mx.symbol.MAERegressionOutput(data=prediction, name=self.module.symbol.name) + elif loss == 'l2': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.square((label - prediction) / 2), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="L2") + elif loss == 'huber': + rho = params['rho'] if 'rho' in params else 1 + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.abs(label - prediction) + loss_func = mx.symbol.where(loss_func > rho, loss_func - 0.5 * rho, (0.5 / rho) * mx.symbol.square(loss_func)) + loss_func = mx.symbol.mean(loss_func, axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="huber") + elif loss == 'hinge': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.relu(margin - prediction * label), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="hinge") + elif loss == 'squared_hinge': + label = mx.symbol.reshape_like(label, prediction) + loss_func = mx.symbol.mean(mx.symbol.square(mx.symbol.relu(margin - prediction * label)), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="squared_hinge") + elif loss == 'logistic': + labelFormat = params['label_format'] if 'label_format' in params else 'signed' + if labelFormat not in 
["binary", "signed"]: + logging.error("label_format can only be signed or binary") + label = mx.symbol.reshape_like(label, prediction) + if labelFormat == 'signed': + label = (label + 1.0)/2.0 + loss_func = mx.symbol.relu(prediction) - prediction * label + loss_func = loss_func + mx.symbol.Activation(-mx.symbol.abs(prediction), act_type="softrelu") + loss_func = mx.symbol.MakeLoss(mx.symbol.mean(loss_func, 0, exclude=True), name="logistic") + elif loss == 'kullback_leibler': + fromLogits = params['from_logits'] if 'from_logits' in params else True + if not fromLogits: + prediction = mx.symbol.log_softmax(prediction, axis=1) + loss_func = mx.symbol.mean(label * (mx.symbol.log(label) - prediction), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="kullback_leibler") + else: + logging.error("Invalid loss parameter.") + + return loss_func def train(self, batch_size=64, num_epoch=10, eval_metric='acc', + loss ='softmax_cross_entropy', + loss_params={}, optimizer='adam', optimizer_params=(('learning_rate', 0.001),), load_checkpoint=True, @@ -130,13 +200,12 @@ class CNNCreator_VGG16: min_learning_rate = optimizer_params['learning_rate_minimum'] del optimizer_params['learning_rate_minimum'] optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler( - optimizer_params['step_size'], - factor=optimizer_params['learning_rate_decay'], - stop_factor_lr=min_learning_rate) + optimizer_params['step_size'], + factor=optimizer_params['learning_rate_decay'], + stop_factor_lr=min_learning_rate) del optimizer_params['step_size'] del optimizer_params['learning_rate_decay'] - train_iter, test_iter, data_mean, data_std = self.load_data(batch_size) if self.module == None: if normalize: @@ -144,6 +213,14 @@ class CNNCreator_VGG16: else: self.construct(mx_context) + loss_func = self.loss_function(loss=loss, params=loss_params) + + self.module = mx.mod.Module( + symbol=mx.symbol.Group([loss_func, mx.symbol.BlockGrad(self.module.symbol.get_children()[0], name="pred")]), + data_names=self._input_names_, + label_names=self._output_names_, + context=mx_context) + begin_epoch = 0 if load_checkpoint: begin_epoch = self.load(mx_context) @@ -157,9 +234,11 @@ class CNNCreator_VGG16: if not os.path.isdir(self._model_dir_): raise + metric = mx.metric.create(eval_metric, output_names=['pred_output']) + self.module.fit( train_data=train_iter, - eval_metric=eval_metric, + eval_metric=metric, eval_data=test_iter, optimizer=optimizer, optimizer_params=optimizer_params, @@ -173,7 +252,7 @@ class CNNCreator_VGG16: def construct(self, context, data_mean=None, data_std=None): data = mx.sym.var("data", - shape=(0,3,224,224)) + shape=(0,3,224,224)) # data, output shape: {[3,224,224]} if not data_mean is None: @@ -185,280 +264,282 @@ class CNNCreator_VGG16: data = mx.symbol.broadcast_sub(data, _data_mean_) data = mx.symbol.broadcast_div(data, _data_std_) conv1_ = mx.symbol.pad(data=data, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv1_ = mx.symbol.Convolution(data=conv1_, - kernel=(3,3), - stride=(1,1), - num_filter=64, - no_bias=False, - name="conv1_") + kernel=(3,3), + stride=(1,1), + num_filter=64, + no_bias=False, + name="conv1_") # conv1_, output shape: {[64,224,224]} relu1_ = mx.symbol.Activation(data=conv1_, - act_type='relu', - name="relu1_") + act_type='relu', + name="relu1_") conv2_ = mx.symbol.pad(data=relu1_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + 
pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv2_ = mx.symbol.Convolution(data=conv2_, - kernel=(3,3), - stride=(1,1), - num_filter=64, - no_bias=False, - name="conv2_") + kernel=(3,3), + stride=(1,1), + num_filter=64, + no_bias=False, + name="conv2_") # conv2_, output shape: {[64,224,224]} relu2_ = mx.symbol.Activation(data=conv2_, - act_type='relu', - name="relu2_") + act_type='relu', + name="relu2_") pool2_ = mx.symbol.Pooling(data=relu2_, - kernel=(2,2), - pool_type="max", - stride=(2,2), - name="pool2_") + kernel=(2,2), + pool_type="max", + stride=(2,2), + name="pool2_") # pool2_, output shape: {[64,112,112]} conv3_ = mx.symbol.pad(data=pool2_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv3_ = mx.symbol.Convolution(data=conv3_, - kernel=(3,3), - stride=(1,1), - num_filter=128, - no_bias=False, - name="conv3_") + kernel=(3,3), + stride=(1,1), + num_filter=128, + no_bias=False, + name="conv3_") # conv3_, output shape: {[128,112,112]} relu3_ = mx.symbol.Activation(data=conv3_, - act_type='relu', - name="relu3_") + act_type='relu', + name="relu3_") conv4_ = mx.symbol.pad(data=relu3_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv4_ = mx.symbol.Convolution(data=conv4_, - kernel=(3,3), - stride=(1,1), - num_filter=128, - no_bias=False, - name="conv4_") + kernel=(3,3), + stride=(1,1), + num_filter=128, + no_bias=False, + name="conv4_") # conv4_, output shape: {[128,112,112]} relu4_ = mx.symbol.Activation(data=conv4_, - act_type='relu', - name="relu4_") + act_type='relu', + name="relu4_") pool4_ = mx.symbol.Pooling(data=relu4_, - kernel=(2,2), - pool_type="max", - stride=(2,2), - name="pool4_") + kernel=(2,2), + pool_type="max", + stride=(2,2), + name="pool4_") # pool4_, output shape: {[128,56,56]} conv5_ = mx.symbol.pad(data=pool4_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv5_ = mx.symbol.Convolution(data=conv5_, - kernel=(3,3), - stride=(1,1), - num_filter=256, - no_bias=False, - name="conv5_") + kernel=(3,3), + stride=(1,1), + num_filter=256, + no_bias=False, + name="conv5_") # conv5_, output shape: {[256,56,56]} relu5_ = mx.symbol.Activation(data=conv5_, - act_type='relu', - name="relu5_") + act_type='relu', + name="relu5_") conv6_ = mx.symbol.pad(data=relu5_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv6_ = mx.symbol.Convolution(data=conv6_, - kernel=(3,3), - stride=(1,1), - num_filter=256, - no_bias=False, - name="conv6_") + kernel=(3,3), + stride=(1,1), + num_filter=256, + no_bias=False, + name="conv6_") # conv6_, output shape: {[256,56,56]} relu6_ = mx.symbol.Activation(data=conv6_, - act_type='relu', - name="relu6_") + act_type='relu', + name="relu6_") conv7_ = mx.symbol.pad(data=relu6_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv7_ = mx.symbol.Convolution(data=conv7_, - kernel=(3,3), - stride=(1,1), - num_filter=256, - no_bias=False, - name="conv7_") + kernel=(3,3), + stride=(1,1), + num_filter=256, + no_bias=False, + name="conv7_") # conv7_, output shape: {[256,56,56]} relu7_ = mx.symbol.Activation(data=conv7_, - act_type='relu', - name="relu7_") + act_type='relu', + name="relu7_") 
pool7_ = mx.symbol.Pooling(data=relu7_, - kernel=(2,2), - pool_type="max", - stride=(2,2), - name="pool7_") + kernel=(2,2), + pool_type="max", + stride=(2,2), + name="pool7_") # pool7_, output shape: {[256,28,28]} conv8_ = mx.symbol.pad(data=pool7_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv8_ = mx.symbol.Convolution(data=conv8_, - kernel=(3,3), - stride=(1,1), - num_filter=512, - no_bias=False, - name="conv8_") + kernel=(3,3), + stride=(1,1), + num_filter=512, + no_bias=False, + name="conv8_") # conv8_, output shape: {[512,28,28]} relu8_ = mx.symbol.Activation(data=conv8_, - act_type='relu', - name="relu8_") + act_type='relu', + name="relu8_") conv9_ = mx.symbol.pad(data=relu8_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv9_ = mx.symbol.Convolution(data=conv9_, - kernel=(3,3), - stride=(1,1), - num_filter=512, - no_bias=False, - name="conv9_") + kernel=(3,3), + stride=(1,1), + num_filter=512, + no_bias=False, + name="conv9_") # conv9_, output shape: {[512,28,28]} relu9_ = mx.symbol.Activation(data=conv9_, - act_type='relu', - name="relu9_") + act_type='relu', + name="relu9_") conv10_ = mx.symbol.pad(data=relu9_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv10_ = mx.symbol.Convolution(data=conv10_, - kernel=(3,3), - stride=(1,1), - num_filter=512, - no_bias=False, - name="conv10_") + kernel=(3,3), + stride=(1,1), + num_filter=512, + no_bias=False, + name="conv10_") # conv10_, output shape: {[512,28,28]} relu10_ = mx.symbol.Activation(data=conv10_, - act_type='relu', - name="relu10_") + act_type='relu', + name="relu10_") pool10_ = mx.symbol.Pooling(data=relu10_, - kernel=(2,2), - pool_type="max", - stride=(2,2), - name="pool10_") + kernel=(2,2), + pool_type="max", + stride=(2,2), + name="pool10_") # pool10_, output shape: {[512,14,14]} conv11_ = mx.symbol.pad(data=pool10_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv11_ = mx.symbol.Convolution(data=conv11_, - kernel=(3,3), - stride=(1,1), - num_filter=512, - no_bias=False, - name="conv11_") + kernel=(3,3), + stride=(1,1), + num_filter=512, + no_bias=False, + name="conv11_") # conv11_, output shape: {[512,14,14]} relu11_ = mx.symbol.Activation(data=conv11_, - act_type='relu', - name="relu11_") + act_type='relu', + name="relu11_") conv12_ = mx.symbol.pad(data=relu11_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv12_ = mx.symbol.Convolution(data=conv12_, - kernel=(3,3), - stride=(1,1), - num_filter=512, - no_bias=False, - name="conv12_") + kernel=(3,3), + stride=(1,1), + num_filter=512, + no_bias=False, + name="conv12_") # conv12_, output shape: {[512,14,14]} relu12_ = mx.symbol.Activation(data=conv12_, - act_type='relu', - name="relu12_") + act_type='relu', + name="relu12_") conv13_ = mx.symbol.pad(data=relu12_, - mode='constant', - pad_width=(0,0,0,0,1,1,1,1), - constant_value=0) + mode='constant', + pad_width=(0,0,0,0,1,1,1,1), + constant_value=0) conv13_ = mx.symbol.Convolution(data=conv13_, - kernel=(3,3), - stride=(1,1), - num_filter=512, - no_bias=False, - name="conv13_") + kernel=(3,3), + stride=(1,1), + num_filter=512, + no_bias=False, + 
name="conv13_") # conv13_, output shape: {[512,14,14]} relu13_ = mx.symbol.Activation(data=conv13_, - act_type='relu', - name="relu13_") + act_type='relu', + name="relu13_") pool13_ = mx.symbol.Pooling(data=relu13_, - kernel=(2,2), - pool_type="max", - stride=(2,2), - name="pool13_") + kernel=(2,2), + pool_type="max", + stride=(2,2), + name="pool13_") # pool13_, output shape: {[512,7,7]} fc13_ = mx.symbol.flatten(data=pool13_) fc13_ = mx.symbol.FullyConnected(data=fc13_, - num_hidden=4096, - no_bias=False, - name="fc13_") + num_hidden=4096, + no_bias=False, + name="fc13_") relu14_ = mx.symbol.Activation(data=fc13_, - act_type='relu', - name="relu14_") + act_type='relu', + name="relu14_") dropout14_ = mx.symbol.Dropout(data=relu14_, - p=0.5, - name="dropout14_") + p=0.5, + name="dropout14_") fc14_ = mx.symbol.FullyConnected(data=dropout14_, - num_hidden=4096, - no_bias=False, - name="fc14_") + num_hidden=4096, + no_bias=False, + name="fc14_") relu15_ = mx.symbol.Activation(data=fc14_, - act_type='relu', - name="relu15_") + act_type='relu', + name="relu15_") dropout15_ = mx.symbol.Dropout(data=relu15_, - p=0.5, - name="dropout15_") + p=0.5, + name="dropout15_") fc15_ = mx.symbol.FullyConnected(data=dropout15_, - num_hidden=1000, - no_bias=False, - name="fc15_") - - predictions = mx.symbol.SoftmaxOutput(data=fc15_, - name="predictions") + num_hidden=1000, + no_bias=False, + name="fc15_") + softmax15_ = mx.symbol.softmax(data=fc15_, + axis=1, + name="softmax15_") + predictions = mx.symbol.SoftmaxOutput(data=softmax15_, + name="predictions") self.module = mx.mod.Module(symbol=mx.symbol.Group([predictions]), - data_names=self._input_names_, - label_names=self._output_names_, - context=context) + data_names=self._input_names_, + label_names=self._output_names_, + context=context) diff --git a/src/test/resources/target_code/CNNTrainer_fullConfig.py b/src/test/resources/target_code/CNNTrainer_fullConfig.py index 2822ae2..d933f32 100644 --- a/src/test/resources/target_code/CNNTrainer_fullConfig.py +++ b/src/test/resources/target_code/CNNTrainer_fullConfig.py @@ -16,6 +16,10 @@ if __name__ == "__main__": context='gpu', normalize=True, eval_metric='mse', + loss='softmax_cross_entropy', + loss_params={ + 'sparse_label': True, + 'from_logits': False}, optimizer='rmsprop', optimizer_params={ 'weight_decay': 0.01, diff --git a/src/test/resources/target_code/CNNTrainer_simpleConfig.py b/src/test/resources/target_code/CNNTrainer_simpleConfig.py index bf9f002..8e915d5 100644 --- a/src/test/resources/target_code/CNNTrainer_simpleConfig.py +++ b/src/test/resources/target_code/CNNTrainer_simpleConfig.py @@ -12,6 +12,7 @@ if __name__ == "__main__": simpleConfig.train( batch_size=100, num_epoch=50, + loss='cross_entropy', optimizer='adam', optimizer_params={ 'learning_rate': 0.001} diff --git a/src/test/resources/valid_tests/FullConfig.cnnt b/src/test/resources/valid_tests/FullConfig.cnnt index df3313b..b8de4fc 100644 --- a/src/test/resources/valid_tests/FullConfig.cnnt +++ b/src/test/resources/valid_tests/FullConfig.cnnt @@ -3,6 +3,10 @@ configuration FullConfig{ batch_size : 100 load_checkpoint : true eval_metric : mse + loss: softmax_cross_entropy{ + sparse_label: true + from_logits: false + } context : gpu normalize : true optimizer : rmsprop{ diff --git a/src/test/resources/valid_tests/SimpleConfig.cnnt b/src/test/resources/valid_tests/SimpleConfig.cnnt index d3d3156..fe2672b 100644 --- a/src/test/resources/valid_tests/SimpleConfig.cnnt +++ b/src/test/resources/valid_tests/SimpleConfig.cnnt @@ -1,6 +1,7 @@ 
configuration SimpleConfig{ num_epoch : 50 batch_size : 100 + loss : cross_entropy optimizer : adam{ learning_rate : 0.001 } -- GitLab From 75d6c0dc6a5e72f7fd8289379b92599af2a6e54c Mon Sep 17 00:00:00 2001 From: eyuhar Date: Wed, 5 Jun 2019 12:29:05 +0200 Subject: [PATCH 2/4] ignore vscode --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1178002..184aba8 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,6 @@ nppBackup .classpath .idea .git - +.vscode *.iml -- GitLab From 7c76d83e8307b54e4dee9d8877383b13e1530956 Mon Sep 17 00:00:00 2001 From: eyuhar Date: Fri, 7 Jun 2019 10:12:29 +0200 Subject: [PATCH 3/4] added log cosh loss --- pom.xml | 2 +- src/main/resources/templates/mxnet/CNNCreator.ftl | 3 +++ src/test/resources/target_code/CNNCreator_Alexnet.py | 3 +++ .../resources/target_code/CNNCreator_CifarClassifierNetwork.py | 3 +++ src/test/resources/target_code/CNNCreator_VGG16.py | 3 +++ 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5528e63..3dff708 100644 --- a/pom.xml +++ b/pom.xml @@ -16,7 +16,7 @@ 0.3.0-SNAPSHOT - 0.3.1-SNAPSHOT + 0.3.2-SNAPSHOT 0.1.4 diff --git a/src/main/resources/templates/mxnet/CNNCreator.ftl b/src/main/resources/templates/mxnet/CNNCreator.ftl index d8d51c7..0e47026 100644 --- a/src/main/resources/templates/mxnet/CNNCreator.ftl +++ b/src/main/resources/templates/mxnet/CNNCreator.ftl @@ -168,6 +168,9 @@ class ${tc.fileNameWithoutEnding}: prediction = mx.symbol.log_softmax(prediction, axis=1) loss_func = mx.symbol.mean(label * (mx.symbol.log(label) - prediction), axis=0, exclude=True) loss_func = mx.symbol.MakeLoss(loss_func, name="kullback_leibler") + elif loss == 'log_cosh': + loss_func = mx.symbol.mean(mx.symbol.log(mx.symbol.cosh(prediction - label)), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="log_cosh") else: logging.error("Invalid loss parameter.") diff --git a/src/test/resources/target_code/CNNCreator_Alexnet.py b/src/test/resources/target_code/CNNCreator_Alexnet.py index 885d91d..cae57a8 100644 --- a/src/test/resources/target_code/CNNCreator_Alexnet.py +++ b/src/test/resources/target_code/CNNCreator_Alexnet.py @@ -168,6 +168,9 @@ class CNNCreator_Alexnet: prediction = mx.symbol.log_softmax(prediction, axis=1) loss_func = mx.symbol.mean(label * (mx.symbol.log(label) - prediction), axis=0, exclude=True) loss_func = mx.symbol.MakeLoss(loss_func, name="kullback_leibler") + elif loss == 'log_cosh': + loss_func = mx.symbol.mean(mx.symbol.log(mx.symbol.cosh(prediction - label)), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="log_cosh") else: logging.error("Invalid loss parameter.") diff --git a/src/test/resources/target_code/CNNCreator_CifarClassifierNetwork.py b/src/test/resources/target_code/CNNCreator_CifarClassifierNetwork.py index e2ba49c..4c1fe9c 100644 --- a/src/test/resources/target_code/CNNCreator_CifarClassifierNetwork.py +++ b/src/test/resources/target_code/CNNCreator_CifarClassifierNetwork.py @@ -168,6 +168,9 @@ class CNNCreator_CifarClassifierNetwork: prediction = mx.symbol.log_softmax(prediction, axis=1) loss_func = mx.symbol.mean(label * (mx.symbol.log(label) - prediction), axis=0, exclude=True) loss_func = mx.symbol.MakeLoss(loss_func, name="kullback_leibler") + elif loss == 'log_cosh': + loss_func = mx.symbol.mean(mx.symbol.log(mx.symbol.cosh(prediction - label)), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="log_cosh") else: logging.error("Invalid loss parameter.") 
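
Note on the log_cosh branch added in this patch to CNNCreator.ftl and to each generated creator: the following is a minimal standalone sketch, not generated code. It rebuilds the same MakeLoss(mean(log(cosh(prediction - label)))) expression with the MXNet 1.x symbolic API and checks it against a NumPy reference on toy data. The free variables "prediction" and "label" and the toy arrays are assumptions made purely for illustration.

    # Standalone sketch of the log-cosh loss construction (assumes MXNet 1.x).
    import mxnet as mx
    import numpy as np

    prediction = mx.symbol.var("prediction")
    label = mx.symbol.var("label")

    # Same expression as the template branch: per-sample mean over all axes
    # except the batch axis (axis 0), wrapped in MakeLoss.
    loss_sym = mx.symbol.mean(mx.symbol.log(mx.symbol.cosh(prediction - label)),
                              axis=0, exclude=True)
    loss_sym = mx.symbol.MakeLoss(loss_sym, name="log_cosh")

    # Toy data: two samples, two outputs each.
    pred_np = np.array([[0.2, 0.8], [0.6, 0.4]], dtype=np.float32)
    label_np = np.array([[0.0, 1.0], [1.0, 0.0]], dtype=np.float32)

    # Bind for a forward-only evaluation and compare against NumPy.
    ex = loss_sym.bind(mx.cpu(), {"prediction": mx.nd.array(pred_np),
                                  "label": mx.nd.array(label_np)})
    mx_loss = ex.forward()[0].asnumpy()
    np_loss = np.mean(np.log(np.cosh(pred_np - label_np)), axis=1)
    print(mx_loss, np_loss)  # per-sample log-cosh values should agree
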
diff --git a/src/test/resources/target_code/CNNCreator_VGG16.py b/src/test/resources/target_code/CNNCreator_VGG16.py index fba9b6c..39ce34b 100644 --- a/src/test/resources/target_code/CNNCreator_VGG16.py +++ b/src/test/resources/target_code/CNNCreator_VGG16.py @@ -168,6 +168,9 @@ class CNNCreator_VGG16: prediction = mx.symbol.log_softmax(prediction, axis=1) loss_func = mx.symbol.mean(label * (mx.symbol.log(label) - prediction), axis=0, exclude=True) loss_func = mx.symbol.MakeLoss(loss_func, name="kullback_leibler") + elif loss == 'log_cosh': + loss_func = mx.symbol.mean(mx.symbol.log(mx.symbol.cosh(prediction - label)), axis=0, exclude=True) + loss_func = mx.symbol.MakeLoss(loss_func, name="log_cosh") else: logging.error("Invalid loss parameter.") -- GitLab From 0d960ff85a2b458bc87bee16263b6c4247934d7d Mon Sep 17 00:00:00 2001 From: eyuhar Date: Mon, 24 Jun 2019 12:59:31 +0200 Subject: [PATCH 4/4] changed version --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 3dff708..0147d39 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ de.monticore.lang.monticar cnnarch-mxnet-generator - 0.2.15-SNAPSHOT + 0.2.16-SNAPSHOT @@ -16,7 +16,7 @@ 0.3.0-SNAPSHOT - 0.3.2-SNAPSHOT + 0.3.4-SNAPSHOT 0.1.4 -- GitLab
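
Closing note on the train() rework introduced in PATCH 1/4: the generated train() now groups the MakeLoss symbol with a gradient-blocked copy of the network output and creates the evaluation metric with output_names=['pred_output'], so the optimizer only back-propagates through the loss branch while accuracy is still measured on the original predictions. The sketch below reproduces that wiring on a toy two-class fully connected network under the MXNet 1.x Module API; the toy network, data iterator, and all names other than "pred" / "pred_output" are illustrative assumptions, not part of the generated code.

    # Sketch of the loss/prediction grouping used by the generated train() (assumes MXNet 1.x).
    import mxnet as mx
    import numpy as np

    data = mx.symbol.var("data")
    label = mx.symbol.var("softmax_label")

    # Toy stand-in for the generated network: one FC layer plus softmax.
    fc = mx.symbol.FullyConnected(data=data, num_hidden=2, name="fc")
    prediction = mx.symbol.softmax(data=fc, axis=1, name="softmax")

    # Loss branch, built the same way as the 'cross_entropy' case with sparse labels.
    log_p = mx.symbol.log(prediction)
    loss = mx.symbol.mean(-mx.symbol.pick(log_p, label, axis=-1, keepdims=True),
                          axis=0, exclude=True)
    loss = mx.symbol.MakeLoss(loss, name="cross_entropy")

    # Group the loss with a gradient-blocked prediction head named "pred";
    # its output is exposed as "pred_output" for the metric.
    sym = mx.symbol.Group([loss, mx.symbol.BlockGrad(prediction, name="pred")])

    mod = mx.mod.Module(symbol=sym,
                        data_names=["data"],
                        label_names=["softmax_label"],
                        context=mx.cpu())

    # Toy training data: 8 samples with 4 features and binary labels.
    x = np.random.uniform(size=(8, 4)).astype(np.float32)
    y = np.random.randint(0, 2, size=(8,)).astype(np.float32)
    train_iter = mx.io.NDArrayIter({"data": x}, {"softmax_label": y}, batch_size=4)

    # Metric reads only the gradient-blocked prediction head, as in the generated code.
    metric = mx.metric.create("acc", output_names=["pred_output"])
    mod.fit(train_data=train_iter,
            eval_metric=metric,
            optimizer="adam",
            optimizer_params={"learning_rate": 0.001},
            num_epoch=1)
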