diff --git a/pom.xml b/pom.xml
index 2e4bc4f3a7bd5d7a6f1f083bd5e3fadad00af0c8..dcaf7f77f38d4ce5dfc8844a4d2713c4911bee5d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -16,11 +16,11 @@
     0.2.8-SNAPSHOT
-    0.3.2-SNAPSHOT
-    0.0.1-SNAPSHOT
+    0.3.4-SNAPSHOT
+    0.0.2-SNAPSHOT
     0.2.16-SNAPSHOT
     0.2.12-SNAPSHOT
-    0.2.1-SNAPSHOT
+    0.2.2-SNAPSHOT
     0.1.4
diff --git a/src/test/resources/target_code/CNNCreator_cifar10_cifar10Classifier_net.py b/src/test/resources/target_code/CNNCreator_cifar10_cifar10Classifier_net.py
index 27adb268968710153d9c41d57cbdcee40bdeaa3b..02a413e00cd17082c7dbbab9ab7a1087e1d2d068 100644
--- a/src/test/resources/target_code/CNNCreator_cifar10_cifar10Classifier_net.py
+++ b/src/test/resources/target_code/CNNCreator_cifar10_cifar10Classifier_net.py
@@ -104,10 +104,83 @@ class CNNCreator_cifar10_cifar10Classifier_net:
             logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.")
             sys.exit(1)
 
+    def loss_function(self, loss, params):
+        label = mx.symbol.var(name=self._output_names_[0], )
+        prediction = self.module.symbol.get_children()[0]
+
+        margin = params['margin'] if 'margin' in params else 1.0
+        sparseLabel = params['sparse_label'] if 'sparse_label' in params else True
+
+        if loss == 'softmax_cross_entropy':
+            fromLogits = params['from_logits'] if 'from_logits' in params else False
+            if not fromLogits:
+                prediction = mx.symbol.log_softmax(data=prediction, axis=1)
+            if sparseLabel:
+                loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True)
+            else:
+                label = mx.symbol.reshape_like(label, prediction)
+                loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True)
+            loss_func = mx.symbol.MakeLoss(loss_func, name="softmax_cross_entropy")
+        elif loss == 'cross_entropy':
+            prediction = mx.symbol.log(prediction)
+            if sparseLabel:
+                loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True)
+            else:
+                label = mx.symbol.reshape_like(label, prediction)
+                loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True)
+            loss_func = mx.symbol.MakeLoss(loss_func, name="cross_entropy")
+        elif loss == 'sigmoid_binary_cross_entropy':
+            loss_func = mx.symbol.LogisticRegressionOutput(data=prediction, name=self.module.symbol.name)
+        elif loss == 'l1':
+            loss_func = mx.symbol.MAERegressionOutput(data=prediction, name=self.module.symbol.name)
+        elif loss == 'l2':
+            label = mx.symbol.reshape_like(label, prediction)
+            loss_func = mx.symbol.mean(mx.symbol.square((label - prediction) / 2), axis=0, exclude=True)
+            loss_func = mx.symbol.MakeLoss(loss_func, name="L2")
+        elif loss == 'huber':
+            rho = params['rho'] if 'rho' in params else 1
+            label = mx.symbol.reshape_like(label, prediction)
+            loss_func = mx.symbol.abs(label - prediction)
+            loss_func = mx.symbol.where(loss_func > rho, loss_func - 0.5 * rho, (0.5 / rho) * mx.symbol.square(loss_func))
+            loss_func = mx.symbol.mean(loss_func, axis=0, exclude=True)
+            loss_func = mx.symbol.MakeLoss(loss_func, name="huber")
+        elif loss == 'hinge':
+            label = mx.symbol.reshape_like(label, prediction)
+            loss_func = mx.symbol.mean(mx.symbol.relu(margin - prediction * label), axis=0, exclude=True)
+            loss_func = mx.symbol.MakeLoss(loss_func, name="hinge")
+        elif loss == 'squared_hinge':
+            label = mx.symbol.reshape_like(label, prediction)
+            loss_func = mx.symbol.mean(mx.symbol.square(mx.symbol.relu(margin - prediction * label)), axis=0, exclude=True)
+            loss_func = mx.symbol.MakeLoss(loss_func, name="squared_hinge")
+        elif loss == 'logistic':
+            labelFormat = params['label_format'] if 'label_format' in params else 'signed'
+            if labelFormat not in ["binary", "signed"]:
+                logging.error("label_format can only be signed or binary")
+            label = mx.symbol.reshape_like(label, prediction)
+            if labelFormat == 'signed':
+                label = (label + 1.0) / 2.0
+            loss_func = mx.symbol.relu(prediction) - prediction * label
+            loss_func = loss_func + mx.symbol.Activation(-mx.symbol.abs(prediction), act_type="softrelu")
+            loss_func = mx.symbol.MakeLoss(mx.symbol.mean(loss_func, 0, exclude=True), name="logistic")
+        elif loss == 'kullback_leibler':
+            fromLogits = params['from_logits'] if 'from_logits' in params else True
+            if not fromLogits:
+                prediction = mx.symbol.log_softmax(prediction, axis=1)
+            loss_func = mx.symbol.mean(label * (mx.symbol.log(label) - prediction), axis=0, exclude=True)
+            loss_func = mx.symbol.MakeLoss(loss_func, name="kullback_leibler")
+        elif loss == 'log_cosh':
+            loss_func = mx.symbol.mean(mx.symbol.log(mx.symbol.cosh(prediction - label)), axis=0, exclude=True)
+            loss_func = mx.symbol.MakeLoss(loss_func, name="log_cosh")
+        else:
+            logging.error("Invalid loss parameter.")
+
+        return loss_func
 
     def train(self, batch_size=64,
               num_epoch=10,
               eval_metric='acc',
+              loss='softmax_cross_entropy',
+              loss_params={},
               optimizer='adam',
               optimizer_params=(('learning_rate', 0.001),),
               load_checkpoint=True,
@@ -136,7 +209,6 @@ class CNNCreator_cifar10_cifar10Classifier_net:
             del optimizer_params['step_size']
             del optimizer_params['learning_rate_decay']
 
-        train_iter, test_iter, data_mean, data_std = self.load_data(batch_size)
 
         if self.module == None:
             if normalize:
@@ -144,6 +216,14 @@ class CNNCreator_cifar10_cifar10Classifier_net:
             else:
                 self.construct(mx_context)
 
+        loss_func = self.loss_function(loss=loss, params=loss_params)
+
+        self.module = mx.mod.Module(
+            symbol=mx.symbol.Group([loss_func, mx.symbol.BlockGrad(self.module.symbol.get_children()[0], name="pred")]),
+            data_names=self._input_names_,
+            label_names=self._output_names_,
+            context=mx_context)
+
         begin_epoch = 0
         if load_checkpoint:
             begin_epoch = self.load(mx_context)
@@ -157,9 +237,11 @@ class CNNCreator_cifar10_cifar10Classifier_net:
                 if not os.path.isdir(self._model_dir_):
                     raise
 
+        metric = mx.metric.create(eval_metric, output_names=['pred_output'])
+
         self.module.fit(
             train_data=train_iter,
-            eval_metric=eval_metric,
+            eval_metric=metric,
            eval_data=test_iter,
            optimizer=optimizer,
            optimizer_params=optimizer_params,
@@ -656,8 +738,10 @@ class CNNCreator_cifar10_cifar10Classifier_net:
                                          num_hidden=10,
                                          no_bias=False,
                                          name="fc32_")
-
-        softmax = mx.symbol.SoftmaxOutput(data=fc32_,
+        softmax32_ = mx.symbol.softmax(data=fc32_,
+                                       axis=1,
+                                       name="softmax32_")
+        softmax = mx.symbol.SoftmaxOutput(data=softmax32_,
                                           name="softmax")
 
         self.module = mx.mod.Module(symbol=mx.symbol.Group([softmax]),
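Usage sketch (hedged, not part of the patch): the regenerated train method now receives the selected loss purely through the new loss and loss_params keyword arguments, and loss_function wraps the chosen symbol in mx.symbol.MakeLoss. A minimal caller could look like the following, assuming the generated module above is importable and its training data is in place; the concrete values ('huber', rho=1.0) are illustrative only:

    # Hypothetical caller of the regenerated train() method; any branch handled by
    # loss_function() can be selected through loss / loss_params.
    from CNNCreator_cifar10_cifar10Classifier_net import CNNCreator_cifar10_cifar10Classifier_net

    creator = CNNCreator_cifar10_cifar10Classifier_net()
    creator.train(batch_size=64,
                  num_epoch=10,
                  eval_metric='acc',             # wrapped via mx.metric.create(..., output_names=['pred_output'])
                  loss='huber',
                  loss_params={'rho': 1.0},
                  optimizer='adam',
                  optimizer_params={'learning_rate': 0.001})
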
diff --git a/src/test/resources/target_code/gluon/CNNNet_mnist_mnistClassifier_net.py b/src/test/resources/target_code/gluon/CNNNet_mnist_mnistClassifier_net.py
index 166574071a5e749694e31fe2841d36bc15b0bbe3..e5f4f2187c85c92edb69728120bb2fc01a8a36b3 100644
--- a/src/test/resources/target_code/gluon/CNNNet_mnist_mnistClassifier_net.py
+++ b/src/test/resources/target_code/gluon/CNNNet_mnist_mnistClassifier_net.py
@@ -120,7 +120,7 @@ class Net_0(gluon.HybridBlock):
             self.fc3_ = gluon.nn.Dense(units=10, use_bias=True)
             # fc3_, output shape: {[10,1,1]}
 
-            self.last_layers['predictions'] = 'softmax'
+            self.softmax3_ = Softmax()
 
 
     def hybrid_forward(self, F, image):
@@ -134,6 +134,7 @@ class Net_0(gluon.HybridBlock):
         fc2_ = self.fc2_(fc2_flatten_)
         relu2_ = self.relu2_(fc2_)
         fc3_ = self.fc3_(relu2_)
-        outputs.append(fc3_)
+        softmax3_ = self.softmax3_(fc3_)
+        outputs.append(softmax3_)
 
         return outputs[0]
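The Softmax() block instantiated above is not defined in the hunks shown here; presumably it lives elsewhere in the generated network file. A minimal sketch of what such a helper could look like, assuming it does nothing more than apply a softmax over the class dimension of the last fully connected output (the real generated definition may differ):

    # Assumed shape of the helper referenced as Softmax(); hypothetical, for illustration only.
    from mxnet import gluon

    class Softmax(gluon.HybridBlock):
        def __init__(self, **kwargs):
            super(Softmax, self).__init__(**kwargs)

        def hybrid_forward(self, F, x):
            # turn the raw fc3_ output into class probabilities
            return F.softmax(x)
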
diff --git a/src/test/resources/target_code/gluon/CNNSupervisedTrainer_mnist_mnistClassifier_net.py b/src/test/resources/target_code/gluon/CNNSupervisedTrainer_mnist_mnistClassifier_net.py
index ec9005864e4f3aec0eef3495c8e223240d93ee6d..de6feee2ff6063476bfe550d24623382f9befe7b 100644
--- a/src/test/resources/target_code/gluon/CNNSupervisedTrainer_mnist_mnistClassifier_net.py
+++ b/src/test/resources/target_code/gluon/CNNSupervisedTrainer_mnist_mnistClassifier_net.py
@@ -6,6 +6,31 @@ import os
 import shutil
 from mxnet import gluon, autograd, nd
 
+class CrossEntropyLoss(gluon.loss.Loss):
+    def __init__(self, axis=-1, sparse_label=True, weight=None, batch_axis=0, **kwargs):
+        super(CrossEntropyLoss, self).__init__(weight, batch_axis, **kwargs)
+        self._axis = axis
+        self._sparse_label = sparse_label
+
+    def hybrid_forward(self, F, pred, label, sample_weight=None):
+        pred = F.log(pred)
+        if self._sparse_label:
+            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
+        else:
+            label = gluon.loss._reshape_like(F, label, pred)
+            loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
+        loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
+        return F.mean(loss, axis=self._batch_axis, exclude=True)
+
+class LogCoshLoss(gluon.loss.Loss):
+    def __init__(self, weight=None, batch_axis=0, **kwargs):
+        super(LogCoshLoss, self).__init__(weight, batch_axis, **kwargs)
+
+    def hybrid_forward(self, F, pred, label, sample_weight=None):
+        loss = F.log(F.cosh(pred - label))
+        loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
+        return F.mean(loss, axis=self._batch_axis, exclude=True)
+
 class CNNSupervisedTrainer_mnist_mnistClassifier_net:
     def __init__(self, data_loader, net_constructor):
         self._data_loader = data_loader
@@ -15,6 +40,8 @@ class CNNSupervisedTrainer_mnist_mnistClassifier_net:
     def train(self, batch_size=64,
               num_epoch=10,
               eval_metric='acc',
+              loss='softmax_cross_entropy',
+              loss_params={},
               optimizer='adam',
               optimizer_params=(('learning_rate', 0.001),),
               load_checkpoint=True,
@@ -68,19 +95,36 @@ class CNNSupervisedTrainer_mnist_mnistClassifier_net:
 
         trainers = [mx.gluon.Trainer(network.collect_params(), optimizer, optimizer_params) for network in self._networks.values()]
 
-        loss_functions = {}
-
-        for network in self._networks.values():
-            for output_name, last_layer in network.last_layers.items():
-                if last_layer == 'softmax':
-                    loss_functions[output_name] = mx.gluon.loss.SoftmaxCrossEntropyLoss()
-                elif last_layer == 'sigmoid':
-                    loss_functions[output_name] = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
-                elif last_layer == 'linear':
-                    loss_functions[output_name] = mx.gluon.loss.L2Loss()
-                else:
-                    loss_functions[output_name] = mx.gluon.loss.L2Loss()
-                    logging.warning("Invalid last layer, defaulting to L2 loss")
+        margin = loss_params['margin'] if 'margin' in loss_params else 1.0
+        sparseLabel = loss_params['sparse_label'] if 'sparse_label' in loss_params else True
+        if loss == 'softmax_cross_entropy':
+            fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else False
+            loss_function = mx.gluon.loss.SoftmaxCrossEntropyLoss(from_logits=fromLogits, sparse_label=sparseLabel)
+        elif loss == 'sigmoid_binary_cross_entropy':
+            loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
+        elif loss == 'cross_entropy':
+            loss_function = CrossEntropyLoss(sparse_label=sparseLabel)
+        elif loss == 'l2':
+            loss_function = mx.gluon.loss.L2Loss()
+        elif loss == 'l1':
+            loss_function = mx.gluon.loss.L1Loss()
+        elif loss == 'huber':
+            rho = loss_params['rho'] if 'rho' in loss_params else 1
+            loss_function = mx.gluon.loss.HuberLoss(rho=rho)
+        elif loss == 'hinge':
+            loss_function = mx.gluon.loss.HingeLoss(margin=margin)
+        elif loss == 'squared_hinge':
+            loss_function = mx.gluon.loss.SquaredHingeLoss(margin=margin)
+        elif loss == 'logistic':
+            labelFormat = loss_params['label_format'] if 'label_format' in loss_params else 'signed'
+            loss_function = mx.gluon.loss.LogisticLoss(label_format=labelFormat)
+        elif loss == 'kullback_leibler':
+            fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else True
+            loss_function = mx.gluon.loss.KLDivLoss(from_logits=fromLogits)
+        elif loss == 'log_cosh':
+            loss_function = LogCoshLoss()
+        else:
+            logging.error("Invalid loss parameter.")
 
         speed_period = 50
         tic = None
@@ -95,7 +139,7 @@ class CNNSupervisedTrainer_mnist_mnistClassifier_net:
 
                     predictions_output = self._networks[0](image_data)
 
                     loss = \
-                        loss_functions['predictions'](predictions_output, predictions_label)
+                        loss_function(predictions_output, predictions_label)
 
                     loss.backward()
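For reference, the two hand-written Gluon losses added above can be exercised directly on small NDArrays. A hedged sanity-check sketch, assuming CrossEntropyLoss and LogCoshLoss are importable from the generated trainer module; the numbers are illustrative only:

    # Illustrative check, not part of the patch.
    from mxnet import nd, gluon
    from CNNSupervisedTrainer_mnist_mnistClassifier_net import CrossEntropyLoss, LogCoshLoss

    logits = nd.array([[2.0, 0.5, -1.0]])
    label = nd.array([0])

    # CrossEntropyLoss expects probabilities (the network now ends in softmax3_),
    # while SoftmaxCrossEntropyLoss expects logits; on matching inputs both should agree.
    print(CrossEntropyLoss(sparse_label=True)(nd.softmax(logits), label))
    print(gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=True)(logits, label))

    # log(cosh(x)) ~ x^2 / 2 for small residuals, so LogCoshLoss acts like a smoothed L2.
    print(LogCoshLoss()(nd.array([[0.1]]), nd.array([[0.0]])))

The generated Gluon trainer selects between these custom classes and the stock mx.gluon.loss implementations through the same loss / loss_params keywords that the symbolic CNNCreator uses.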