Commit 7184ebe9 authored by Sebastian N.

Merge master changes to develop

parents 9d7ca665 ff353c4f
Pipeline #158791 passed with stages in 4 minutes and 13 seconds
@@ -8,7 +8,7 @@
     <groupId>de.monticore.lang.monticar</groupId>
     <artifactId>cnnarch-gluon-generator</artifactId>
-    <version>0.2.1-SNAPSHOT</version>
+    <version>0.2.2-SNAPSHOT</version>

     <!-- == PROJECT DEPENDENCIES ============================================= -->
@@ -16,8 +16,8 @@
     <!-- .. SE-Libraries .................................................. -->
     <CNNArch.version>0.3.1-SNAPSHOT</CNNArch.version>
-    <CNNTrain.version>0.3.2-SNAPSHOT</CNNTrain.version>
-    <CNNArch2X.version>0.0.1-SNAPSHOT</CNNArch2X.version>
+    <CNNTrain.version>0.3.4-SNAPSHOT</CNNTrain.version>
+    <CNNArch2X.version>0.0.2-SNAPSHOT</CNNArch2X.version>
     <embedded-montiarc-math-opt-generator>0.1.4</embedded-montiarc-math-opt-generator>
     <EMADL2PythonWrapper.version>0.0.1</EMADL2PythonWrapper.version>
...
@@ -85,10 +85,8 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
         if (layer.isAtomic()){
             ArchitectureElementSymbol nextElement = layer.getOutputElement().get();
-            if (!isSoftmaxOutput(nextElement) && !isLogisticRegressionOutput(nextElement)){
-                String templateName = layer.getDeclaration().getName();
-                include(TEMPLATE_ELEMENTS_DIR_PATH, templateName, writer, netDefinitionMode);
-            }
+            String templateName = layer.getDeclaration().getName();
+            include(TEMPLATE_ELEMENTS_DIR_PATH, templateName, writer, netDefinitionMode);
         }
         else {
             include(layer.getResolvedThis().get(), writer, netDefinitionMode);
...
@@ -6,6 +6,31 @@ import os
 import shutil
 from mxnet import gluon, autograd, nd

+class CrossEntropyLoss(gluon.loss.Loss):
+    def __init__(self, axis=-1, sparse_label=True, weight=None, batch_axis=0, **kwargs):
+        super(CrossEntropyLoss, self).__init__(weight, batch_axis, **kwargs)
+        self._axis = axis
+        self._sparse_label = sparse_label
+
+    def hybrid_forward(self, F, pred, label, sample_weight=None):
+        pred = F.log(pred)
+        if self._sparse_label:
+            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
+        else:
+            label = gluon.loss._reshape_like(F, label, pred)
+            loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
+        loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
+        return F.mean(loss, axis=self._batch_axis, exclude=True)
+
+class LogCoshLoss(gluon.loss.Loss):
+    def __init__(self, weight=None, batch_axis=0, **kwargs):
+        super(LogCoshLoss, self).__init__(weight, batch_axis, **kwargs)
+
+    def hybrid_forward(self, F, pred, label, sample_weight=None):
+        loss = F.log(F.cosh(pred - label))
+        loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
+        return F.mean(loss, axis=self._batch_axis, exclude=True)
+
 class ${tc.fileNameWithoutEnding}:
     def __init__(self, data_loader, net_constructor):
         self._data_loader = data_loader
@@ -15,6 +40,8 @@ class ${tc.fileNameWithoutEnding}:
     def train(self, batch_size=64,
               num_epoch=10,
               eval_metric='acc',
+              loss='softmax_cross_entropy',
+              loss_params={},
               optimizer='adam',
               optimizer_params=(('learning_rate', 0.001),),
               load_checkpoint=True,
@@ -68,19 +95,36 @@ class ${tc.fileNameWithoutEnding}:
         trainers = [mx.gluon.Trainer(network.collect_params(), optimizer, optimizer_params) for network in self._networks.values()]

-        loss_functions = {}
-
-        for network in self._networks.values():
-            for output_name, last_layer in network.last_layers.items():
-                if last_layer == 'softmax':
-                    loss_functions[output_name] = mx.gluon.loss.SoftmaxCrossEntropyLoss()
-                elif last_layer == 'sigmoid':
-                    loss_functions[output_name] = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
-                elif last_layer == 'linear':
-                    loss_functions[output_name] = mx.gluon.loss.L2Loss()
-                else:
-                    loss_functions[output_name] = mx.gluon.loss.L2Loss()
-                    logging.warning("Invalid last layer, defaulting to L2 loss")
+        margin = loss_params['margin'] if 'margin' in loss_params else 1.0
+        sparseLabel = loss_params['sparse_label'] if 'sparse_label' in loss_params else True
+        if loss == 'softmax_cross_entropy':
+            fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else False
+            loss_function = mx.gluon.loss.SoftmaxCrossEntropyLoss(from_logits=fromLogits, sparse_label=sparseLabel)
+        elif loss == 'sigmoid_binary_cross_entropy':
+            loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
+        elif loss == 'cross_entropy':
+            loss_function = CrossEntropyLoss(sparse_label=sparseLabel)
+        elif loss == 'l2':
+            loss_function = mx.gluon.loss.L2Loss()
+        elif loss == 'l1':
+            loss_function = mx.gluon.loss.L2Loss()
+        elif loss == 'huber':
+            rho = loss_params['rho'] if 'rho' in loss_params else 1
+            loss_function = mx.gluon.loss.HuberLoss(rho=rho)
+        elif loss == 'hinge':
+            loss_function = mx.gluon.loss.HingeLoss(margin=margin)
+        elif loss == 'squared_hinge':
+            loss_function = mx.gluon.loss.SquaredHingeLoss(margin=margin)
+        elif loss == 'logistic':
+            labelFormat = loss_params['label_format'] if 'label_format' in loss_params else 'signed'
+            loss_function = mx.gluon.loss.LogisticLoss(label_format=labelFormat)
+        elif loss == 'kullback_leibler':
+            fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else True
+            loss_function = mx.gluon.loss.KLDivLoss(from_logits=fromLogits)
+        elif loss == 'log_cosh':
+            loss_function = LogCoshLoss()
+        else:
+            logging.error("Invalid loss parameter.")

         speed_period = 50
         tic = None
@@ -108,7 +152,7 @@ ${tc.include(stream, "PYTHON_INLINE")}
 <#list tc.architecture.streams as stream>
 <#if stream.isNetwork()>
 <#list tc.getStreamOutputNames(stream) as output_name>
-loss_functions['${output_name}'](${output_name}_output, ${output_name}_label)<#sep> + \
+loss_function(${output_name}_output, ${output_name}_label)<#sep> + \
 </#list><#sep> + \
 </#if>
 </#list>
...
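Note on the two added loss classes: CrossEntropyLoss takes the log of its prediction input, so it expects probabilities (for example the output of the Softmax layer the generator now emits), while mx.gluon.loss.SoftmaxCrossEntropyLoss applies log-softmax to raw logits itself. A small standalone check, not part of the commit and with illustrative values:

import mxnet as mx
from mxnet import nd

logits = nd.array([[2.0, 0.5, -1.0]])
label = nd.array([0])

# The CrossEntropyLoss above computes -log(p[label]) from probabilities
probs = nd.softmax(logits)
ce = -nd.log(nd.pick(probs, label, axis=-1))

# SoftmaxCrossEntropyLoss consumes the raw logits (from_logits=False by default)
sce = mx.gluon.loss.SoftmaxCrossEntropyLoss()(logits, label)

print(ce.asscalar(), sce.asscalar())  # both come out around 0.24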
@@ -39,6 +39,16 @@ if __name__ == "__main__":
 <#if (config.evalMetric)??>
         eval_metric='${config.evalMetric}',
 </#if>
+<#if (config.configuration.loss)??>
+        loss='${config.lossName}',
+<#if (config.lossParams)??>
+        loss_params={
+<#list config.lossParams?keys as param>
+            '${param}': ${config.lossParams[param]}<#sep>,
+</#list>
+        },
+</#if>
+</#if>
 <#if (config.configuration.optimizer)??>
         optimizer='${config.optimizerName}',
         optimizer_params={
...
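Illustratively, assuming a training configuration that selects the Huber loss with rho set to 1.0 (values not taken from the commit), the template block above would expand to the following arguments in the generated trainer call:

        loss='huber',
        loss_params={
            'rho': 1.0,
        },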
 <#assign input = element.inputs[0]>
-<#if mode == "ARCHITECTURE_DEFINITION">
-<#if element.softmaxOutput>
-        self.last_layers['${element.name}'] = 'softmax'
-<#elseif element.logisticRegressionOutput>
-        self.last_layers['${element.name}'] = 'sigmoid'
-<#elseif element.linearRegressionOutput>
-        self.last_layers['${element.name}'] = 'linear'
-<#elseif element.oneHotOutput>
-        self.last_layers['${element.name}'] = 'softmax'
-</#if>
-<#elseif mode == "FORWARD_FUNCTION">
+<#if mode == "FORWARD_FUNCTION">
         outputs.append(${input})
 <#elseif mode == "PYTHON_INLINE">
         ${element.name}_output = ${input}
...
@@ -5,8 +5,15 @@
 <#else>
     'use_fix_target': False,
 </#if>
-<#if (config.loss)??>
-    'loss_function': '${config.loss}',
+<#if (config.configuration.loss)??>
+    'loss': '${config.lossName}',
+<#if (config.lossParams)??>
+    'loss_params': {
+<#list config.lossParams?keys as param>
+        '${param}': ${config.lossParams[param]}<#sep>,
+</#list>
+    },
+</#if>
 </#if>
 <#if (config.configuration.optimizer)??>
     'optimizer': '${config.optimizerName}',
...
@@ -205,7 +205,7 @@ class Net_0(gluon.HybridBlock):
         self.fc8_ = gluon.nn.Dense(units=10, use_bias=True)
         # fc8_, output shape: {[10,1,1]}

-        self.last_layers['predictions'] = 'softmax'
+        self.softmax8_ = Softmax()

     def hybrid_forward(self, F, data):
@@ -271,6 +271,7 @@ class Net_0(gluon.HybridBlock):
         relu7_ = self.relu7_(fc7_)
         dropout7_ = self.dropout7_(relu7_)
         fc8_ = self.fc8_(dropout7_)
-        outputs.append(fc8_)
+        softmax8_ = self.softmax8_(fc8_)
+        outputs.append(softmax8_)

         return outputs[0]
@@ -356,7 +356,7 @@ class Net_0(gluon.HybridBlock):
         self.fc32_ = gluon.nn.Dense(units=10, use_bias=True)
         # fc32_, output shape: {[10,1,1]}

-        self.last_layers['softmax'] = 'softmax'
+        self.softmax32_ = Softmax()

     def hybrid_forward(self, F, data):
@@ -464,6 +464,7 @@ class Net_0(gluon.HybridBlock):
         fc31_ = self.fc31_(globalpooling31_)
         dropout31_ = self.dropout31_(fc31_)
         fc32_ = self.fc32_(dropout31_)
-        outputs.append(fc32_)
+        softmax32_ = self.softmax32_(fc32_)
+        outputs.append(softmax32_)

         return outputs[0]
@@ -233,7 +233,7 @@ class Net_0(gluon.HybridBlock):
         self.fc15_ = gluon.nn.Dense(units=1000, use_bias=True)
         # fc15_, output shape: {[1000,1,1]}

-        self.last_layers['predictions'] = 'softmax'
+        self.softmax15_ = Softmax()

     def hybrid_forward(self, F, data):
@@ -291,6 +291,7 @@ class Net_0(gluon.HybridBlock):
         relu15_ = self.relu15_(fc14_)
         dropout15_ = self.dropout15_(relu15_)
         fc15_ = self.fc15_(dropout15_)
-        outputs.append(fc15_)
+        softmax15_ = self.softmax15_(fc15_)
+        outputs.append(softmax15_)

         return outputs[0]
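The Softmax() block instantiated by the regenerated networks is defined elsewhere in the generator and is not part of this diff. As a labelled assumption, a minimal HybridBlock filling that role could look like this:

import mxnet.gluon as gluon

# Hypothetical sketch only; the generator's actual Softmax block may differ.
class Softmax(gluon.HybridBlock):
    def __init__(self, **kwargs):
        super(Softmax, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        # normalize the fully connected output to class probabilities
        return F.softmax(x)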
@@ -6,6 +6,31 @@ import os
 import shutil
 from mxnet import gluon, autograd, nd

+class CrossEntropyLoss(gluon.loss.Loss):
+    def __init__(self, axis=-1, sparse_label=True, weight=None, batch_axis=0, **kwargs):
+        super(CrossEntropyLoss, self).__init__(weight, batch_axis, **kwargs)
+        self._axis = axis
+        self._sparse_label = sparse_label
+
+    def hybrid_forward(self, F, pred, label, sample_weight=None):
+        pred = F.log(pred)
+        if self._sparse_label:
+            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
+        else:
+            label = gluon.loss._reshape_like(F, label, pred)
+            loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
+        loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
+        return F.mean(loss, axis=self._batch_axis, exclude=True)
+
+class LogCoshLoss(gluon.loss.Loss):
+    def __init__(self, weight=None, batch_axis=0, **kwargs):
+        super(LogCoshLoss, self).__init__(weight, batch_axis, **kwargs)
+
+    def hybrid_forward(self, F, pred, label, sample_weight=None):
+        loss = F.log(F.cosh(pred - label))
+        loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
+        return F.mean(loss, axis=self._batch_axis, exclude=True)
+
 class CNNSupervisedTrainer_Alexnet:
     def __init__(self, data_loader, net_constructor):
         self._data_loader = data_loader
@@ -15,6 +40,8 @@ class CNNSupervisedTrainer_Alexnet:
     def train(self, batch_size=64,
               num_epoch=10,
               eval_metric='acc',
+              loss='softmax_cross_entropy',
+              loss_params={},
               optimizer='adam',
               optimizer_params=(('learning_rate', 0.001),),
               load_checkpoint=True,
@@ -68,19 +95,36 @@ class CNNSupervisedTrainer_Alexnet:
         trainers = [mx.gluon.Trainer(network.collect_params(), optimizer, optimizer_params) for network in self._networks.values()]

-        loss_functions = {}
-
-        for network in self._networks.values():
-            for output_name, last_layer in network.last_layers.items():
-                if last_layer == 'softmax':
-                    loss_functions[output_name] = mx.gluon.loss.SoftmaxCrossEntropyLoss()
-                elif last_layer == 'sigmoid':
-                    loss_functions[output_name] = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
-                elif last_layer == 'linear':
-                    loss_functions[output_name] = mx.gluon.loss.L2Loss()
-                else:
-                    loss_functions[output_name] = mx.gluon.loss.L2Loss()
-                    logging.warning("Invalid last layer, defaulting to L2 loss")
+        margin = loss_params['margin'] if 'margin' in loss_params else 1.0
+        sparseLabel = loss_params['sparse_label'] if 'sparse_label' in loss_params else True
+        if loss == 'softmax_cross_entropy':
+            fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else False
+            loss_function = mx.gluon.loss.SoftmaxCrossEntropyLoss(from_logits=fromLogits, sparse_label=sparseLabel)
+        elif loss == 'sigmoid_binary_cross_entropy':
+            loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
+        elif loss == 'cross_entropy':
+            loss_function = CrossEntropyLoss(sparse_label=sparseLabel)
+        elif loss == 'l2':
+            loss_function = mx.gluon.loss.L2Loss()
+        elif loss == 'l1':
+            loss_function = mx.gluon.loss.L2Loss()
+        elif loss == 'huber':
+            rho = loss_params['rho'] if 'rho' in loss_params else 1
+            loss_function = mx.gluon.loss.HuberLoss(rho=rho)
+        elif loss == 'hinge':
+            loss_function = mx.gluon.loss.HingeLoss(margin=margin)
+        elif loss == 'squared_hinge':
+            loss_function = mx.gluon.loss.SquaredHingeLoss(margin=margin)
+        elif loss == 'logistic':
+            labelFormat = loss_params['label_format'] if 'label_format' in loss_params else 'signed'
+            loss_function = mx.gluon.loss.LogisticLoss(label_format=labelFormat)
+        elif loss == 'kullback_leibler':
+            fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else True
+            loss_function = mx.gluon.loss.KLDivLoss(from_logits=fromLogits)
+        elif loss == 'log_cosh':
+            loss_function = LogCoshLoss()
+        else:
+            logging.error("Invalid loss parameter.")

         speed_period = 50
         tic = None
@@ -95,7 +139,7 @@ class CNNSupervisedTrainer_Alexnet:
                     predictions_output = self._networks[0](data_data)

                     loss = \
-                        loss_functions['predictions'](predictions_output, predictions_label)
+                        loss_function(predictions_output, predictions_label)

                     loss.backward()
...
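For reference, a minimal sketch of how this generated trainer is now driven; the data_loader and net_constructor wiring is assumed and not shown in the commit:

trainer = CNNSupervisedTrainer_Alexnet(data_loader, net_constructor)
trainer.train(
    batch_size=64,
    num_epoch=10,
    eval_metric='acc',
    loss='cross_entropy',                  # chosen by name instead of being derived from the last layer
    loss_params={'sparse_label': True},
    optimizer='adam',
    optimizer_params={'learning_rate': 0.001},
)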
@@ -6,6 +6,31 @@ import os
 import shutil
 from mxnet import gluon, autograd, nd

+class CrossEntropyLoss(gluon.loss.Loss):
+    def __init__(self, axis=-1, sparse_label=True, weight=None, batch_axis=0, **kwargs):
+        super(CrossEntropyLoss, self).__init__(weight, batch_axis, **kwargs)
+        self._axis = axis
+        self._sparse_label = sparse_label
+
+    def hybrid_forward(self, F, pred, label, sample_weight=None):
+        pred = F.log(pred)
+        if self._sparse_label:
+            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
+        else:
+            label = gluon.loss._reshape_like(F, label, pred)
+            loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
+        loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
+        return F.mean(loss, axis=self._batch_axis, exclude=True)
+
+class LogCoshLoss(gluon.loss.Loss):
+    def __init__(self, weight=None, batch_axis=0, **kwargs):
+        super(LogCoshLoss, self).__init__(weight, batch_axis, **kwargs)
+
+    def hybrid_forward(self, F, pred, label, sample_weight=None):
+        loss = F.log(F.cosh(pred - label))
+        loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
+        return F.mean(loss, axis=self._batch_axis, exclude=True)
+
 class CNNSupervisedTrainer_CifarClassifierNetwork:
     def __init__(self, data_loader, net_constructor):
         self._data_loader = data_loader
@@ -15,6 +40,8 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
     def train(self, batch_size=64,
               num_epoch=10,
               eval_metric='acc',
+              loss='softmax_cross_entropy',
+              loss_params={},
               optimizer='adam',
               optimizer_params=(('learning_rate', 0.001),),
               load_checkpoint=True,
@@ -68,19 +95,36 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
         trainers = [mx.gluon.Trainer(network.collect_params(), optimizer, optimizer_params) for network in self._networks.values()]

-        loss_functions = {}
-
-        for network in self._networks.values():
-            for output_name, last_layer in network.last_layers.items():
-                if last_layer == 'softmax':
-                    loss_functions[output_name] = mx.gluon.loss.SoftmaxCrossEntropyLoss()
-                elif last_layer == 'sigmoid':
-                    loss_functions[output_name] = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
-                elif last_layer == 'linear':
-                    loss_functions[output_name] = mx.gluon.loss.L2Loss()
-                else:
-                    loss_functions[output_name] = mx.gluon.loss.L2Loss()
-                    logging.warning("Invalid last layer, defaulting to L2 loss")
+        margin = loss_params['margin'] if 'margin' in loss_params else 1.0
+        sparseLabel = loss_params['sparse_label'] if 'sparse_label' in loss_params else True
+        if loss == 'softmax_cross_entropy':
+            fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else False
+            loss_function = mx.gluon.loss.SoftmaxCrossEntropyLoss(from_logits=fromLogits, sparse_label=sparseLabel)
+        elif loss == 'sigmoid_binary_cross_entropy':
+            loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
+        elif loss == 'cross_entropy':
+            loss_function = CrossEntropyLoss(sparse_label=sparseLabel)
+        elif loss == 'l2':
+            loss_function = mx.gluon.loss.L2Loss()
+        elif loss == 'l1':
+            loss_function = mx.gluon.loss.L2Loss()
+        elif loss == 'huber':
+            rho = loss_params['rho'] if 'rho' in loss_params else 1
+            loss_function = mx.gluon.loss.HuberLoss(rho=rho)
+        elif loss == 'hinge':
+            loss_function = mx.gluon.loss.HingeLoss(margin=margin)
+        elif loss == 'squared_hinge':
+            loss_function = mx.gluon.loss.SquaredHingeLoss(margin=margin)
+        elif loss == 'logistic':
+            labelFormat = loss_params['label_format'] if 'label_format' in loss_params else 'signed'
+            loss_function = mx.gluon.loss.LogisticLoss(label_format=labelFormat)
+        elif loss == 'kullback_leibler':
+            fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else True
+            loss_function = mx.gluon.loss.KLDivLoss(from_logits=fromLogits)
+        elif loss == 'log_cosh':
+            loss_function = LogCoshLoss()
+        else:
+            logging.error("Invalid loss parameter.")

         speed_period = 50
         tic = None
@@ -95,7 +139,7 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
                     softmax_output = self._networks[0](data_data)

                     loss = \
-                        loss_functions['softmax'](softmax_output, softmax_label)
+                        loss_function(softmax_output, softmax_label)

                     loss.backward()
...
@@ -6,6 +6,31 @@ import os
 import shutil
 from mxnet import gluon, autograd, nd

+class CrossEntropyLoss(gluon.loss.Loss):
+    def __init__(self, axis=-1, sparse_label=True, weight=None, batch_axis=0, **kwargs):
+        super(CrossEntropyLoss, self).__init__(weight, batch_axis, **kwargs)
+        self._axis = axis
+        self._sparse_label = sparse_label
+
+    def hybrid_forward(self, F, pred, label, sample_weight=None):
+        pred = F.log(pred)
+        if self._sparse_label:
+            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
+        else:
+            label = gluon.loss._reshape_like(F, label, pred)
+            loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
+        loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
+        return F.mean(loss, axis=self._batch_axis, exclude=True)
+
+class LogCoshLoss(gluon.loss.Loss):
+    def __init__(self, weight=None, batch_axis=0, **kwargs):
+        super(LogCoshLoss, self).__init__(weight, batch_axis, **kwargs)