
Commit fa500d5b authored by eyuhar

added loss functions

parent 20d0b24c
@@ -16,7 +16,7 @@
<!-- .. SE-Libraries .................................................. -->
<CNNArch.version>0.3.0-SNAPSHOT</CNNArch.version>
<CNNTrain.version>0.2.6</CNNTrain.version>
<CNNTrain.version>0.3.1-SNAPSHOT</CNNTrain.version>
<embedded-montiarc-math-opt-generator>0.1.4</embedded-montiarc-math-opt-generator>
<!-- .. Libraries .................................................. -->
......
@@ -70,29 +70,6 @@ public class ArchitectureElementData {
        return getTemplateController().getLayerInputs(getElement());
    }

    public boolean isLogisticRegressionOutput(){
        return getTemplateController().isLogisticRegressionOutput(getElement());
    }

    public boolean isLinearRegressionOutput(){
        boolean result = getTemplateController().isLinearRegressionOutput(getElement());
        if (result){
            Log.warn("The Output '" + getElement().getName() + "' is a linear regression output (squared loss) during training" +
                    " because the previous architecture element is not a softmax (cross-entropy loss) or sigmoid (logistic regression loss) activation. " +
                    "Other loss functions are currently not supported. "
                    , getElement().getSourcePosition());
        }
        return result;
    }

    public boolean isSoftmaxOutput(){
        return getTemplateController().isSoftmaxOutput(getElement());
    }

    public List<Integer> getKernel(){
        return ((LayerSymbol) getElement())
                .getIntTupleValue(AllPredefinedLayers.KERNEL_NAME).get();
......
@@ -37,10 +37,8 @@ public class CNNArch2MxNetTemplateController extends CNNArchTemplateController {
        if (layer.isAtomic()){
            ArchitectureElementSymbol nextElement = layer.getOutputElement().get();
            if (!isSoftmaxOutput(nextElement) && !isLogisticRegressionOutput(nextElement)){
                String templateName = layer.getDeclaration().getName();
                include(TEMPLATE_ELEMENTS_DIR_PATH, templateName, writer);
            }
        } else {
            include(layer.getResolvedThis().get(), writer);
        }
......
@@ -139,16 +139,12 @@ public abstract class CNNArchTemplateController {
    public List<String> getLayerInputs(ArchitectureElementSymbol layer){
        List<String> inputNames = new ArrayList<>();
        if (isSoftmaxOutput(layer) || isLogisticRegressionOutput(layer)){
            inputNames = getLayerInputs(layer.getInputElement().get());
        } else {
            for (ArchitectureElementSymbol input : layer.getPrevious()) {
                if (input.getOutputTypes().size() == 1) {
                    inputNames.add(getName(input));
                } else {
                    for (int i = 0; i < input.getOutputTypes().size(); i++) {
                        inputNames.add(getName(input) + "[" + i + "]");
                    }
        for (ArchitectureElementSymbol input : layer.getPrevious()) {
            if (input.getOutputTypes().size() == 1) {
                inputNames.add(getName(input));
            } else {
                for (int i = 0; i < input.getOutputTypes().size(); i++) {
                    inputNames.add(getName(input) + "[" + i + "]");
                }
            }
        }
@@ -220,29 +216,4 @@ public abstract class CNNArchTemplateController {
        return stringBuilder.toString();
    }

    public boolean isLogisticRegressionOutput(ArchitectureElementSymbol architectureElement){
        return isTOutput(Sigmoid.class, architectureElement);
    }

    public boolean isLinearRegressionOutput(ArchitectureElementSymbol architectureElement){
        return architectureElement.isOutput()
                && !isLogisticRegressionOutput(architectureElement)
                && !isSoftmaxOutput(architectureElement);
    }

    public boolean isSoftmaxOutput(ArchitectureElementSymbol architectureElement){
        return isTOutput(Softmax.class, architectureElement);
    }

    private boolean isTOutput(Class inputPredefinedLayerClass, ArchitectureElementSymbol architectureElement){
        if (architectureElement.isOutput()
                && architectureElement.getInputElement().isPresent()
                && architectureElement.getInputElement().get() instanceof LayerSymbol){
            LayerSymbol inputLayer = (LayerSymbol) architectureElement.getInputElement().get();
            return inputPredefinedLayerClass.isInstance(inputLayer.getDeclaration());
        }
        return false;
    }
}
@@ -67,6 +67,33 @@ public class ConfigurationData {
        return getConfiguration().getEntry("eval_metric").getValue().toString();
    }

    public String getLossName() {
        if (getConfiguration().getLoss() == null) {
            return null;
        }
        return getConfiguration().getLoss().getName();
    }
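
    // Renders the loss parameters as plain strings for the FreeMarker training template;
    // Boolean values are emitted as Python-style "True"/"False" so that the generated
    // training script remains valid Python.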
    public Map<String, String> getLossParams() {
        Map<String, String> mapToStrings = new HashMap<>();
        Map<String, LossParamSymbol> lossParams = getConfiguration().getLoss().getLossParamMap();
        for (Map.Entry<String, LossParamSymbol> entry : lossParams.entrySet()) {
            String paramName = entry.getKey();
            String valueAsString = entry.getValue().toString();
            Class realClass = entry.getValue().getValue().getValue().getClass();
            if (realClass == Boolean.class) {
                valueAsString = (Boolean) entry.getValue().getValue().getValue() ? "True" : "False";
            }
            mapToStrings.put(paramName, valueAsString);
        }
        if (mapToStrings.isEmpty()){
            return null;
        } else {
            return mapToStrings;
        }
    }
    public String getOptimizerName() {
        if (getConfiguration().getOptimizer() == null) {
            return null;
......
@@ -104,10 +104,80 @@ class ${tc.fileNameWithoutEnding}:
            logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.")
            sys.exit(1)

    def loss_function(self, loss, params):
        label = mx.symbol.var(name=self._output_names_[0], )
        prediction = self.module.symbol.get_children()[0]
        margin = params['margin'] if 'margin' in params else 1.0
        sparseLabel = params['sparse_label'] if 'sparse_label' in params else True
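        # The two cross-entropy branches below compute the negative log-likelihood: with
        # sparse (index) labels via pick(), i.e. -log p[label]; with dense labels as
        # -sum(label * log p) over the class axis, then averaged over all non-batch axes.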
        if loss == 'softmax_cross_entropy':
            fromLogits = params['from_logits'] if 'from_logits' in params else False
            if not fromLogits:
                prediction = mx.symbol.log_softmax(data=prediction, axis=1)
            if sparseLabel:
                loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True)
            else:
                label = mx.symbol.reshape_like(label, prediction)
                loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True)
            loss_func = mx.symbol.MakeLoss(loss_func, name="softmax_cross_entropy")
        elif loss == 'cross_entropy':
            prediction = mx.symbol.log(prediction)
            if sparseLabel:
                loss_func = mx.symbol.mean(-mx.symbol.pick(prediction, label, axis=-1, keepdims=True), axis=0, exclude=True)
            else:
                label = mx.symbol.reshape_like(label, prediction)
                loss_func = mx.symbol.mean(-mx.symbol.sum(prediction * label, axis=-1, keepdims=True), axis=0, exclude=True)
            loss_func = mx.symbol.MakeLoss(loss_func, name="cross_entropy")
        elif loss == 'sigmoid_binary_cross_entropy':
            loss_func = mx.symbol.LogisticRegressionOutput(data=prediction, name=self.module.symbol.name)
        elif loss == 'l1':
            loss_func = mx.symbol.MAERegressionOutput(data=prediction, name=self.module.symbol.name)
        elif loss == 'l2':
            label = mx.symbol.reshape_like(label, prediction)
            loss_func = mx.symbol.mean(mx.symbol.square((label - prediction) / 2), axis=0, exclude=True)
            loss_func = mx.symbol.MakeLoss(loss_func, name="L2")
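        # The 'huber' branch below is the standard smooth-L1 (Huber) loss with threshold rho:
        # 0.5 * x^2 / rho for |x| <= rho and |x| - 0.5 * rho otherwise, where x = label - prediction.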
        elif loss == 'huber':
            rho = params['rho'] if 'rho' in params else 1
            label = mx.symbol.reshape_like(label, prediction)
            loss_func = mx.symbol.abs(label - prediction)
            loss_func = mx.symbol.where(loss_func > rho, loss_func - 0.5 * rho, (0.5 / rho) * mx.symbol.square(loss_func))
            loss_func = mx.symbol.mean(loss_func, axis=0, exclude=True)
            loss_func = mx.symbol.MakeLoss(loss_func, name="huber")
        elif loss == 'hinge':
            label = mx.symbol.reshape_like(label, prediction)
            loss_func = mx.symbol.mean(mx.symbol.relu(margin - prediction * label), axis=0, exclude=True)
            loss_func = mx.symbol.MakeLoss(loss_func, name="hinge")
        elif loss == 'squared_hinge':
            label = mx.symbol.reshape_like(label, prediction)
            loss_func = mx.symbol.mean(mx.symbol.square(mx.symbol.relu(margin - prediction * label)), axis=0, exclude=True)
            loss_func = mx.symbol.MakeLoss(loss_func, name="squared_hinge")
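        # The 'logistic' branch below uses the numerically stable binary cross-entropy on raw
        # scores z: max(z, 0) - z * y + log(1 + exp(-|z|)), built from relu and softrelu;
        # labels in 'signed' {-1, 1} format are first rescaled to {0, 1}.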
        elif loss == 'logistic':
            labelFormat = params['label_format'] if 'label_format' in params else 'signed'
            if labelFormat not in ["binary", "signed"]:
                logging.error("label_format can only be signed or binary")
            label = mx.symbol.reshape_like(label, prediction)
            if labelFormat == 'signed':
                label = (label + 1.0)/2.0
            loss_func = mx.symbol.relu(prediction) - prediction * label
            loss_func = loss_func + mx.symbol.Activation(-mx.symbol.abs(prediction), act_type="softrelu")
            loss_func = mx.symbol.MakeLoss(mx.symbol.mean(loss_func, 0, exclude=True), name="logistic")
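        # The 'kullback_leibler' branch below computes KL(label || prediction) as
        # label * (log(label) - log_prediction); with from_logits=False the raw network
        # output is first converted to log-probabilities via log_softmax.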
        elif loss == 'kullback_leibler':
            fromLogits = params['from_logits'] if 'from_logits' in params else True
            if not fromLogits:
                prediction = mx.symbol.log_softmax(prediction, axis=1)
            loss_func = mx.symbol.mean(label * (mx.symbol.log(label) - prediction), axis=0, exclude=True)
            loss_func = mx.symbol.MakeLoss(loss_func, name="kullback_leibler")
        else:
            logging.error("Invalid loss parameter.")

        return loss_func

    def train(self, batch_size=64,
              num_epoch=10,
              eval_metric='acc',
              loss='softmax_cross_entropy',
              loss_params={},
              optimizer='adam',
              optimizer_params=(('learning_rate', 0.001),),
              load_checkpoint=True,
@@ -136,7 +206,6 @@ class ${tc.fileNameWithoutEnding}:
            del optimizer_params['step_size']
            del optimizer_params['learning_rate_decay']

        train_iter, test_iter, data_mean, data_std = self.load_data(batch_size)

        if self.module == None:
            if normalize:
@@ -144,6 +213,14 @@ class ${tc.fileNameWithoutEnding}:
            else:
                self.construct(mx_context)

        loss_func = self.loss_function(loss=loss, params=loss_params)

        self.module = mx.mod.Module(
            symbol=mx.symbol.Group([loss_func, mx.symbol.BlockGrad(self.module.symbol.get_children()[0], name="pred")]),
            data_names=self._input_names_,
            label_names=self._output_names_,
            context=mx_context)

        begin_epoch = 0
        if load_checkpoint:
            begin_epoch = self.load(mx_context)
@@ -157,9 +234,11 @@ class ${tc.fileNameWithoutEnding}:
            if not os.path.isdir(self._model_dir_):
                raise

        metric = mx.metric.create(eval_metric, output_names=['pred_output'])

        self.module.fit(
            train_data=train_iter,
            eval_metric=eval_metric,
            eval_metric=metric,
            eval_data=test_iter,
            optimizer=optimizer,
            optimizer_params=optimizer_params,
......
@@ -31,6 +31,16 @@ if __name__ == "__main__":
<#if (config.evalMetric)??>
eval_metric='${config.evalMetric}',
</#if>
<#if (config.configuration.loss)??>
loss='${config.lossName}',
<#if (config.lossParams)??>
loss_params={
<#list config.lossParams?keys as param>
'${param}': ${config.lossParams[param]}<#sep>,
</#list>
},
</#if>
</#if>
<#if (config.configuration.optimizer)??>
optimizer='${config.optimizerName}',
optimizer_params={
......
<#if element.softmaxOutput>
${element.name} = mx.symbol.SoftmaxOutput(data=${element.inputs[0]},
name="${element.name}")
<#elseif element.logisticRegressionOutput>
${element.name} = mx.symbol.LogisticRegressionOutput(data=${element.inputs[0]},
name="${element.name}")
<#elseif element.linearRegressionOutput>
${element.name} = mx.symbol.LinearRegressionOutput(data=${element.inputs[0]},
name="${element.name}")
</#if>
\ No newline at end of file
\ No newline at end of file
@@ -112,7 +112,7 @@ public class GenerationTest extends AbstractSymtabTest{
        Log.getFindings().clear();
        String[] args = {"-m", "src/test/resources/valid_tests", "-r", "MultipleOutputs"};
        CNNArch2MxNetCli.main(args);
        assertTrue(Log.getFindings().size() == 3);
        assertTrue(Log.getFindings().size() == 1);
    }

    @Test
......
@@ -16,6 +16,10 @@ if __name__ == "__main__":
        context='gpu',
        normalize=True,
        eval_metric='mse',
        loss='softmax_cross_entropy',
        loss_params={
            'sparse_label': True,
            'from_logits': False},
        optimizer='rmsprop',
        optimizer_params={
            'weight_decay': 0.01,
......
@@ -12,6 +12,7 @@ if __name__ == "__main__":
    simpleConfig.train(
        batch_size=100,
        num_epoch=50,
        loss='cross_entropy',
        optimizer='adam',
        optimizer_params={
            'learning_rate': 0.001}
......
@@ -3,6 +3,10 @@ configuration FullConfig{
    batch_size : 100
    load_checkpoint : true
    eval_metric : mse
    loss: softmax_cross_entropy{
        sparse_label: true
        from_logits: false
    }
    context : gpu
    normalize : true
    optimizer : rmsprop{
......
configuration SimpleConfig{
    num_epoch : 50
    batch_size : 100
    loss : cross_entropy
    optimizer : adam{
        learning_rate : 0.001
    }
......