
Commit 8dffa375 authored by Carlos Alfredo Yeverino Rodriguez

Processed loss parameter and set default value based on the last layer of the network architecture

parent f0801126
Pipeline #105357 passed with stages in 5 minutes and 10 seconds
@@ -16,7 +16,7 @@
    <!-- .. SE-Libraries .................................................. -->
    <CNNArch.version>0.2.7</CNNArch.version>
-   <CNNTrain.version>0.2.5</CNNTrain.version>
+   <CNNTrain.version>0.2.6-SNAPSHOT</CNNTrain.version>
    <embedded-montiarc-math-generator>0.1.2-SNAPSHOT</embedded-montiarc-math-generator>
    <!-- .. Libraries .................................................. -->
@@ -45,6 +45,10 @@ public class CNNArchTemplateController {
    private ArchitectureElementData dataElement;

+   public String loss;
+   public static final String CROSS_ENTROPY = "cross_entropy";
+   public static final String EUCLIDEAN = "euclidean";
+
    public CNNArchTemplateController(ArchitectureSymbol architecture) {
        setArchitecture(architecture);
    }
@@ -123,6 +127,10 @@ public class CNNArchTemplateController {
        return list;
    }

+   public String getArchitectureLoss(){
+       return this.loss;
+   }
+
    public void include(String relativePath, String templateWithoutFileEnding, Writer writer){
        String templatePath = relativePath + templateWithoutFileEnding + FTL_FILE_ENDING;
        Map<String, Object> ftlContext = new HashMap<>();
@@ -236,18 +244,30 @@ public class CNNArchTemplateController {
    public boolean isLogisticRegressionOutput(ArchitectureElementSymbol architectureElement){
-       return isTOutput(Sigmoid.class, architectureElement);
+       if (isTOutput(Sigmoid.class, architectureElement)){
+           this.loss = CROSS_ENTROPY;
+           return true;
+       }
+       return false;
    }

    public boolean isLinearRegressionOutput(ArchitectureElementSymbol architectureElement){
-       return architectureElement.isOutput()
+       if (architectureElement.isOutput()
                && !isLogisticRegressionOutput(architectureElement)
-               && !isSoftmaxOutput(architectureElement);
+               && !isSoftmaxOutput(architectureElement)){
+           this.loss = EUCLIDEAN;
+           return true;
+       }
+       return false;
    }

    public boolean isSoftmaxOutput(ArchitectureElementSymbol architectureElement){
-       return isTOutput(Softmax.class, architectureElement);
+       if (isTOutput(Softmax.class, architectureElement)){
+           this.loss = CROSS_ENTROPY;
+           return true;
+       }
+       return false;
    }

    private boolean isTOutput(Class inputPredefinedLayerClass, ArchitectureElementSymbol architectureElement){
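The net effect of these predicates is a defaulting rule: the check that classifies the network's output layer also records which loss the generated trainer should use, so sigmoid and softmax outputs default to cross-entropy and any other output layer to the euclidean loss. A minimal Python sketch of that rule, using an illustrative Layer stand-in rather than the generator's actual symbol classes:

from dataclasses import dataclass

CROSS_ENTROPY = "cross_entropy"
EUCLIDEAN = "euclidean"

@dataclass
class Layer:
    kind: str        # e.g. "Sigmoid", "Softmax", "FullyConnected"
    is_output: bool

class LossDefaulter:
    """Illustrative stand-in for the loss bookkeeping in CNNArchTemplateController."""

    def __init__(self):
        self.loss = None  # filled in while the output layer is classified

    def is_logistic_regression_output(self, layer: Layer) -> bool:
        if layer.is_output and layer.kind == "Sigmoid":
            self.loss = CROSS_ENTROPY
            return True
        return False

    def is_softmax_output(self, layer: Layer) -> bool:
        if layer.is_output and layer.kind == "Softmax":
            self.loss = CROSS_ENTROPY
            return True
        return False

    def is_linear_regression_output(self, layer: Layer) -> bool:
        # Any output layer that is neither sigmoid nor softmax is
        # treated as linear regression.
        if (layer.is_output
                and not self.is_logistic_regression_output(layer)
                and not self.is_softmax_output(layer)):
            self.loss = EUCLIDEAN
            return True
        return False

d = LossDefaulter()
d.is_linear_regression_output(Layer(kind="FullyConnected", is_output=True))
print(d.loss)  # euclidean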
@@ -67,6 +67,13 @@ public class ConfigurationData {
        return getConfiguration().getEntry("eval_metric").getValue().toString();
    }

+   public String getLoss() {
+       if (!getConfiguration().getEntryMap().containsKey("loss")) {
+           return null;
+       }
+       return getConfiguration().getEntry("loss").getValue().toString();
+   }
+
    public String getOptimizerName() {
        if (getConfiguration().getOptimizer() == null) {
            return null;
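getLoss follows the same pattern as the other optional entries: it returns null when the training configuration does not set loss, so the template emits the argument only when one is present. A hypothetical Python equivalent of that lookup:

def get_loss(entries: dict):
    # Mirror of ConfigurationData.getLoss(): None when the training
    # configuration has no 'loss' entry, its string value otherwise.
    value = entries.get("loss")
    return None if value is None else str(value)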
@@ -65,10 +65,14 @@ class ${tc.fileNameWithoutEnding}:
${tc.include(tc.architecture.body)}

    # this adds the loss and optimizer
-   def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+   def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
        with core.DeviceScope(device_opts):
+           if loss == 'cross_entropy':
                xent = model.LabelCrossEntropy([output, label], 'xent')
                loss = model.AveragedLoss(xent, "loss")
+           elif loss == 'euclidean':
+               dist = model.net.SquaredL2Distance([label, output], 'dist')
+               loss = dist.AveragedLoss([], ['loss'])
            model.AddGradientOperators([loss])

@@ -105,7 +109,7 @@ ${tc.include(tc.architecture.body)}
        accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
        return accuracy

-   def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+   def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='${tc.architectureLoss}', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
        if context == 'cpu':
            device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
            print("CPU mode selected")
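For intuition, a NumPy sketch of what the two branches compute per example; the halving in the euclidean branch follows Caffe2's documented SquaredL2Distance convention (||X - Y||^2 / 2), but treat the exact constants here as an assumption rather than a statement about the operator implementations:

import numpy as np

def label_cross_entropy(probs, labels):
    # 'cross_entropy' branch: negative log-probability of the true class,
    # analogous to LabelCrossEntropy followed by AveragedLoss.
    return -np.log(probs[np.arange(len(labels)), labels])

def squared_l2_distance(x, y):
    # 'euclidean' branch: per-example squared L2 distance; Caffe2's
    # SquaredL2Distance halves it (assumed convention: ||x - y||^2 / 2).
    return 0.5 * np.sum((x - y) ** 2, axis=1)

probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1]])
labels = np.array([0, 1])
print(label_cross_entropy(probs, labels).mean())             # ~0.290
print(squared_l2_distance(probs, np.eye(3)[labels]).mean())  # ~0.050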
@@ -29,6 +29,9 @@ if __name__ == "__main__":
<#if (config.evalMetric)??>
        eval_metric='${config.evalMetric}',
</#if>
+<#if (config.loss)??>
+        loss='${config.loss}',
+</#if>
<#if (config.configuration.optimizer)??>
        opt_type='${config.optimizerName}',
<#list config.optimizerParams?keys as param>
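For a configuration that sets both eval_metric and loss, the template above would generate a call along these lines (illustrative output only; the class name and surrounding arguments are taken from the generated trainers shown below):

if __name__ == "__main__":
    lenet = CNNCreator_LeNet()      # generated creator class
    lenet.train(
        batch_size=100,
        context='gpu',
        eval_metric='mse',
        loss='cross_entropy',       # emitted only because the config sets it
        opt_type='rmsprop',
    )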
@@ -82,10 +82,14 @@ class CNNCreator_LeNet:
        return predictions

    # this adds the loss and optimizer
-   def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+   def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
        with core.DeviceScope(device_opts):
+           if loss == 'cross_entropy':
                xent = model.LabelCrossEntropy([output, label], 'xent')
                loss = model.AveragedLoss(xent, "loss")
+           elif loss == 'euclidean':
+               dist = model.net.SquaredL2Distance([label, output], 'dist')
+               loss = dist.AveragedLoss([], ['loss'])
            model.AddGradientOperators([loss])

@@ -122,7 +126,7 @@ class CNNCreator_LeNet:
        accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
        return accuracy

-   def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+   def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='cross_entropy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
        if context == 'cpu':
            device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
            print("CPU mode selected")
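Because the architecture-derived value is only a default argument, callers can still override it at the call site; a minimal usage sketch, assuming the generated module is importable under its class name:

from CNNCreator_LeNet import CNNCreator_LeNet  # module name assumed

creator = CNNCreator_LeNet()
creator.train(
    num_epoch=10,
    batch_size=64,
    context='cpu',
    loss='euclidean',   # overrides the generated default 'cross_entropy'
)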
@@ -128,10 +128,14 @@ class CNNCreator_VGG16:
        return predictions

    # this adds the loss and optimizer
-   def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
+   def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) :
        with core.DeviceScope(device_opts):
+           if loss == 'cross_entropy':
                xent = model.LabelCrossEntropy([output, label], 'xent')
                loss = model.AveragedLoss(xent, "loss")
+           elif loss == 'euclidean':
+               dist = model.net.SquaredL2Distance([label, output], 'dist')
+               loss = dist.AveragedLoss([], ['loss'])
            model.AddGradientOperators([loss])

@@ -168,7 +172,7 @@ class CNNCreator_VGG16:
        accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
        return accuracy

-   def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
+   def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='cross_entropy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9) :
        if context == 'cpu':
            device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
            print("CPU mode selected")
@@ -18,6 +18,7 @@ if __name__ == "__main__":
        batch_size=100,
        context='gpu',
        eval_metric='mse',
+       loss='cross_entropy',
        opt_type='rmsprop',
        epsilon=1.0E-6,
        weight_decay=0.01,
@@ -3,6 +3,7 @@ configuration FullConfig{
    batch_size : 100
    load_checkpoint : true
    eval_metric : mse
+   loss : cross_entropy
    context : gpu
    normalize : true
    optimizer : rmsprop{