
Commit 09e42ded authored by Christian Fuß

fixed various bugs, in particular to allow integer outputs in C++ code generation and to fix shape errors for LSTM inputs
parent 26ad1bfe
Pipeline #186458 failed with stages in 1 minute and 34 seconds
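Note on the LSTM shape fix: several hunks below change mx.nd.argmax(..., axis=1) to mx.nd.argmax(..., axis=1).expand_dims(1). A minimal sketch of the reason, with illustrative shapes that are not taken from the commit:

import mxnet as mx

scores = mx.nd.random.uniform(shape=(4, 10))        # (batch, vocab)
ids = mx.nd.argmax(scores, axis=1)                  # shape (4,): argmax drops the reduced axis
ids = mx.nd.argmax(scores, axis=1).expand_dims(1)   # shape (4, 1): valid 2-D input for an LSTM step
print(ids.shape)                                    # (4, 1)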
@@ -188,7 +188,7 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
}
}
outputNames.addAll(getStreamLayerVariableMembers(stream, "1", true).keySet());
outputNames.addAll(getStreamLayerVariableMembers(stream, "1", true, false).keySet());
return outputNames;
}
@@ -208,13 +208,13 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
}
// Used to initialize all layer variable members which are passed through the networks
public Map<String, List<List<String>>> getLayerVariableMembers(String batchSize) {
public Map<String, List<List<String>>> getLayerVariableMembers(String batchSize, boolean includeStates) {
Map<String, List<List<String>>> members = new LinkedHashMap<>();
int index = 0;
for (SerialCompositeElementSymbol stream : getArchitecture().getStreams()) {
List<List<String>> value = new ArrayList<>();
Map<String, List<String>> member = getStreamLayerVariableMembers(stream, batchSize, true);
Map<String, List<String>> member = getStreamLayerVariableMembers(stream, batchSize, true, includeStates);
for (List<String> entry: member.values()){
value.add(entry);
ArrayList<String> streamIndex = new ArrayList<String>();
@@ -222,7 +222,9 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
value.add(streamIndex);
}
for(String name: member.keySet()){
members.put(name, value);
if(!members.containsKey(name)) {
members.put(name, value);
}
}
index++;
}
@@ -277,12 +279,12 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
}
}
inputs.putAll(getStreamLayerVariableMembers(stream, "1", false));
inputs.putAll(getStreamLayerVariableMembers(stream, "1", false, false));
return inputs;
}
private Map<String, List<String>> getStreamLayerVariableMembers(SerialCompositeElementSymbol stream, String batchSize, boolean includeOutput) {
private Map<String, List<String>> getStreamLayerVariableMembers(SerialCompositeElementSymbol stream, String batchSize, boolean includeOutput, boolean includeStates) {
Map<String, List<String>> members = new LinkedHashMap<>();
List<ArchitectureElementSymbol> elements = stream.getSpannedScope().resolveLocally(ArchitectureElementSymbol.KIND);
@@ -290,7 +292,7 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
if (element instanceof VariableSymbol) {
VariableSymbol variable = (VariableSymbol) element;
if (variable.getType() == VariableSymbol.Type.LAYER && variable.getMember() == VariableSymbol.Member.NONE) {
if (variable.getType() == VariableSymbol.Type.LAYER && (variable.getMember() == VariableSymbol.Member.NONE || includeStates)) {
LayerVariableDeclarationSymbol layerVariableDeclaration = variable.getLayerVariableDeclaration();
if (layerVariableDeclaration.getLayer().getDeclaration().isPredefined()) {
......
@@ -32,6 +32,39 @@ class LogCoshLoss(gluon.loss.Loss):
return F.mean(loss, axis=self._batch_axis, exclude=True)
class ${tc.fileNameWithoutEnding}:
def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
bestProb = 0.0
while depth < maxDepth:
depth += 1
batchIndex = 0
for batchEntry in input:
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
if depth == 1:
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, bestOutput)
if depth == maxDepth:
#print currProb
if currProb > bestProb:
bestProb = currProb
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
def __init__(self, data_loader, net_constructor):
self._data_loader = data_loader
self._net_creator = net_constructor
@@ -140,16 +173,14 @@ class ${tc.fileNameWithoutEnding}:
</#list>
with autograd.record():
<#include "pythonExecuteArgmax.ftl">
<#include "pythonExecuteWithLoss.ftl">
loss = 0
for element in lossList:
loss = loss + element
loss.backward()
for trainer in trainers:
trainer.step(batch_size)
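The generated training step sums the per-output losses and backpropagates once; a hedged, self-contained sketch of the same pattern (net, loss_function, and shapes are illustrative, not from the generator):

import mxnet as mx
from mxnet import autograd, gluon

net = gluon.nn.Dense(10)
net.initialize()
loss_function = gluon.loss.SoftmaxCrossEntropyLoss()
trainers = [gluon.Trainer(net.collect_params(), 'adam')]

batch_size = 8
data_ = mx.nd.random.uniform(shape=(batch_size, 4))
label = mx.nd.zeros((batch_size,))
with autograd.record():
    lossList = [loss_function(net(data_), label)]   # one entry per trainable stream
    loss = 0
    for element in lossList:                        # summed exactly as in the template
        loss = loss + element
loss.backward()
for trainer in trainers:
    trainer.step(batch_size)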
@@ -182,56 +213,19 @@ class ${tc.fileNameWithoutEnding}:
]
def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
bestProb = 0.0
while depth < maxDepth:
depth += 1
batchIndex = 0
for batchEntry in input:
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
if depth == 1:
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, bestOutput)
if depth == maxDepth:
#print currProb
if currProb > bestProb:
bestProb = currProb
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
outputs=[]
if True: <#-- Fix indentation -->
<#include "pythonExecute.ftl">
out_names=[]
<#list tc.architectureOutputs as output_name>
out_names.append(${output_name})
</#list>
predictions = []
for output_name in out_names:
for output_name in outputs:
if mx.nd.shape_array(output_name).size > 1:
predictions.append(mx.nd.argmax(output_name, axis=1))
#ArgMax already applied
else:
predictions.append(output_name)
metric.update(preds=predictions, labels=labels)
train_metric_score = metric.get()[1]
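The metric path now iterates the collected outputs list: mx.nd.shape_array(x) returns the shape of x as a 1-D NDArray, so its .size equals the rank of x, and argmax is applied only to outputs that still carry a class axis. A small illustration with made-up values:

import mxnet as mx

outputs = [mx.nd.random.uniform(shape=(4, 10)),   # raw class scores, rank 2
           mx.nd.array([1., 3., 2., 0.])]         # ArgMax already applied, rank 1
predictions = []
for output_name in outputs:
    if mx.nd.shape_array(output_name).size > 1:   # rank > 1: reduce the class axis
        predictions.append(mx.nd.argmax(output_name, axis=1))
    else:                                         # ArgMax already applied
        predictions.append(output_name)
print([p.shape for p in predictions])             # [(4,), (4,)]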
@@ -249,15 +243,13 @@ class ${tc.fileNameWithoutEnding}:
]
outputs=[]
if True: <#-- Fix indentation -->
<#include "pythonExecute.ftl">
out_names=[]
<#list tc.architectureOutputs as output_name>
out_names.append(${output_name})
</#list>
predictions = []
for output_name in out_names:
for output_name in outputs:
if mx.nd.shape_array(output_name).size > 1:
predictions.append(mx.nd.argmax(output_name, axis=1))
#ArgMax already applied
......
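Review note on applyBeamSearch (added twice in the trainer template above): as written it shadows the built-in input, calls self._networks inside a plain function that has no self parameter, and mixes a while loop with recursion over the same depth counter. A hedged, iterative sketch of the intended search, where step stands in for self._networks[netIndex] and maps a token id to a probability vector:

import mxnet as mx

def beam_search(step, start_scores, width, max_depth):
    beams = [([], 1.0, start_scores)]                 # (tokens, prob, last score vector)
    for _ in range(max_depth):
        candidates = []
        for tokens, prob, scores in beams:
            top_k_indices = mx.nd.topk(scores, axis=0, k=width)
            top_k_values = mx.nd.topk(scores, ret_typ='value', axis=0, k=width)
            for i in range(width):
                token = int(top_k_indices[i].asscalar())
                p = prob * top_k_values[i].asscalar()
                candidates.append((tokens + [token], p, step(token)))
        candidates.sort(key=lambda c: c[1], reverse=True)
        beams = candidates[:width]                    # keep the width best hypotheses
    return beams[0][0]                                # most probable token sequence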
<#list tc.architectureInputSymbols as input>
vector<float> ${tc.getName(input)} = CNNTranslator::translate(${input.name}<#if input.arrayAccess.isPresent()>[${input.arrayAccess.get().intValue.get()?c}]</#if>);
</#list>
<#list tc.getLayerVariableMembers("1")?keys as member>
vector<float> ${member}(${tc.join(tc.getLayerVariableMembers("1")[member][0], " * ")});
<#list tc.getLayerVariableMembers("1", true)?keys as member>
vector<float> ${member}(${tc.join(tc.getLayerVariableMembers("1", true)[member][0], " * ")});
</#list>
<#list tc.architectureOutputSymbols as output>
@@ -26,12 +26,25 @@
<#list tc.architectureOutputSymbols as output>
<#assign shape = output.ioDeclaration.type.dimensions>
<#if shape?size == 1>
<#if (output.ioDeclaration.type.domain.isNaturalNumber() || output.ioDeclaration.type.domain.isWholeNumber())>
${output.name}<#if output.arrayAccess.isPresent()>[${output.arrayAccess.get().intValue.get()?c}]</#if> = CNNTranslator::translateToIntCol(${tc.getName(output)}, std::vector<size_t> {${shape[0]?c}});
<#else>
${output.name}<#if output.arrayAccess.isPresent()>[${output.arrayAccess.get().intValue.get()?c}]</#if> = CNNTranslator::translateToCol(${tc.getName(output)}, std::vector<size_t> {${shape[0]?c}});
</#if>
</#if>
<#if shape?size == 2>
<#if (output.ioDeclaration.type.domain.isNaturalNumber() || output.ioDeclaration.type.domain.isWholeNumber())>
${output.name}<#if output.arrayAccess.isPresent()>[${output.arrayAccess.get().intValue.get()?c}]</#if> = CNNTranslator::translateToIntMat(${tc.getName(output)}, std::vector<size_t> {${shape[0]?c}, ${shape[1]?c}});
<#else>
${output.name}<#if output.arrayAccess.isPresent()>[${output.arrayAccess.get().intValue.get()?c}]</#if> = CNNTranslator::translateToMat(${tc.getName(output)}, std::vector<size_t> {${shape[0]?c}, ${shape[1]?c}});
</#if>
</#if>
<#if shape?size == 3>
<#if (output.ioDeclaration.type.domain.isNaturalNumber() || output.ioDeclaration.type.domain.isWholeNumber())>
${output.name}<#if output.arrayAccess.isPresent()>[${output.arrayAccess.get().intValue.get()?c}]</#if> = CNNTranslator::translateToIntCube(${tc.getName(output)}, std::vector<size_t> {${shape[0]?c}, ${shape[1]?c}, ${shape[2]?c}});
<#else>
${output.name}<#if output.arrayAccess.isPresent()>[${output.arrayAccess.get().intValue.get()?c}]</#if> = CNNTranslator::translateToCube(${tc.getName(output)}, std::vector<size_t> {${shape[0]?c}, ${shape[1]?c}, ${shape[2]?c}});
</#if>
</#if>
</#list>
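The new branches route outputs whose domain is a natural or whole number through CNNTranslator::translateToInt{Col,Mat,Cube} instead of the float variants. A hedged sketch of the assumed semantics (the translate_to_int helper below is hypothetical, not the CNNTranslator API): reshape the flat float buffer to the declared dimensions and keep integer-domain values as integers:

import numpy as np

def translate_to_int(flat, shape):
    # hypothetical mirror of translateToIntCol/Mat/Cube: reshape to the
    # declared dimensions, then return integers instead of floats
    return np.asarray(flat, dtype=np.float32).reshape(shape).astype(np.int64)

print(translate_to_int([0.0, 2.0, 1.0, 3.0], (2, 2)))   # [[0 2] [1 3]]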
<#list tc.getLayerVariableMembers("batch_size")?keys as member>
<#list tc.getLayerVariableMembers("batch_size", false)?keys as member>
<#if member?ends_with("_state_")>
encoder_state_ = self._networks[${tc.getLayerVariableMembers("batch_size")[member][1][0]}].${member?replace("_state_","_output_")}.begin_state(batch_size=0, ctx=mx_context)
${member} = self._networks[${tc.getLayerVariableMembers("batch_size", false)[member][1][0]}].${member?replace("_state_","_output_")}.begin_state(batch_size=batch_size, ctx=mx_context)
<#else>
${member} = mx.nd.zeros((${tc.join(tc.getLayerVariableMembers("batch_size")[member][0], ", ")},), ctx=mx_context)
${member} = mx.nd.zeros((${tc.join(tc.getLayerVariableMembers("batch_size", false)[member][0], ", ")},), ctx=mx_context)
</#if>
</#list>
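The state-initialization hunk above replaces begin_state(batch_size=0, ...) assigned to a stray encoder_state_ with begin_state(batch_size=batch_size, ...) assigned to the member itself. A hedged illustration of why the real batch size matters (layer sizes and shapes are made up):

import mxnet as mx
from mxnet import gluon

lstm = gluon.rnn.LSTM(hidden_size=16)
lstm.initialize()
batch_size = 4
state = lstm.begin_state(batch_size=batch_size, ctx=mx.cpu())  # states sized by batch
data = mx.nd.random.uniform(shape=(5, batch_size, 8))          # (seq_len, batch, features)
out, state = lstm(data, state)
print(out.shape)                                               # (5, 4, 16)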
<#list tc.architectureOutputSymbols as output>
@@ -20,9 +20,12 @@
${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = applyBeamSearch(input, 0, ${depth}, ${width}, 1.0, ${networkInstruction?index}, input)
<#else>
${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")})
<#if !(tc.getStreamOutputNames(networkInstruction.body)[0]?ends_with("_output_"))>
outputs.append(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]})
</#if>
<#list resolvedBody.elements as element>
<#if element.name == "ArgMax">
${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}, axis=1)
${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}, axis=1).expand_dims(1)
</#if>
</#list>
</#if>
@@ -30,13 +33,19 @@
<#else>
<#if networkInstruction.body.isTrainable()>
${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body), ", ")})
<#if !(tc.getStreamOutputNames(networkInstruction.body)[0]?ends_with("_output_"))>
outputs.append(${tc.getStreamOutputNames(networkInstruction.body)[0]})
</#if>
<#list networkInstruction.body.elements as element>
<#if element.name == "ArgMax" && (tc.architecture.networkInstructions?size <= instructionCounter+1 || tc.architecture.networkInstructions[instructionCounter+1].getName() != "BeamSearch")>
${tc.getStreamOutputNames(networkInstruction.body)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body)[0]}, axis=1)
${tc.getStreamOutputNames(networkInstruction.body)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body)[0]}, axis=1).expand_dims(1)
</#if>
</#list>
<#else>
${tc.include(networkInstruction.body, "PYTHON_INLINE")}
<#if !(tc.getStreamOutputNames(networkInstruction.body)[0]?ends_with("_state_"))>
outputs.append(${tc.getStreamOutputNames(networkInstruction.body)[0]})
</#if>
</#if>
</#if>
<#assign instructionCounter = instructionCounter + 1>
......
<#list tc.getLayerVariableMembers("batch_size")?keys as member>
<#list tc.getLayerVariableMembers("batch_size", false)?keys as member>
<#if member?ends_with("_state_")>
encoder_state_ = self._networks[${tc.getLayerVariableMembers("batch_size")[member][1][0]}].${member?replace("_state_","_output_")}.begin_state(batch_size=0, ctx=mx_context)
${member} = self._networks[${tc.getLayerVariableMembers("batch_size", false)[member][1][0]}].${member?replace("_state_","_output_")}.begin_state(batch_size=batch_size, ctx=mx_context)
<#else>
${member} = mx.nd.zeros((${tc.join(tc.getLayerVariableMembers("batch_size")[member][0], ", ")},), ctx=mx_context)
${member} = mx.nd.zeros((${tc.join(tc.getLayerVariableMembers("batch_size", false)[member][0], ", ")},), ctx=mx_context)
</#if>
</#list>
<#list tc.architectureOutputSymbols as output>
@@ -17,19 +17,19 @@
lossList.append(loss_function(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}, ${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}label))
<#list resolvedBody.elements as element>
<#if element.name == "ArgMax">
${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}, axis=1)
${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}, axis=1).expand_dims(1)
</#if>
</#list>
</#list>
<#else>
<#if networkInstruction.body.isTrainable()>
${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body), ", ")})
<#if tc.getStreamOutputNames(networkInstruction.body)[0] != "encoder_output_">
<#if !(tc.getStreamOutputNames(networkInstruction.body)[0]?ends_with("_output_"))>
lossList.append(loss_function(${tc.getStreamOutputNames(networkInstruction.body)[0]}, ${tc.getStreamOutputNames(networkInstruction.body)[0]}label))
</#if>
<#list networkInstruction.body.elements as element>
<#if element.name == "ArgMax">
${tc.getStreamOutputNames(networkInstruction.body)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body)[0]}, axis=1)
${tc.getStreamOutputNames(networkInstruction.body)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body)[0]}, axis=1).expand_dims(1)
</#if>
</#list>
<#else>
......
@@ -32,6 +32,39 @@ class LogCoshLoss(gluon.loss.Loss):
return F.mean(loss, axis=self._batch_axis, exclude=True)
class CNNSupervisedTrainer_Alexnet:
def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
bestProb = 0.0
while depth < maxDepth:
depth += 1
batchIndex = 0
for batchEntry in input:
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
if depth == 1:
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, bestOutput)
if depth == maxDepth:
#print currProb
if currProb > bestProb:
bestProb = currProb
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
def __init__(self, data_loader, net_constructor):
self._data_loader = data_loader
self._net_creator = net_constructor
@@ -146,10 +179,8 @@ class CNNSupervisedTrainer_Alexnet:
for element in lossList:
loss = loss + element
loss.backward()
for trainer in trainers:
trainer.step(batch_size)
@@ -177,56 +208,22 @@ class CNNSupervisedTrainer_Alexnet:
batch.label[0].as_in_context(mx_context)
]
def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
bestProb = 0.0
while depth < maxDepth:
depth += 1
batchIndex = 0
for batchEntry in input:
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
if depth == 1:
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, bestOutput)
if depth == maxDepth:
#print currProb
if currProb > bestProb:
bestProb = currProb
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
outputs=[]
if True:
predictions_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context)
predictions_ = self._networks[0](data_)
outputs.append(predictions_)
out_names=[]
out_names.append(predictions_)
predictions = []
for output_name in out_names:
for output_name in outputs:
if mx.nd.shape_array(output_name).size > 1:
predictions.append(mx.nd.argmax(output_name, axis=1))
#ArgMax already applied
else:
predictions.append(output_name)
metric.update(preds=predictions, labels=labels)
train_metric_score = metric.get()[1]
@@ -239,15 +236,16 @@ class CNNSupervisedTrainer_Alexnet:
batch.label[0].as_in_context(mx_context)
]
outputs=[]
if True:
predictions_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context)
predictions_ = self._networks[0](data_)
outputs.append(predictions_)
out_names=[]
out_names.append(predictions_)
predictions = []
for output_name in out_names:
for output_name in outputs:
if mx.nd.shape_array(output_name).size > 1:
predictions.append(mx.nd.argmax(output_name, axis=1))
#ArgMax already applied
......
@@ -32,6 +32,39 @@ class LogCoshLoss(gluon.loss.Loss):
return F.mean(loss, axis=self._batch_axis, exclude=True)
class CNNSupervisedTrainer_CifarClassifierNetwork:
def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
bestProb = 0.0
while depth < maxDepth:
depth += 1
batchIndex = 0
for batchEntry in input:
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
if depth == 1:
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, bestOutput)
if depth == maxDepth:
#print currProb
if currProb > bestProb:
bestProb = currProb
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
def __init__(self, data_loader, net_constructor):
self._data_loader = data_loader
self._net_creator = net_constructor
@@ -146,10 +179,8 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
for element in lossList:
loss = loss + element
loss.backward()
for trainer in trainers:
trainer.step(batch_size)
@@ -177,56 +208,22 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
batch.label[0].as_in_context(mx_context)
]
def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
bestProb = 0.0
while depth < maxDepth:
depth += 1
batchIndex = 0
for batchEntry in input:
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
if depth == 1:
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, bestOutput)
if depth == maxDepth:
#print currProb
if currProb > bestProb:
bestProb = currProb
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
outputs=[]
if True:
softmax_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context)
softmax_ = self._networks[0](data_)
outputs.append(softmax_)
out_names=[]
out_names.append(softmax_)
predictions = []
for output_name in out_names:
for output_name in outputs:
if mx.nd.shape_array(output_name).size > 1:
predictions.append(mx.nd.argmax(output_name, axis=1))
#ArgMax already applied
else:
predictions.append(output_name)
metric.update(preds=predictions, labels=labels)
train_metric_score = metric.get()[1]
@@ -239,15 +236,16 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
batch.label[0].as_in_context(mx_context)
]
outputs=[]
if True:
softmax_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context)
softmax_ = self._networks[0](data_)
outputs.append(softmax_)
out_names=[]
out_names.append(softmax_)
predictions = []
for output_name in out_names:
for output_name in outputs:
if mx.nd.shape_array(output_name).size > 1:
predictions.append(mx.nd.argmax(output_name, axis=1))
#ArgMax already applied
......
@@ -32,6 +32,39 @@ class LogCoshLoss(gluon.loss.Loss):
return F.mean(loss, axis=self._batch_axis, exclude=True)
class CNNSupervisedTrainer_VGG16: