Commit ea781680 authored by Sebastian N.'s avatar Sebastian N.
Browse files

Merge

parents 4b0341f8 720a8fe8
Pipeline #204106 failed with stages
in 23 seconds
......@@ -11,6 +11,8 @@ import de.se_rwth.commons.logging.Log;
import java.io.Writer;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
public static final String NET_DEFINITION_MODE_KEY = "mode";
......@@ -20,6 +22,8 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
super(architecture, templateConfiguration);
}
public void include(String relativePath, String templateWithoutFileEnding, Writer writer, NetDefinitionMode netDefinitionMode){
String templatePath = relativePath + templateWithoutFileEnding + FTL_FILE_ENDING;
Map<String, Object> ftlContext = new HashMap<>();
......@@ -109,14 +113,13 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
include(architectureElement, getWriter(), netDefinitionMode);
}
public Set<String> getStreamInputNames(SerialCompositeElementSymbol stream) {
return getStreamInputs(stream).keySet();
public Set<String> getStreamInputNames(SerialCompositeElementSymbol stream, boolean outputAsArray) {
return getStreamInputs(stream, outputAsArray).keySet();
}
// used for unroll
public List<String> getStreamInputNames(SerialCompositeElementSymbol stream, SerialCompositeElementSymbol currentStream) {
List<String> inputNames = new LinkedList<>(getStreamInputNames(stream));
Map<String, String> pairs = getUnrollPairs(stream, currentStream);
public List<String> getUnrollInputNames(UnrollInstructionSymbol unroll, String variable) {
List<String> inputNames = new LinkedList<>(getStreamInputNames(unroll.getBody(), true));
Map<String, String> pairs = getUnrollPairs(unroll.getBody(), unroll.getResolvedBodies().get(0), variable);
for (int i = 0; i != inputNames.size(); ++i) {
if (pairs.containsKey(inputNames.get(i))) {
......@@ -128,28 +131,60 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
}
public Collection<List<String>> getStreamInputDimensions(SerialCompositeElementSymbol stream) {
return getStreamInputs(stream).values();
return getStreamInputs(stream, false).values();
}
// Returns the normalized name of the architecture's first output symbol:
// the raw symbol name with any trailing "<digits>_" suffix or "[...]" index
// stripped via getNameWithoutIndex. Assumes at least one output symbol
// exists — NOTE(review): will throw IndexOutOfBoundsException otherwise; confirm callers guarantee this.
public String getOutputName() {
return getNameWithoutIndex(getName(getArchitectureOutputSymbols().get(0)));
}
/**
 * Rewrites a trailing numeric index of the form "&lt;digits&gt;_" into array
 * syntax, e.g. "output1_" becomes "output[1]". Names without such a
 * suffix are returned unchanged.
 */
public String getNameAsArray(String name) {
    Matcher trailingIndex = Pattern.compile("([0-9]+)_$").matcher(name);
    return trailingIndex.replaceAll("[$1]");
}
/**
 * Strips any trailing index notation from a name: first a "&lt;digits&gt;_"
 * suffix (e.g. "output1_" -> "output"), then a bracketed "[...]" index
 * (e.g. "output[1]" -> "output").
 */
public String getNameWithoutIndex(String name) {
    String withoutUnderscoreIndex = name.replaceAll("([0-9]+)_$", "");
    String withoutBracketIndex = withoutUnderscoreIndex.replaceAll("\\[[^\\]]+\\]$", "");
    return withoutBracketIndex;
}
/**
 * Extracts the index expression from a trailing "[...]" in the given name,
 * e.g. "output[t-1]" yields "t-1". If the name carries no bracketed index,
 * returns "0" when defaultToZero is set, otherwise the empty string.
 */
public String getIndex(String name, boolean defaultToZero) {
    Matcher bracketed = Pattern.compile("\\[([^\\]]+)\\]$").matcher(name);
    if (!bracketed.find()) {
        return defaultToZero ? "0" : "";
    }
    return bracketed.group(1);
}
public Set<String> getStreamOutputNames(SerialCompositeElementSymbol stream) {
public Set<String> getStreamOutputNames(SerialCompositeElementSymbol stream, boolean asArray) {
Set<String> outputNames = new LinkedHashSet<>();
for (ArchitectureElementSymbol element : stream.getLastAtomicElements()) {
if (element.isOutput()) {
outputNames.add(getName(element));
String name = getName(element);
if (asArray && element instanceof VariableSymbol) {
VariableSymbol variable = (VariableSymbol) element;
if (variable.getType() == VariableSymbol.Type.IO) {
name = getNameAsArray(name);
}
}
outputNames.add(name);
}
}
outputNames.addAll(getStreamLayerVariableMembers(stream, true, false).keySet());
outputNames.addAll(getStreamLayerVariableMembers(stream, true).keySet());
return outputNames;
}
// used for unroll
public List<String> getStreamOutputNames(SerialCompositeElementSymbol stream, SerialCompositeElementSymbol currentStream) {
List<String> outputNames = new LinkedList<>(getStreamOutputNames(stream));
Map<String, String> pairs = getUnrollPairs(stream, currentStream);
public List<String> getUnrollOutputNames(UnrollInstructionSymbol unroll, String variable) {
List<String> outputNames = new LinkedList<>(getStreamOutputNames(unroll.getBody(), true));
Map<String, String> pairs = getUnrollPairs(unroll.getBody(), unroll.getResolvedBodies().get(0), variable);
for (int i = 0; i != outputNames.size(); ++i) {
if (pairs.containsKey(outputNames.get(i))) {
......@@ -160,12 +195,25 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
return outputNames;
}
/**
 * Reports whether the element directly preceding the stream's output is an
 * ArgMax layer — i.e. argmax has already been applied inside the network,
 * so post-processing must not apply it again. Streams with at most one
 * element cannot match.
 */
public boolean endsWithArgmax(SerialCompositeElementSymbol stream) {
    List<ArchitectureElementSymbol> elements = stream.getElements();
    if (elements.size() <= 1) {
        return false;
    }
    // The final element is the output itself, so inspect the one before it.
    ArchitectureElementSymbol beforeOutput = elements.get(elements.size() - 2);
    return beforeOutput.getName().equals(AllPredefinedLayers.ARG_MAX_NAME);
}
// Used to initialize all layer variable members which are passed through the networks
public Map<String, List<String>> getLayerVariableMembers(boolean generateStateInitializers) {
public Map<String, List<String>> getLayerVariableMembers() {
Map<String, List<String>> members = new LinkedHashMap<>();
for (SerialCompositeElementSymbol stream : getArchitecture().getStreams()) {
members.putAll(getStreamLayerVariableMembers(stream, true, generateStateInitializers));
for (NetworkInstructionSymbol networkInstruction : getArchitecture().getNetworkInstructions()) {
members.putAll(getStreamLayerVariableMembers(networkInstruction.getBody(), true));
}
return members;
......@@ -173,7 +221,7 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
// Calculate differently named VariableSymbol elements in two streams, currently used for the UnrollInstructionSymbol
// body which is resolved with t = CONST_OFFSET and the current body of the actual timestep t
public Map<String, String> getUnrollPairs(ArchitectureElementSymbol element, ArchitectureElementSymbol current) {
public Map<String, String> getUnrollPairs(ArchitectureElementSymbol element, ArchitectureElementSymbol current, String variable) {
Map<String, String> pairs = new HashMap<>();
if (element instanceof CompositeElementSymbol && current instanceof CompositeElementSymbol) {
......@@ -185,13 +233,24 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
String name = getName(elements.get(i));
String currentName = getName(currentElements.get(i));
if (elements.get(i).isOutput()) {
name = getNameAsArray(name);
}
if (currentElements.get(i).isOutput()) {
currentName = getNameAsArray(currentName);
}
if (elements.get(i) instanceof VariableSymbol && currentElements.get(i) instanceof VariableSymbol) {
if (name != null && currentName != null && !name.equals(currentName)) {
String newIndex = variable + "-1+" + getIndex(currentName, true);
currentName = currentName.replaceAll("\\[([0-9]+)\\]$", "[" + newIndex + "]");
pairs.put(name, currentName);
}
}
pairs.putAll(getUnrollPairs(elements.get(i), currentElements.get(i)));
pairs.putAll(getUnrollPairs(elements.get(i), currentElements.get(i), variable));
}
}
}
......@@ -199,7 +258,7 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
return pairs;
}
private Map<String, List<String>> getStreamInputs(SerialCompositeElementSymbol stream) {
private Map<String, List<String>> getStreamInputs(SerialCompositeElementSymbol stream, boolean outputAsArray) {
Map<String, List<String>> inputs = new LinkedHashMap<>();
for (ArchitectureElementSymbol element : stream.getFirstAtomicElements()) {
......@@ -211,19 +270,29 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
dimensions.add(intDimension.toString());
}
inputs.put(getName(element), dimensions);
String name = getName(element);
if (outputAsArray && element.isOutput() && element instanceof VariableSymbol) {
VariableSymbol variable = (VariableSymbol) element;
if (variable.getType() == VariableSymbol.Type.IO) {
name = getNameAsArray(name);
}
}
inputs.put(name, dimensions);
}
else if (element instanceof ConstantSymbol) {
inputs.put(getName(element), Arrays.asList("1"));
}
}
inputs.putAll(getStreamLayerVariableMembers(stream, false, false));
inputs.putAll(getStreamLayerVariableMembers(stream, false));
return inputs;
}
private Map<String, List<String>> getStreamLayerVariableMembers(SerialCompositeElementSymbol stream, boolean includeOutput, boolean generateStateInitializaters) {
private Map<String, List<String>> getStreamLayerVariableMembers(SerialCompositeElementSymbol stream, boolean includeOutput) {
Map<String, List<String>> members = new LinkedHashMap<>();
List<ArchitectureElementSymbol> elements = stream.getSpannedScope().resolveLocally(ArchitectureElementSymbol.KIND);
......@@ -231,7 +300,7 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
if (element instanceof VariableSymbol) {
VariableSymbol variable = (VariableSymbol) element;
if (variable.getType() == VariableSymbol.Type.LAYER && (variable.getMember() == VariableSymbol.Member.NONE || generateStateInitializaters)) {
if (variable.getType() == VariableSymbol.Type.LAYER && (variable.getMember() == VariableSymbol.Member.NONE)) {
LayerVariableDeclarationSymbol layerVariableDeclaration = variable.getLayerVariableDeclaration();
if (layerVariableDeclaration.getLayer().getDeclaration().isPredefined()) {
......@@ -303,16 +372,27 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
return dimensions;
}
/**
 * Reports whether the architecture contains at least one unroll network
 * instruction (used e.g. to decide between per-timestep and batched
 * evaluation in generated training code).
 */
public boolean hasUnrollInstructions() {
    return getArchitecture().getNetworkInstructions().stream()
            .anyMatch(NetworkInstructionSymbol::isUnroll);
}
// Returns true if the generated component is one of the predefined attention
// models (membership test against AllAttentionModels by component name);
// used by templates to emit attention-specific outputs such as attention_output_.
public boolean isAttentionNetwork(){
return AllAttentionModels.getAttentionModels().contains(getComponentName());
}
public int getBeamSearchWidth(UnrollInstructionSymbol unroll){
return unroll.getIntValue(AllPredefinedLayers.WIDTH_NAME).get();
public int getBeamSearchMaxLength(UnrollInstructionSymbol unroll){
return unroll.getIntValue(AllPredefinedLayers.MAX_LENGTH_NAME).get();
}
public int getBeamSearchLength(UnrollInstructionSymbol unroll){
return unroll.getIntValue(AllPredefinedLayers.MAX_LENGTH_NAME).get();
public int getBeamSearchWidth(UnrollInstructionSymbol unroll){
// Beam search with width 1 is greedy search
return unroll.getIntValue(AllPredefinedLayers.WIDTH_NAME).orElse(1);
}
}
......@@ -6,9 +6,7 @@ package de.monticore.lang.monticar.cnnarch.gluongenerator;
*/
public enum NetDefinitionMode {
ARCHITECTURE_DEFINITION,
FORWARD_FUNCTION,
PYTHON_INLINE,
CPP_INLINE;
FORWARD_FUNCTION;
public static NetDefinitionMode fromString(final String netDefinitionMode) {
switch(netDefinitionMode) {
......@@ -16,10 +14,6 @@ public enum NetDefinitionMode {
return ARCHITECTURE_DEFINITION;
case "FORWARD_FUNCTION":
return FORWARD_FUNCTION;
case "PYTHON_INLINE":
return PYTHON_INLINE;
case "CPP_INLINE":
return CPP_INLINE;
default:
throw new IllegalArgumentException("Unknown Net Definition Mode");
}
......
......@@ -4,9 +4,7 @@ import logging
import os
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.body.isTrainable()>
from CNNNet_${tc.fullArchitectureName} import Net_${networkInstruction?index}
</#if>
</#list>
class ${tc.fileNameWithoutEnding}:
......@@ -54,12 +52,10 @@ class ${tc.fileNameWithoutEnding}:
def construct(self, context, data_mean=None, data_std=None):
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.body.isTrainable()>
self.networks[${networkInstruction?index}] = Net_${networkInstruction?index}(data_mean=data_mean, data_std=data_std)
self.networks[${networkInstruction?index}].collect_params().initialize(self.weight_initializer, ctx=context)
self.networks[${networkInstruction?index}].hybridize()
self.networks[${networkInstruction?index}](<#list tc.getStreamInputDimensions(networkInstruction.body) as dimensions>mx.nd.zeros((1, ${tc.join(tc.cutDimensions(dimensions), ",")},), ctx=context)<#sep>, </#list>)
</#if>
</#list>
if not os.path.exists(self._model_dir_):
......
......@@ -13,7 +13,7 @@ class ${tc.fileNameWithoutEnding}:
def __init__(self):
self._data_dir = "${tc.dataPath}/"
def load_data(self, batch_size):
def load_data(self, train_batch_size, test_batch_size):
train_h5, test_h5 = self.load_h5_files()
train_data = {}
......@@ -37,7 +37,11 @@ class ${tc.fileNameWithoutEnding}:
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
batch_size=train_batch_size)
train_test_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=test_batch_size)
test_iter = None
......@@ -56,12 +60,11 @@ class ${tc.fileNameWithoutEnding}:
test_label[index] = test_h5[output_name]
index += 1
test_iter = mx.io.NDArrayIter(data=test_data,
label=test_label,
batch_size=batch_size)
batch_size=test_batch_size)
return train_iter, test_iter, data_mean, data_std, train_images, test_images
return train_iter, train_test_iter, test_iter, data_mean, data_std, train_images, test_images
def load_h5_files(self):
train_h5 = None
......
......@@ -88,22 +88,19 @@ class CustomGRU(gluon.HybridBlock):
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.body.isTrainable()>
class Net_${networkInstruction?index}(gluon.HybridBlock):
def __init__(self, data_mean=None, data_std=None, **kwargs):
super(Net_${networkInstruction?index}, self).__init__(**kwargs)
self.last_layers = {}
with self.name_scope():
${tc.include(networkInstruction.body, "ARCHITECTURE_DEFINITION")}
pass
def hybrid_forward(self, F, ${tc.join(tc.getStreamInputNames(networkInstruction.body), ", ")}):
def hybrid_forward(self, F, ${tc.join(tc.getStreamInputNames(networkInstruction.body, false), ", ")}):
${tc.include(networkInstruction.body, "FORWARD_FUNCTION")}
<#if tc.isAttentionNetwork() && networkInstruction.isUnroll() >
return ${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ")}, attention_output_
return ${tc.join(tc.getStreamOutputNames(networkInstruction.body, false), ", ")}, attention_output_
<#else>
return ${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ")}
return ${tc.join(tc.getStreamOutputNames(networkInstruction.body, false), ", ")}
</#if>
</#if>
</#list>
......@@ -11,16 +11,15 @@
#include <CNNBufferFile.h>
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.body.isTrainable()>
class ${tc.fileNameWithoutEnding}_${networkInstruction?index}{
public:
const std::string json_file = "model/${tc.componentName}/model_${networkInstruction?index}_newest-symbol.json";
const std::string param_file = "model/${tc.componentName}/model_${networkInstruction?index}_newest-0000.params";
const std::vector<std::string> input_keys = {
<#if tc.getStreamInputNames(networkInstruction.body)?size == 1>
<#if tc.getStreamInputNames(networkInstruction.body, true)?size == 1>
"data"
<#else>
<#list tc.getStreamInputNames(networkInstruction.body) as variable>"data${variable?index}"<#sep>, </#list>
<#list tc.getStreamInputNames(networkInstruction.body, true) as variable>"data${variable?index}"<#sep>, </#list>
</#if>
};
const std::vector<std::vector<mx_uint>> input_shapes = {<#list tc.getStreamInputDimensions(networkInstruction.body) as dimensions>{${tc.join(dimensions, ", ")}}<#sep>, </#list>};
......@@ -36,9 +35,9 @@ public:
if(handle) MXPredFree(handle);
}
void predict(${tc.join(tc.getStreamInputNames(networkInstruction.body), ", ", "const std::vector<float> &in_", "")},
${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ", "std::vector<float> &out_", "")}){
<#list tc.getStreamInputNames(networkInstruction.body) as variable>
void predict(${tc.join(tc.getStreamInputNames(networkInstruction.body, false), ", ", "const std::vector<float> &in_", "")},
${tc.join(tc.getStreamOutputNames(networkInstruction.body, false), ", ", "std::vector<float> &out_", "")}){
<#list tc.getStreamInputNames(networkInstruction.body, false) as variable>
MXPredSetInput(handle, input_keys[${variable?index}].c_str(), in_${variable}.data(), static_cast<mx_uint>(in_${variable}.size()));
</#list>
......@@ -49,7 +48,7 @@ public:
mx_uint shape_len;
size_t size;
<#list tc.getStreamOutputNames(networkInstruction.body) as variable>
<#list tc.getStreamOutputNames(networkInstruction.body, false) as variable>
output_index = ${variable?index?c};
MXPredGetOutputShape(handle, output_index, &shape, &shape_len);
size = 1;
......@@ -114,7 +113,6 @@ public:
assert(handle);
}
};
</#if>
</#list>
#endif // ${tc.fileNameWithoutEnding?upper_case}
......@@ -172,39 +172,6 @@ class BLEU(mx.metric.EvalMetric):
class ${tc.fileNameWithoutEnding}:
def applyBeamSearch(input, length, width, maxLength, currProb, netIndex, bestOutput):
bestProb = 0.0
while length < maxLength:
length += 1
batchIndex = 0
for batchEntry in input:
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
if length == 1:
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), length, width, maxLength,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), length, width, maxLength,
currProb * top_k_values[index], netIndex, bestOutput)
if length == maxLength:
#print currProb
if currProb > bestProb:
bestProb = currProb
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
def __init__(self, data_loader, net_constructor):
self._data_loader = data_loader
self._net_creator = net_constructor
......@@ -245,8 +212,10 @@ class ${tc.fileNameWithoutEnding}:
del optimizer_params['step_size']
del optimizer_params['learning_rate_decay']
train_batch_size = batch_size
test_batch_size = ${tc.hasUnrollInstructions()?then('1', 'batch_size')}
train_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(batch_size)
train_iter, train_test_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(train_batch_size, test_batch_size)
if normalize:
self._net_creator.construct(context=mx_context, data_mean=data_mean, data_std=data_std)
......@@ -311,19 +280,8 @@ class ${tc.fileNameWithoutEnding}:
for epoch in range(begin_epoch, begin_epoch + num_epoch):
train_iter.reset()
for batch_i, batch in enumerate(train_iter):
<#list tc.architectureInputs as input_name>
${input_name} = batch.data[0].as_in_context(mx_context)
</#list>
<#list tc.architectureOutputs as output_name>
<#if output_name != 'attention_'>
${output_name}label = batch.label[${output_name?index}].as_in_context(mx_context)
</#if>
</#list>
outputs=[]
with autograd.record():
<#include "pythonExecuteWithLoss.ftl">
<#include "pythonExecuteTrain.ftl">
loss = 0
for element in lossList:
......@@ -349,26 +307,11 @@ class ${tc.fileNameWithoutEnding}:
tic = None
train_iter.reset()
train_test_iter.reset()
metric = mx.metric.create(eval_metric, **eval_metric_params)
for batch_i, batch in enumerate(train_iter):
<#list tc.architectureInputs as input_name>
${input_name} = batch.data[0].as_in_context(mx_context)
</#list>
labels = [
<#list tc.architectureOutputs as output_name>
<#if output_name != 'attention_'>
batch.label[${output_name?index}].as_in_context(mx_context)<#sep>,
</#if>
</#list>
]
outputs=[]
for batch_i, batch in enumerate(train_test_iter):
if True: <#-- Fix indentation -->
<#include "pythonExecute.ftl">
<#include "pythonExecuteTest.ftl">
<#include "saveAttentionImageTrain.ftl">
......@@ -378,7 +321,6 @@ class ${tc.fileNameWithoutEnding}:
for output_name in outputs:
if mx.nd.shape_array(mx.nd.squeeze(output_name)).size > 1:
predictions.append(mx.nd.argmax(output_name, axis=1))
#ArgMax already applied
else:
predictions.append(output_name)
......@@ -388,23 +330,8 @@ class ${tc.fileNameWithoutEnding}:
test_iter.reset()
metric = mx.metric.create(eval_metric, **eval_metric_params)
for batch_i, batch in enumerate(test_iter):
<#list tc.architectureInputs as input_name>
${input_name} = batch.data[0].as_in_context(mx_context)
</#list>
labels = [
<#list tc.architectureOutputs as output_name>
<#if output_name != 'attention_'>
batch.label[${output_name?index}].as_in_context(mx_context)<#sep>,
</#if>
</#list>
]
outputs=[]
if True: <#-- Fix indentation -->
<#include "pythonExecute.ftl">
<#include "pythonExecuteTest.ftl">
<#include "saveAttentionImageTest.ftl">
......
<#-- (c) https://github.com/MontiCore/monticore -->
<#if mode == "FORWARD_FUNCTION">
${element.name} = ${tc.join(element.inputs, " + ")}
<#elseif mode == "PYTHON_INLINE">
${element.name} = ${tc.join(element.inputs, " + ")}
<#elseif mode == "CPP_INLINE">
vector<float> ${element.name}(${element.inputs[0]}.size());
for (size_t i = 0; i != ${element.name}.size(); ++i) {
${element.name}[i] = ${tc.join(element.inputs, " + ", "", "[i]")};
}
</#if>
<#if mode == "FORWARD_FUNCTION">
${element.name} = F.broadcast_add(${tc.join(element.inputs, ",")})
<#elseif mode == "PYTHON_INLINE">
self.${element.name} = mx.nd.broadcast_add(${tc.join(element.inputs, ",")})
</#if>
\ No newline at end of file
<#-- (c) https://github.com/MontiCore/monticore -->
<#if mode == "FORWARD_FUNCTION">
${element.name} = ${element.inputs[element.index]}
<#elseif mode == "PYTHON_INLINE">
${element.name} = ${element.inputs[element.index]}
<#elseif mode == "CPP_INLINE">
vector<float> ${element.name} = ${element.inputs[element.index]};
</#if>
......@@ -3,9 +3,5 @@
<#assign input = element.inputs[0]>
<#if mode == "FORWARD_FUNCTION">
${element.name} = ${input}
<#elseif mode == "PYTHON_INLINE">
${element.name} = ${input}
<#elseif mode == "CPP_INLINE">