Commit 4b0341f8 authored by Christian Fuß's avatar Christian Fuß

resolved merge conflicts

parents 8ad4df06 bdcafb9f
Pipeline #202185 failed with stages
in 24 seconds
......@@ -16,8 +16,9 @@
<properties>
<!-- .. SE-Libraries .................................................. -->
<CNNArch.version>0.3.3-SNAPSHOT</CNNArch.version>
<CNNTrain.version>0.3.7-SNAPSHOT</CNNTrain.version>
<CNNTrain.version>0.3.8-SNAPSHOT</CNNTrain.version>
<CNNArch2X.version>0.0.4-SNAPSHOT</CNNArch2X.version>
<embedded-montiarc-math-opt-generator>0.1.4</embedded-montiarc-math-opt-generator>
<EMADL2PythonWrapper.version>0.0.2-SNAPSHOT</EMADL2PythonWrapper.version>
......
package de.monticore.lang.monticar.cnnarch.gluongenerator;
import java.util.HashSet;
import java.util.Set;
public class AllAttentionModels {
public static Set<String> getAttentionModels() {
// List of all models that use attention and should save images of the attention over time
Set<String> models = new HashSet<>();
models.add("showAttendTell.Show_attend_tell");
return models;
}
}
\ No newline at end of file
......@@ -9,6 +9,7 @@ import de.monticore.lang.monticar.cnnarch._symboltable.ArchitectureSymbol;
import de.monticore.lang.monticar.generator.FileContent;
import de.monticore.lang.monticar.generator.cmake.CMakeConfig;
import de.monticore.lang.monticar.generator.cmake.CMakeFindModule;
import de.se_rwth.commons.logging.Log;
import java.util.*;
......@@ -64,6 +65,9 @@ public class CNNArch2Gluon extends CNNArchGenerator {
temp = controller.process("CNNSupervisedTrainer", Target.PYTHON);
fileContentMap.put(temp.getKey(), temp.getValue());
temp = controller.process("BeamSearch", Target.CPP);
fileContentMap.put(temp.getKey().replace(".h", ""), temp.getValue());
temp = controller.process("execute", Target.CPP);
fileContentMap.put(temp.getKey().replace(".h", ""), temp.getValue());
......
......@@ -43,4 +43,9 @@ public class CNNArch2GluonArchitectureSupportChecker extends ArchitectureSupport
return true;
}
@Override
protected boolean checkUnroll(ArchitectureSymbol architecture) {
return true;
}
}
......@@ -28,6 +28,16 @@ public class CNNArch2GluonLayerSupportChecker extends LayerSupportChecker {
supportedLayerList.add(AllPredefinedLayers.LSTM_NAME);
supportedLayerList.add(AllPredefinedLayers.GRU_NAME);
supportedLayerList.add(AllPredefinedLayers.EMBEDDING_NAME);
supportedLayerList.add(AllPredefinedLayers.ARG_MAX_NAME);
supportedLayerList.add(AllPredefinedLayers.REPEAT_NAME);
supportedLayerList.add(AllPredefinedLayers.DOT_NAME);
supportedLayerList.add(AllPredefinedLayers.EXPAND_DIMS_NAME);
supportedLayerList.add(AllPredefinedLayers.SQUEEZE_NAME);
supportedLayerList.add(AllPredefinedLayers.SWAPAXES_NAME);
supportedLayerList.add(AllPredefinedLayers.BROADCAST_MULTIPLY_NAME);
supportedLayerList.add(AllPredefinedLayers.REDUCE_SUM_NAME);
supportedLayerList.add(AllPredefinedLayers.BROADCAST_ADD_NAME);
supportedLayerList.add(AllPredefinedLayers.RESHAPE_NAME);
}
}
......@@ -6,6 +6,8 @@ import de.monticore.lang.monticar.cnnarch.generator.CNNArchTemplateController;
import de.monticore.lang.monticar.cnnarch._symboltable.*;
import de.monticore.lang.monticar.cnnarch.generator.TemplateConfiguration;
import de.monticore.lang.monticar.cnnarch.predefined.AllPredefinedLayers;
import de.se_rwth.commons.logging.Log;
import java.io.Writer;
import java.util.*;
......@@ -41,25 +43,15 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
}
}
else if (element.getType() == VariableSymbol.Type.LAYER) {
include(TEMPLATE_ELEMENTS_DIR_PATH, element.getLayerVariableDeclaration().getLayer().getName(), writer, netDefinitionMode);
if (element.getMember() == VariableSymbol.Member.STATE) {
include(TEMPLATE_ELEMENTS_DIR_PATH, "Output", writer, netDefinitionMode);
} else if (element.getMember() == VariableSymbol.Member.NONE) {
include(TEMPLATE_ELEMENTS_DIR_PATH, element.getLayerVariableDeclaration().getLayer().getName(), writer, netDefinitionMode);
}
}
}
else {
include(element.getResolvedThis().get(), writer, netDefinitionMode);
}
setCurrentElement(previousElement);
}
public void include(ConstantSymbol constant, Writer writer, NetDefinitionMode netDefinitionMode) {
ArchitectureElementData previousElement = getCurrentElement();
setCurrentElement(constant);
if (constant.isAtomic()) {
include(TEMPLATE_ELEMENTS_DIR_PATH, "Const", writer, netDefinitionMode);
}
else {
include(constant.getResolvedThis().get(), writer, netDefinitionMode);
include((ArchitectureElementSymbol) element.getResolvedThis().get(), writer, netDefinitionMode);
}
setCurrentElement(previousElement);
......@@ -74,7 +66,7 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
include(TEMPLATE_ELEMENTS_DIR_PATH, templateName, writer, netDefinitionMode);
}
else {
include(layer.getResolvedThis().get(), writer, netDefinitionMode);
include((ArchitectureElementSymbol) layer.getResolvedThis().get(), writer, netDefinitionMode);
}
setCurrentElement(previousElement);
......@@ -99,7 +91,7 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
include((LayerSymbol) architectureElement, writer, netDefinitionMode);
}
else if (architectureElement instanceof ConstantSymbol) {
include((ConstantSymbol) architectureElement, writer, netDefinitionMode);
}
else {
include((VariableSymbol) architectureElement, writer, netDefinitionMode);
......@@ -121,6 +113,20 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
return getStreamInputs(stream).keySet();
}
// Used for unroll: maps the stream's input names to the names used in the current unroll timestep
public List<String> getStreamInputNames(SerialCompositeElementSymbol stream, SerialCompositeElementSymbol currentStream) {
List<String> inputNames = new LinkedList<>(getStreamInputNames(stream));
Map<String, String> pairs = getUnrollPairs(stream, currentStream);
for (int i = 0; i != inputNames.size(); ++i) {
if (pairs.containsKey(inputNames.get(i))) {
inputNames.set(i, pairs.get(inputNames.get(i)));
}
}
return inputNames;
}
public Collection<List<String>> getStreamInputDimensions(SerialCompositeElementSymbol stream) {
return getStreamInputs(stream).values();
}
......@@ -134,22 +140,65 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
}
}
outputNames.addAll(getStreamLayerVariableMembers(stream, "1", true).keySet());
outputNames.addAll(getStreamLayerVariableMembers(stream, true, false).keySet());
return outputNames;
}
// Used for unroll: maps the stream's output names to the names used in the current unroll timestep
public List<String> getStreamOutputNames(SerialCompositeElementSymbol stream, SerialCompositeElementSymbol currentStream) {
List<String> outputNames = new LinkedList<>(getStreamOutputNames(stream));
Map<String, String> pairs = getUnrollPairs(stream, currentStream);
for (int i = 0; i != outputNames.size(); ++i) {
if (pairs.containsKey(outputNames.get(i))) {
outputNames.set(i, pairs.get(outputNames.get(i)));
}
}
return outputNames;
}
// Used to initialize all layer variable members which are passed through the networks
public Map<String, List<String>> getLayerVariableMembers(String batchSize) {
public Map<String, List<String>> getLayerVariableMembers(boolean generateStateInitializers) {
Map<String, List<String>> members = new LinkedHashMap<>();
for (SerialCompositeElementSymbol stream : getArchitecture().getStreams()) {
members.putAll(getStreamLayerVariableMembers(stream, batchSize, true));
members.putAll(getStreamLayerVariableMembers(stream, true, generateStateInitializers));
}
return members;
}
// Computes pairs of differently named VariableSymbol elements in two streams. Currently used to map names between
// the UnrollInstructionSymbol body (resolved with t = CONST_OFFSET) and the body of the actual timestep t.
public Map<String, String> getUnrollPairs(ArchitectureElementSymbol element, ArchitectureElementSymbol current) {
Map<String, String> pairs = new HashMap<>();
if (element instanceof CompositeElementSymbol && current instanceof CompositeElementSymbol) {
List<ArchitectureElementSymbol> elements = ((CompositeElementSymbol) element).getElements();
List<ArchitectureElementSymbol> currentElements = ((CompositeElementSymbol) current).getElements();
if (elements.size() == currentElements.size()) {
for (int i = 0; i != currentElements.size(); ++i) {
String name = getName(elements.get(i));
String currentName = getName(currentElements.get(i));
if (elements.get(i) instanceof VariableSymbol && currentElements.get(i) instanceof VariableSymbol) {
if (name != null && currentName != null && !name.equals(currentName)) {
pairs.put(name, currentName);
}
}
pairs.putAll(getUnrollPairs(elements.get(i), currentElements.get(i)));
}
}
}
return pairs;
}
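// A minimal worked example of the intended mapping (names are illustrative, borrowed from the generated
// predictor call elsewhere in this commit): if the body resolved with t = CONST_OFFSET contains the variables
// [source_, target_0_] and the body of the current timestep contains [source_, target_1_], then getUnrollPairs
// returns {target_0_ -> target_1_}, which getStreamInputNames and getStreamOutputNames use to substitute
// target_0_ with target_1_.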
private Map<String, List<String>> getStreamInputs(SerialCompositeElementSymbol stream) {
Map<String, List<String>> inputs = new LinkedHashMap<>();
......@@ -162,36 +211,42 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
dimensions.add(intDimension.toString());
}
// Add batch size dimension
dimensions.add(0, "1");
inputs.put(getName(element), dimensions);
}
else if (element instanceof ConstantSymbol) {
inputs.put(getName(element), Arrays.asList("1"));
}
}
inputs.putAll(getStreamLayerVariableMembers(stream, "1", false));
inputs.putAll(getStreamLayerVariableMembers(stream, false, false));
return inputs;
}
private Map<String, List<String>> getStreamLayerVariableMembers(SerialCompositeElementSymbol stream, String batchSize, boolean includeOutput) {
Map<String, List<String>> members = new HashMap<>();
private Map<String, List<String>> getStreamLayerVariableMembers(SerialCompositeElementSymbol stream, boolean includeOutput, boolean generateStateInitializers) {
Map<String, List<String>> members = new LinkedHashMap<>();
List<ArchitectureElementSymbol> elements = stream.getSpannedScope().resolveLocally(ArchitectureElementSymbol.KIND);
for (ArchitectureElementSymbol element : elements) {
if (element instanceof VariableSymbol) {
VariableSymbol variable = (VariableSymbol) element;
if (variable.getType() == VariableSymbol.Type.LAYER && variable.getMember() == VariableSymbol.Member.NONE) {
if (variable.getType() == VariableSymbol.Type.LAYER && (variable.getMember() == VariableSymbol.Member.NONE || generateStateInitializers)) {
LayerVariableDeclarationSymbol layerVariableDeclaration = variable.getLayerVariableDeclaration();
if (layerVariableDeclaration.getLayer().getDeclaration().isPredefined()) {
PredefinedLayerDeclaration predefinedLayerDeclaration =
(PredefinedLayerDeclaration) layerVariableDeclaration.getLayer().getDeclaration();
if (predefinedLayerDeclaration.isValidMember(VariableSymbol.Member.STATE)) {
int arrayLength = predefinedLayerDeclaration.getArrayLength(VariableSymbol.Member.STATE);
for (int i = 0; i < arrayLength; ++i) {
String name = variable.getName() + "_state_";
if (arrayLength > 1) {
name += i + "_";
}
List<Integer> intDimensions = predefinedLayerDeclaration.computeOutputTypes(
layerVariableDeclaration.getLayer().getInputTypes(),
layerVariableDeclaration.getLayer(),
......@@ -204,17 +259,19 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
dimensions.add(intDimension.toString());
}
// Add batch size dimension at index 1, since RNN states in Gluon have the format
// (layers, batch_size, units)
dimensions.add(1, batchSize);
members.put(name, dimensions);
}
if (includeOutput) {
if (predefinedLayerDeclaration.isValidMember(VariableSymbol.Member.OUTPUT)) {
arrayLength = predefinedLayerDeclaration.getArrayLength(VariableSymbol.Member.OUTPUT);
for (int i = 0; i < arrayLength; ++i) {
String name = variable.getName() + "_output_";
if (arrayLength > 1) {
name += i + "_";
}
List<Integer> intDimensions = predefinedLayerDeclaration.computeOutputTypes(
layerVariableDeclaration.getLayer().getInputTypes(),
layerVariableDeclaration.getLayer(),
......@@ -227,9 +284,6 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
dimensions.add(intDimension.toString());
}
// Add batch size dimension at index 0, since we use NTC format for RNN output in Gluon
dimensions.add(0, batchSize);
members.put(name, dimensions);
}
}
......@@ -237,8 +291,28 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
}
}
}
return members;
}
// Removes trailing dimensions of size 1, keeping at least one dimension
public List<String> cutDimensions(List<String> dimensions) {
while (dimensions.size() > 1 && dimensions.get(dimensions.size() - 1).equals("1")) {
dimensions.remove(dimensions.size() - 1);
}
return dimensions;
}
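An illustrative Python mirror of cutDimensions (a sketch, not part of the commit), showing that trailing "1" entries are dropped while at least one dimension is kept:
def cut_dimensions(dimensions):
    # Drop trailing "1" dimensions, but always keep at least one entry
    while len(dimensions) > 1 and dimensions[-1] == "1":
        dimensions = dimensions[:-1]
    return dimensions

print(cut_dimensions(["64", "1", "1"]))  # ['64']
print(cut_dimensions(["1"]))             # ['1']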
public boolean isAttentionNetwork(){
return AllAttentionModels.getAttentionModels().contains(getComponentName());
}
public int getBeamSearchWidth(UnrollInstructionSymbol unroll){
return unroll.getIntValue(AllPredefinedLayers.WIDTH_NAME).get();
}
public int getBeamSearchLength(UnrollInstructionSymbol unroll){
return unroll.getIntValue(AllPredefinedLayers.MAX_LENGTH_NAME).get();
}
}
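For context on the batch-size comments above: in Gluon, recurrent layer states are laid out as (layers, batch_size, units), while outputs with layout 'NTC' are (batch_size, sequence_length, units). A minimal sketch, assuming MXNet is available (all names illustrative):
import mxnet as mx

lstm = mx.gluon.rnn.LSTM(hidden_size=8, num_layers=1, layout='NTC')
lstm.initialize()
x = mx.nd.zeros((4, 5, 3))              # (batch_size, seq_len, input_units)
state = lstm.begin_state(batch_size=4)  # list of (layers, batch_size, units) arrays
output, state = lstm(x, state)
print(output.shape)    # (4, 5, 8), i.e. NTC
print(state[0].shape)  # (1, 4, 8), i.e. (layers, batch_size, units)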
......@@ -294,29 +294,6 @@ public class GluonConfigurationData extends ConfigurationData {
return environmentParameters.containsKey(ENVIRONMENT_REWARD_TOPIC);
}
private Map<String, Object> getMultiParamEntry(final String key, final String valueName) {
if (!configurationContainsKey(key)) {
return null;
}
Map<String, Object> resultView = new HashMap<>();
MultiParamValueSymbol multiParamValue = (MultiParamValueSymbol)this.getConfiguration().getEntryMap()
.get(key).getValue();
resultView.put(valueName, multiParamValue.getValue());
resultView.putAll(multiParamValue.getParameters());
return resultView;
}
private Boolean configurationContainsKey(final String key) {
return this.getConfiguration().getEntryMap().containsKey(key);
}
private Object retrieveConfigurationEntryValueByKey(final String key) {
return this.getConfiguration().getEntry(key).getValue().getValue();
}
private Map<String, Object> getInputParameterWithName(final String parameterName) {
if (!getRlRewardFunctionParameter().isPresent()
|| !getRlRewardFunctionParameter().get().getTypeOfInputPort(parameterName).isPresent()
......
#include <algorithm>
#include <functional>
#include <iterator>
#include <vector>

using std::vector;

// Hypothetical helper (an assumption, not in the original template): stands in
// for the generated predictor invocation, e.g.
// _predictor_3_.predict(target_0_, decoder_state_, target_1_, decoder_state_, decoder_output_);
// It feeds a chosen token index into network netIndex and returns the next
// batch of probability distributions.
vector<vector<float>> runNetwork(int netIndex, int tokenIndex);

// Beam search over the per-batch-entry probability distributions. The control
// flow is kept from the original template; the signature is adjusted to
// vector<vector<float>> so the per-entry iteration type-checks, and the
// recursive call duplicated by the merge is collapsed into one.
vector<vector<float>> applyBeamSearch(vector<vector<float>> input, int depth, int width, int maxDepth,
                                      double currProb, int netIndex, vector<vector<float>> bestOutput)
{
    double bestProb = 0.0;
    while (depth < maxDepth) {
        depth++;
        int batchIndex = 0;
        for (vector<float>& batchEntry : input) {
            // Select the `width` largest probabilities and their original indices
            vector<int> top_k_indices(width);
            vector<float> top_k_values(width);
            vector<float> batchEntryCopy = batchEntry;
            std::partial_sort(batchEntry.begin(), batchEntry.begin() + width, batchEntry.end(),
                              std::greater<float>());
            for (int i = 0; i < width; i++) {
                top_k_values[i] = batchEntry[i];
                vector<float>::iterator itr =
                    std::find(batchEntryCopy.begin(), batchEntryCopy.end(), top_k_values[i]);
                top_k_indices[i] = std::distance(batchEntryCopy.begin(), itr);
            }

            for (int index = 0; index < width; index++) {
                vector<vector<float>> result;
                if (depth == 1) {
                    // First expansion: the candidate's own network output doubles as the fallback output
                    result = applyBeamSearch(runNetwork(netIndex, top_k_indices[index]), depth, width, maxDepth,
                                             currProb * top_k_values[index], netIndex,
                                             runNetwork(netIndex, top_k_indices[index]));
                } else {
                    result = applyBeamSearch(runNetwork(netIndex, top_k_indices[index]), depth, width, maxDepth,
                                             currProb * top_k_values[index], netIndex, bestOutput);
                }
                // Keep the most probable completed hypothesis per batch entry
                if (depth == maxDepth && currProb > bestProb) {
                    bestProb = currProb;
                    bestOutput[batchIndex] = result[batchIndex];
                }
            }
            batchIndex++;
        }
    }
    return bestOutput;
}
\ No newline at end of file
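For reference, the top-k selection implemented by the inner loop above can be sketched in Python/NumPy (illustrative only, not part of the generated code):
import numpy as np

def top_k(batch_entry, width):
    # Indices of the `width` largest probabilities, sorted descending
    indices = np.argpartition(batch_entry, -width)[-width:]
    indices = indices[np.argsort(batch_entry[indices])[::-1]]
    return indices, batch_entry[indices]

probs = np.array([0.1, 0.5, 0.2, 0.15, 0.05])
print(top_k(probs, 2))  # (array([1, 2]), array([0.5, 0.2]))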
......@@ -2,9 +2,10 @@
import mxnet as mx
import logging
import os
<#list tc.architecture.streams as stream>
<#if stream.isTrainable()>
from CNNNet_${tc.fullArchitectureName} import Net_${stream?index}
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.body.isTrainable()>
from CNNNet_${tc.fullArchitectureName} import Net_${networkInstruction?index}
</#if>
</#list>
......@@ -52,12 +53,12 @@ class ${tc.fileNameWithoutEnding}:
return earliestLastEpoch
def construct(self, context, data_mean=None, data_std=None):
<#list tc.architecture.streams as stream>
<#if stream.isTrainable()>
self.networks[${stream?index}] = Net_${stream?index}(data_mean=data_mean, data_std=data_std)
self.networks[${stream?index}].collect_params().initialize(self.weight_initializer, ctx=context)
self.networks[${stream?index}].hybridize()
self.networks[${stream?index}](<#list tc.getStreamInputDimensions(stream) as dimensions>mx.nd.zeros((${tc.join(dimensions, ",")},), ctx=context)<#sep>, </#list>)
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.body.isTrainable()>
self.networks[${networkInstruction?index}] = Net_${networkInstruction?index}(data_mean=data_mean, data_std=data_std)
self.networks[${networkInstruction?index}].collect_params().initialize(self.weight_initializer, ctx=context)
self.networks[${networkInstruction?index}].hybridize()
self.networks[${networkInstruction?index}](<#list tc.getStreamInputDimensions(networkInstruction.body) as dimensions>mx.nd.zeros((1, ${tc.join(tc.cutDimensions(dimensions), ",")},), ctx=context)<#sep>, </#list>)
</#if>
</#list>
......
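For a hypothetical component with a single trainable network instruction and input dimensions [64, 1], the updated construct template above would expand to roughly:
self.networks[0] = Net_0(data_mean=data_mean, data_std=data_std)
self.networks[0].collect_params().initialize(self.weight_initializer, ctx=context)
self.networks[0].hybridize()
self.networks[0](mx.nd.zeros((1, 64,), ctx=context))  # batch dim prepended, trailing 1s cut by cutDimensions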
......@@ -19,15 +19,21 @@ class ${tc.fileNameWithoutEnding}:
train_data = {}
data_mean = {}
data_std = {}
train_images = {}
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name + '_'] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
if 'images' in train_h5:
train_images = train_h5['images']
train_label = {}
index = 0
for output_name in self._output_names_:
train_label[output_name] = train_h5[output_name]
train_label[index] = train_h5[output_name]
index += 1
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
......@@ -37,18 +43,25 @@ class ${tc.fileNameWithoutEnding}:
if test_h5 is not None:
test_data = {}
test_images = {}
for input_name in self._input_names_:
test_data[input_name] = test_h5[input_name]
if 'images' in test_h5:
test_images = test_h5['images']
test_label = {}
index = 0
for output_name in self._output_names_:
test_label[output_name] = test_h5[output_name]
test_label[index] = test_h5[output_name]
index += 1
test_iter = mx.io.NDArrayIter(data=test_data,
label=test_label,
batch_size=batch_size)
return train_iter, test_iter, data_mean, data_std
return train_iter, test_iter, data_mean, data_std, train_images, test_images
def load_h5_files(self):
train_h5 = None
......
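Call sites have to be adjusted for the two extra return values; a hypothetical caller (the method name load_data is assumed, it is not shown in this hunk):
train_iter, test_iter, data_mean, data_std, train_images, test_images = self.load_data(batch_size)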
......@@ -3,44 +3,6 @@ import mxnet as mx
import numpy as np
from mxnet import gluon
class OneHot(gluon.HybridBlock):
def __init__(self, size, **kwargs):
super(OneHot, self).__init__(**kwargs)
with self.name_scope():
self.size = size
def hybrid_forward(self, F, x):
return F.one_hot(indices=F.argmax(data=x, axis=1), depth=self.size)
class Softmax(gluon.HybridBlock):
def __init__(self, **kwargs):
super(Softmax, self).__init__(**kwargs)
def hybrid_forward(self, F, x):
return F.softmax(x)
class Split(gluon.HybridBlock):
def __init__(self, num_outputs, axis=1, **kwargs):
super(Split, self).__init__(**kwargs)
with self.name_scope():
self.axis = axis
self.num_outputs = num_outputs
def hybrid_forward(self, F, x):
return F.split(data=x, axis=self.axis, num_outputs=self.num_outputs)
class Concatenate(gluon.HybridBlock):
def __init__(self, dim=1, **kwargs):
super(Concatenate, self).__init__(**kwargs)
with self.name_scope():
self.dim = dim
def hybrid_forward(self, F, *x):
return F.concat(*x, dim=self.dim)
class ZScoreNormalization(gluon.HybridBlock):
def __init__(self, data_mean, data_std, **kwargs):
......@@ -79,18 +41,69 @@ class NoNormalization(gluon.HybridBlock):
return x
<#list tc.architecture.streams as stream>
<#if stream.isTrainable()>
class Net_${stream?index}(gluon.HybridBlock):
class Reshape(gluon.HybridBlock):
def __init__(self, shape, **kwargs):
super(Reshape, self).__init__(**kwargs)
with self.name_scope():
self.shape = shape
def hybrid_forward(self, F, x):
return F.reshape(data=x, shape=self.shape)
class CustomRNN(gluon.HybridBlock):