Commit f1869937 authored by Julian Dierkes

fixed merge conflicts with master

parents 118babe2 75fd8c2c
@@ -19,7 +19,7 @@ git masterJobLinux:
 integrationMXNetJobLinux:
   stage: linux
-  image: registry.git.rwth-aachen.de/monticore/embeddedmontiarc/generators/emadl2cpp/integrationtests/mxnet:v0.0.3
+  image: registry.git.rwth-aachen.de/monticore/embeddedmontiarc/generators/emadl2cpp/integrationtests/mxnet:v0.0.4
   script:
   - mvn -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -B clean install --settings settings.xml -Dtest=IntegrationMXNetTest
@@ -33,7 +33,7 @@ integrationCaffe2JobLinux:
 integrationGluonJobLinux:
   stage: linux
-  image: registry.git.rwth-aachen.de/monticore/embeddedmontiarc/generators/emadl2cpp/integrationtests/mxnet:v0.0.3
+  image: registry.git.rwth-aachen.de/monticore/embeddedmontiarc/generators/emadl2cpp/integrationtests/mxnet:v0.0.4
   script:
   - mvn -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -B clean install --settings settings.xml -Dtest=IntegrationGluonTest
......
@@ -9,22 +9,22 @@
     <groupId>de.monticore.lang.monticar</groupId>
     <artifactId>embedded-montiarc-emadl-generator</artifactId>
-    <version>0.3.7-SNAPSHOT</version>
+    <version>0.3.8-SNAPSHOT</version>

     <!-- == PROJECT DEPENDENCIES ============================================= -->

     <properties>
         <!-- .. SE-Libraries .................................................. -->
-        <emadl.version>0.2.10-SNAPSHOT</emadl.version>
-        <CNNTrain.version>0.3.8-SNAPSHOT</CNNTrain.version>
+        <emadl.version>0.2.11-SNAPSHOT</emadl.version>
+        <CNNTrain.version>0.3.9-SNAPSHOT</CNNTrain.version>
         <cnnarch-generator.version>0.0.5-SNAPSHOT</cnnarch-generator.version>
         <cnnarch-mxnet-generator.version>0.2.17-SNAPSHOT</cnnarch-mxnet-generator.version>
-        <cnnarch-caffe2-generator.version>0.2.13-SNAPSHOT</cnnarch-caffe2-generator.version>
-        <cnnarch-gluon-generator.version>0.2.9-SNAPSHOT</cnnarch-gluon-generator.version>
+        <cnnarch-caffe2-generator.version>0.2.14-SNAPSHOT</cnnarch-caffe2-generator.version>
+        <cnnarch-gluon-generator.version>0.2.10-SNAPSHOT</cnnarch-gluon-generator.version>
         <cnnarch-tensorflow-generator.version>0.1.0-SNAPSHOT</cnnarch-tensorflow-generator.version>
         <Common-MontiCar.version>0.0.14-20180704.113055-2</Common-MontiCar.version>
         <embedded-montiarc-math-opt-generator>0.1.5</embedded-montiarc-math-opt-generator>
         <embedded-montiarc-emadl-pythonwrapper-generator>0.0.2</embedded-montiarc-emadl-pythonwrapper-generator>

         <!-- .. Libraries .................................................. -->
         <guava.version>18.0</guava.version>
@@ -88,6 +88,12 @@
         <artifactId>cnnarch-gluon-generator</artifactId>
         <version>${cnnarch-gluon-generator.version}</version>
     </dependency>
+    <dependency>
+        <groupId>de.monticore.lang.monticar</groupId>
+        <artifactId>common-monticar</artifactId>
+        <version>${Common-MontiCar.version}</version>
+    </dependency>
     <dependency>
         <groupId>de.monticore.lang.monticar</groupId>
......
@@ -58,7 +58,6 @@ public class IntegrationGluonTest extends IntegrationTest {
         assertTrue(Log.getFindings().isEmpty());
     }

-    @Ignore
     @Test
     public void testShowAttendTell() {
         Log.getFindings().clear();
......
@@ -42,7 +42,6 @@ public class IntegrationTensorflowTest extends IntegrationTest {
         super("TENSORFLOW", "39253EC049D4A4E5FA0536AD34874B9D#1DBAEE1B1BD83FB7CB5F70AE91B29638#C4C23549E737A759721D6694C75D9771#5AF0CE68E408E8C1F000E49D72AC214A");
     }

-    @Ignore
     @Test
     public void testMultipleStreams() {
         Log.getFindings().clear();
......
@@ -3,14 +3,17 @@ FROM maven:3-jdk-8
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     git \
+    libgtk2.0-dev \
+    python-subprocess32 \
+    python-tk \
     wget python gcc \
     build-essential cmake \
     liblapack-dev libblas-dev libboost-dev libarmadillo-dev && \
-    rm -rf /var/lib/apt/lists/*
+    rm -rf /var/lib/apt/lists/*

 RUN git clone https://github.com/apache/incubator-mxnet.git mxnet-source && \
     cd mxnet-source && git checkout tags/1.4.0 && cd .. && \
     cp -r mxnet-source/include/mxnet /usr/include/mxnet && \
     rm -r mxnet-source

 RUN wget https://bootstrap.pypa.io/get-pip.py
 RUN python get-pip.py

-RUN pip install mxnet h5py
+RUN pip install mxnet h5py opencv-python matplotlib
 package showAttendTell;

 component Main{
-    ports in Z(0:255)^{3, 224, 224} image,
-        out Z(0:25316)^{1} softmax[19];
+    ports in Z(0:255)^{3, 224, 224} images,
+        in Z(-oo:oo)^{64,2048} data,
+        out Z(0:37758)^{1} target[25];

-    instance Show_attend_tell net1;
+    instance Show_attend_tell net;

+    connect images -> net.images;
+    connect net.target[:] -> target[:];
 }
@@ -3,7 +3,10 @@ configuration Show_attend_tell{
     batch_size:2
     context:cpu
     eval_metric:bleu
-    loss:softmax_cross_entropy
+    loss:softmax_cross_entropy_ignore_indices{
+        ignore_indices:2
+    }
+    use_teacher_forcing:true
     save_attention_image:true
     optimizer:adam{
         learning_rate:0.005
......
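For context: the new loss block above reaches the generated trainer as plain keyword arguments. A minimal sketch of the resulting call, using the train() signature from the CNNSupervisedTrainer file later in this commit (the `trainer` instance itself is hypothetical):

# Sketch only: how the Show_attend_tell configuration above would surface
# as keyword arguments of the generated train() method shown further down.
trainer.train(batch_size=2,
              context='cpu',
              eval_metric='bleu',
              loss='softmax_cross_entropy_ignore_indices',
              loss_params={'ignore_indices': 2},
              use_teacher_forcing=True,
              save_attention_image=True,
              optimizer='adam',
              optimizer_params=(('learning_rate', 0.005),))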
@@ -9,10 +9,10 @@ component Show_attend_tell{
     layer LSTM(units=512) decoder;

-    layer FullyConnected(units = 256) features;
+    layer FullyConnected(units = 256, flatten=false) features;
     layer FullyConnected(units = 1, flatten=false) attention;

-    1 -> target[0];
+    0 -> target[0];

     data -> features;
@@ -30,7 +30,6 @@ component Show_attend_tell{
             Tanh() ->
             FullyConnected(units=1, flatten=false) ->
             Softmax(axis=0) ->
-            Dropout(p=0.25) ->
             attention
             |
             features.output
@@ -47,7 +46,6 @@ component Show_attend_tell{
         FullyConnected(units=37758) ->
         Tanh() ->
-        Dropout(p=0.25) ->
         Softmax() ->
         ArgMax() ->
         target[t]
     };
......
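For intuition, the attention block in the diff above (FullyConnected -> Tanh -> FullyConnected(units=1) -> Softmax, now without the Dropout layer) scores each encoder feature vector with a scalar and normalizes the scores into attention weights. A rough Gluon sketch with assumed sizes (64 feature vectors of width 256, hidden size 512; an illustration, not generator output):

import mxnet as mx
from mxnet.gluon import nn

score_net = nn.HybridSequential()
score_net.add(nn.Dense(512, flatten=False),       # hidden size assumed
              nn.Activation('tanh'),
              nn.Dense(1, flatten=False))         # one score per feature vector
score_net.initialize()

feats = mx.nd.random.uniform(shape=(1, 64, 256))  # 64 regions, 256-dim features
scores = score_net(feats)                         # (1, 64, 1)
alpha = mx.nd.softmax(scores, axis=1)             # attention weights over regions
context = (alpha * feats).sum(axis=1)             # (1, 256) attended feature vector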
@@ -11,15 +11,15 @@ component Show_attend_tell_images_as_input{
     layer LSTM(units=512) decoder;

-    layer FullyConnected(units = 256) features;
+    layer FullyConnected(units = 256, flatten=false) features;
     layer FullyConnected(units = 1, flatten=false) attention;

-    1 -> target[0];
+    0 -> target[0];

     images ->
-    Convolution(kernel=(7,7), channels=64, stride=(7,7), padding="valid") ->
+    Convolution(kernel=(4,4), channels=64, stride=(4,4), padding="valid") ->
     GlobalPooling(pool_type="max") ->
-    Convolution(kernel=(7,7), channels=128, stride=(7,7), padding="same") ->
+    Convolution(kernel=(4,4), channels=128, stride=(4,4), padding="same") ->
     Reshape(shape=(64, 128)) ->
     features;

     timed <t> GreedySearch(max_length=25){
@@ -36,7 +36,6 @@ component Show_attend_tell_images_as_input{
             Tanh() ->
             FullyConnected(units=1, flatten=false) ->
             Softmax(axis=0) ->
-            Dropout(p=0.25) ->
             attention
             |
             features.output
@@ -53,7 +52,6 @@ component Show_attend_tell_images_as_input{
         FullyConnected(units=37758) ->
         Tanh() ->
-        Dropout(p=0.25) ->
         Softmax() ->
         ArgMax() ->
         target[t]
     };
......
import mxnet as mx
import logging
import numpy as np
import time
import os
import shutil
import pickle
import math
import sys
from mxnet import gluon, autograd, nd
class CrossEntropyLoss(gluon.loss.Loss):
    def __init__(self, axis=-1, sparse_label=True, weight=None, batch_axis=0, **kwargs):
        super(CrossEntropyLoss, self).__init__(weight, batch_axis, **kwargs)
        self._axis = axis
        self._sparse_label = sparse_label

    def hybrid_forward(self, F, pred, label, sample_weight=None):
        pred = F.log(pred)
        if self._sparse_label:
            loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
        else:
            label = gluon.loss._reshape_like(F, label, pred)
            loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
        loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
        return F.mean(loss, axis=self._batch_axis, exclude=True)
class LogCoshLoss(gluon.loss.Loss):
    def __init__(self, weight=None, batch_axis=0, **kwargs):
        super(LogCoshLoss, self).__init__(weight, batch_axis, **kwargs)

    def hybrid_forward(self, F, pred, label, sample_weight=None):
        loss = F.log(F.cosh(pred - label))
        loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
        return F.mean(loss, axis=self._batch_axis, exclude=True)
class SoftmaxCrossEntropyLossIgnoreIndices(gluon.loss.Loss):
    def __init__(self, axis=-1, ignore_indices=[], sparse_label=True, from_logits=False, weight=None, batch_axis=0, **kwargs):
        super(SoftmaxCrossEntropyLossIgnoreIndices, self).__init__(weight, batch_axis, **kwargs)
        self._axis = axis
        self._ignore_indices = ignore_indices
        self._sparse_label = sparse_label
        self._from_logits = from_logits

    def hybrid_forward(self, F, pred, label, sample_weight=None):
        log_softmax = F.log_softmax
        pick = F.pick
        if not self._from_logits:
            pred = log_softmax(pred, self._axis)
        if self._sparse_label:
            loss = -pick(pred, label, axis=self._axis, keepdims=True)
        else:
            label = gluon.loss._reshape_like(F, label, pred)
            loss = -(pred * label).sum(axis=self._axis, keepdims=True)
        # ignore some indices for loss, e.g. <pad> tokens in NLP applications:
        # zero out every position whose predicted class is an ignored index.
        # Note: this uses mx.nd directly, so the block cannot be hybridized.
        for i in self._ignore_indices:
            loss = loss * mx.nd.logical_not(mx.nd.equal(mx.nd.argmax(pred, axis=1), mx.nd.ones_like(mx.nd.argmax(pred, axis=1)) * i))
        return loss.mean(axis=self._batch_axis, exclude=True)
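The loop at the end of hybrid_forward masks out loss entries whose predicted class is an ignored index. The same idea in standalone form, on a toy batch (vocabulary of 3, <pad> assumed to be index 2; illustrative only):

import mxnet as mx

pred = mx.nd.array([[2.0, 0.1, 0.1],    # argmax 0 -> kept
                    [0.1, 0.1, 2.0]])   # argmax 2 -> masked out
label = mx.nd.array([0, 2])

log_p = mx.nd.log_softmax(pred, axis=-1)
token_loss = -mx.nd.pick(log_p, label, axis=-1)   # per-token cross-entropy, shape (2,)
mask = mx.nd.logical_not(mx.nd.equal(mx.nd.argmax(pred, axis=1),
                                     mx.nd.ones_like(label) * 2))
print(token_loss * mask)                          # second entry is zeroed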
@mx.metric.register
class BLEU(mx.metric.EvalMetric):
    N = 4

    def __init__(self, exclude=None, name='bleu', output_names=None, label_names=None):
        super(BLEU, self).__init__(name=name, output_names=output_names, label_names=label_names)

        self._exclude = exclude or []

        self._match_counts = [0 for _ in range(self.N)]
        self._counts = [0 for _ in range(self.N)]

        self._size_ref = 0
        self._size_hyp = 0

    def update(self, labels, preds):
        labels, preds = mx.metric.check_label_shapes(labels, preds, True)

        new_labels = self._convert(labels)
        new_preds = self._convert(preds)

        for label, pred in zip(new_labels, new_preds):
            reference = [word for word in label if word not in self._exclude]
            hypothesis = [word for word in pred if word not in self._exclude]

            self._size_ref += len(reference)
            self._size_hyp += len(hypothesis)

            for n in range(self.N):
                reference_ngrams = self._get_ngrams(reference, n + 1)
                hypothesis_ngrams = self._get_ngrams(hypothesis, n + 1)

                match_count = 0
                for ngram in hypothesis_ngrams:
                    if ngram in reference_ngrams:
                        reference_ngrams.remove(ngram)
                        match_count += 1

                self._match_counts[n] += match_count
                self._counts[n] += len(hypothesis_ngrams)

    def get(self):
        precisions = [sys.float_info.min for n in range(self.N)]

        i = 1
        for n in range(self.N):
            match_counts = self._match_counts[n]
            counts = self._counts[n]

            if counts != 0:
                if match_counts == 0:
                    i *= 2
                    match_counts = 1 / i

                if (match_counts / counts) > 0:
                    precisions[n] = match_counts / counts

        bleu = self._get_brevity_penalty() * math.exp(sum(map(math.log, precisions)) / self.N)

        return (self.name, bleu)

    def calculate(self):
        precisions = [sys.float_info.min for n in range(self.N)]

        i = 1
        for n in range(self.N):
            match_counts = self._match_counts[n]
            counts = self._counts[n]

            if counts != 0:
                if match_counts == 0:
                    i *= 2
                    match_counts = 1 / i

                precisions[n] = match_counts / counts

        return self._get_brevity_penalty() * math.exp(sum(map(math.log, precisions)) / self.N)

    def _get_brevity_penalty(self):
        if self._size_hyp >= self._size_ref:
            return 1
        else:
            return math.exp(1 - (self._size_ref / self._size_hyp))

    @staticmethod
    def _get_ngrams(sentence, n):
        ngrams = []
        if len(sentence) >= n:
            for i in range(len(sentence) - n + 1):
                ngrams.append(sentence[i:i+n])
        return ngrams

    @staticmethod
    def _convert(nd_list):
        if len(nd_list) == 0:
            return []

        new_list = [[] for _ in range(nd_list[0].shape[0])]

        for element in nd_list:
            for i in range(element.shape[0]):
                new_list[i].append(element[i].asscalar())

        return new_list
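A toy smoke test for the registered metric (batch size 1, three decoder steps, each step an NDArray of per-sample token ids; index 2 plays the excluded <pad> role; illustrative only):

import mxnet as mx

metric = mx.metric.create('bleu', exclude=[2])
labels = [mx.nd.array([1]), mx.nd.array([5]), mx.nd.array([3])]  # reference sequence 1 5 3
preds = [mx.nd.array([1]), mx.nd.array([5]), mx.nd.array([4])]   # hypothesis sequence 1 5 4
metric.update(labels, preds)
print(metric.get())   # ('bleu', <score>)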
class CNNSupervisedTrainer_mnist_mnistClassifier_net:
    def __init__(self, data_loader, net_constructor):
        self._data_loader = data_loader
        self._net_creator = net_constructor
        self._networks = {}

    def train(self, batch_size=64,
              num_epoch=10,
              eval_metric='acc',
              eval_metric_params={},
              loss='softmax_cross_entropy',
              loss_params={},
              optimizer='adam',
              optimizer_params=(('learning_rate', 0.001),),
              load_checkpoint=True,
              context='gpu',
              checkpoint_period=5,
              save_attention_image=False,
              use_teacher_forcing=False,
              normalize=True):
        if context == 'gpu':
            mx_context = mx.gpu()
        elif context == 'cpu':
            mx_context = mx.cpu()
        else:
            logging.error("Context argument is '" + context + "'. Only 'cpu' and 'gpu' are valid arguments.")

        # map CNNTrain parameter names onto the names the MXNet optimizer expects
        if 'weight_decay' in optimizer_params:
            optimizer_params['wd'] = optimizer_params['weight_decay']
            del optimizer_params['weight_decay']
        if 'learning_rate_decay' in optimizer_params:
            min_learning_rate = 1e-08
            if 'learning_rate_minimum' in optimizer_params:
                min_learning_rate = optimizer_params['learning_rate_minimum']
                del optimizer_params['learning_rate_minimum']
            optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
                optimizer_params['step_size'],
                factor=optimizer_params['learning_rate_decay'],
                stop_factor_lr=min_learning_rate)
            del optimizer_params['step_size']
            del optimizer_params['learning_rate_decay']
        train_batch_size = batch_size
        test_batch_size = batch_size

        train_iter, train_test_iter, test_iter, data_mean, data_std, train_images, test_images = self._data_loader.load_data(train_batch_size, test_batch_size)

        if normalize:
            self._net_creator.construct(context=mx_context, data_mean=data_mean, data_std=data_std)
        else:
            self._net_creator.construct(context=mx_context)

        begin_epoch = 0
        if load_checkpoint:
            begin_epoch = self._net_creator.load(mx_context)
        else:
            if os.path.isdir(self._net_creator._model_dir_):
                shutil.rmtree(self._net_creator._model_dir_)

        self._networks = self._net_creator.networks

        try:
            os.makedirs(self._net_creator._model_dir_)
        except OSError:
            if not os.path.isdir(self._net_creator._model_dir_):
                raise

        trainers = [mx.gluon.Trainer(network.collect_params(), optimizer, optimizer_params) for network in self._networks.values() if len(network.collect_params().values()) != 0]

        margin = loss_params['margin'] if 'margin' in loss_params else 1.0
        sparseLabel = loss_params['sparse_label'] if 'sparse_label' in loss_params else True
        ignore_indices = [loss_params['ignore_indices']] if 'ignore_indices' in loss_params else []
        if loss == 'softmax_cross_entropy':
            fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else False
            loss_function = mx.gluon.loss.SoftmaxCrossEntropyLoss(from_logits=fromLogits, sparse_label=sparseLabel)
        elif loss == 'softmax_cross_entropy_ignore_indices':
            fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else False
            loss_function = SoftmaxCrossEntropyLossIgnoreIndices(ignore_indices=ignore_indices, from_logits=fromLogits, sparse_label=sparseLabel)
        elif loss == 'sigmoid_binary_cross_entropy':
            loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
        elif loss == 'cross_entropy':
            loss_function = CrossEntropyLoss(sparse_label=sparseLabel)
        elif loss == 'l2':
            loss_function = mx.gluon.loss.L2Loss()
        elif loss == 'l1':
            loss_function = mx.gluon.loss.L1Loss()
        elif loss == 'huber':
            rho = loss_params['rho'] if 'rho' in loss_params else 1
            loss_function = mx.gluon.loss.HuberLoss(rho=rho)
        elif loss == 'hinge':
            loss_function = mx.gluon.loss.HingeLoss(margin=margin)
        elif loss == 'squared_hinge':
            loss_function = mx.gluon.loss.SquaredHingeLoss(margin=margin)
        elif loss == 'logistic':
            labelFormat = loss_params['label_format'] if 'label_format' in loss_params else 'signed'
            loss_function = mx.gluon.loss.LogisticLoss(label_format=labelFormat)
        elif loss == 'kullback_leibler':
            fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else True
            loss_function = mx.gluon.loss.KLDivLoss(from_logits=fromLogits)
        elif loss == 'log_cosh':
            loss_function = LogCoshLoss()
        else:
            logging.error("Invalid loss parameter.")
        speed_period = 50
        tic = None

        for epoch in range(begin_epoch, begin_epoch + num_epoch):
            train_iter.reset()
            for batch_i, batch in enumerate(train_iter):
                with autograd.record():
                    labels = [batch.label[i].as_in_context(mx_context) for i in range(1)]

                    image_ = batch.data[0].as_in_context(mx_context)

                    # placeholder output buffer; overwritten by the forward pass below
                    predictions_ = mx.nd.zeros((train_batch_size, 10,), ctx=mx_context)

                    nd.waitall()

                    lossList = []

                    predictions_ = self._networks[0](image_)
                    lossList.append(loss_function(predictions_, labels[0]))

                    loss = 0
                    for element in lossList:
                        loss = loss + element

                loss.backward()

                for trainer in trainers:
                    trainer.step(batch_size)

                if tic is None:
                    tic = time.time()
                else:
                    if batch_i % speed_period == 0:
                        try:
                            speed = speed_period * batch_size / (time.time() - tic)
                        except ZeroDivisionError:
                            speed = float("inf")

                        logging.info("Epoch[%d] Batch[%d] Speed: %.2f samples/sec" % (epoch, batch_i, speed))

                        tic = time.time()

            tic = None
            train_test_iter.reset()
            metric = mx.metric.create(eval_metric, **eval_metric_params)
            for batch_i, batch in enumerate(train_test_iter):
                if True:
                    labels = [batch.label[i].as_in_context(mx_context) for i in range(1)]

                    image_ = batch.data[0].as_in_context(mx_context)

                    predictions_ = mx.nd.zeros((test_batch_size, 10,), ctx=mx_context)

                    nd.waitall()

                    outputs = []
                    attentionList = []

                    predictions_ = self._networks[0](image_)
                    outputs.append(predictions_)

                # optionally dump the attention weights as images for debugging
                if save_attention_image:
                    import matplotlib
                    matplotlib.use('Agg')
                    import matplotlib.pyplot as plt
                    logging.getLogger('matplotlib').setLevel(logging.ERROR)

                    plt.clf()
                    fig = plt.figure(figsize=(15, 15))
                    max_length = len(labels) - 1

                    if os.path.isfile('src/test/resources/training_data/Show_attend_tell/dict.pkl'):
                        with open('src/test/resources/training_data/Show_attend_tell/dict.pkl', 'rb') as f:
                            dict = pickle.load(f)

                    ax = fig.add_subplot(max_length//3, max_length//4, 1)
                    ax.imshow(train_images[0 + test_batch_size*(batch_i)].transpose(1, 2, 0))