
Commit 96c688b9 authored by Christian Fuß

Added BLEU and NIST scores for NLP tasks

parent 06da20d8
Pipeline #191795 failed with stages in 1 minute
@@ -25,8 +25,10 @@ class ${tc.fileNameWithoutEnding}:
             data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
 
         train_label = {}
+        index = 0
         for output_name in self._output_names_:
-            train_label[output_name] = train_h5[output_name]
+            train_label[index] = train_h5[output_name]
+            index += 1
 
         train_iter = mx.io.NDArrayIter(data=train_data,
                                        label=train_label,
@@ -40,8 +42,10 @@ class ${tc.fileNameWithoutEnding}:
             test_data[input_name] = test_h5[input_name]
 
         test_label = {}
+        index = 0
         for output_name in self._output_names_:
-            test_label[output_name] = test_h5[output_name]
+            test_label[index] = test_h5[output_name]
+            index += 1
 
         test_iter = mx.io.NDArrayIter(data=test_data,
                                       label=test_label,
...
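The change above replaces the output-name keys of `train_label` and `test_label` with a running integer index. The trainer reads labels positionally later on (`batch.label[${output_name?index}]`), so keying the dicts by position makes the intended order explicit instead of depending on how a name-keyed dict happens to be iterated. A minimal sketch of the same pattern, with `output_names` and `h5_file` as hypothetical stand-ins for `self._output_names_` and `train_h5`:

# Sketch: build the label dict keyed by position rather than by output name.
# 'output_names' and 'h5_file' are placeholders for self._output_names_ / train_h5.
def build_label_dict(h5_file, output_names):
    labels = {}
    for index, output_name in enumerate(output_names):
        # the i-th declared output becomes label i, matching batch.label[i] in the trainer
        labels[index] = h5_file[output_name]
    return labels

# Hypothetical usage, mirroring the generated loader:
#   train_label = build_label_dict(train_h5, self._output_names_)
#   train_iter = mx.io.NDArrayIter(data=train_data, label=train_label, batch_size=batch_size)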
@@ -4,6 +4,7 @@ import numpy as np
 import time
 import os
 import shutil
+import pickle
 from mxnet import gluon, autograd, nd
 
 class CrossEntropyLoss(gluon.loss.Loss):
@@ -31,6 +32,7 @@ class LogCoshLoss(gluon.loss.Loss):
         loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
         return F.mean(loss, axis=self._batch_axis, exclude=True)
 
 class ${tc.fileNameWithoutEnding}:
+
     def applyBeamSearch(input, length, width, maxLength, currProb, netIndex, bestOutput):
         bestProb = 0.0
@@ -172,6 +174,8 @@ class ${tc.fileNameWithoutEnding}:
                 ${output_name}label = batch.label[${output_name?index}].as_in_context(mx_context)
 </#list>
 
+                outputs=[]
+
                 with autograd.record():
 <#include "pythonExecuteWithLoss.ftl">
@@ -226,8 +230,34 @@ class ${tc.fileNameWithoutEnding}:
                 else:
                     predictions.append(output_name)
 
-            #print [word[0] for word in predictions]
-            #print labels[0]
+            #Compute BLEU and NIST Score if data folder contains a dictionary -> NLP dataset
+            if(os.path.isfile('${tc.dataPath}/dict.pkl')):
+                with open('${tc.dataPath}/dict.pkl', 'rb') as f:
+                    dict = pickle.load(f)
+
+                import nltk.translate.bleu_score
+                import nltk.translate.nist_score
+
+                prediction = []
+                for index in range(batch_size):
+                    sentence = ''
+                    for entry in predictions:
+                        sentence += dict[int(entry[index].asscalar())] + ' '
+                    prediction.append(sentence)
+
+                for index in range(batch_size):
+                    sentence = ''
+                    for batchEntry in batch.label:
+                        sentence += dict[int(batchEntry[index].asscalar())] + ' '
+                    print "############################"
+                    print "label: ", sentence
+                    print "prediction: ", prediction[index]
+
+                    BLEUscore = nltk.translate.bleu_score.sentence_bleu([sentence], prediction[index])
+                    NISTscore = nltk.translate.nist_score.sentence_nist([sentence], prediction[index])
+                    print "BLEU: ", BLEUscore
+                    print "NIST: ", NISTscore
+                    print "############################"
 
             metric.update(preds=predictions, labels=labels)
 
         train_metric_score = metric.get()[1]
...
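The block above decodes each predicted and reference sequence into words via the pickled id-to-word dictionary and scores them with NLTK. Note that `sentence_bleu` and `sentence_nist` treat the hypothesis and each reference as sequences of tokens, so a standalone version would typically split the sentences into word lists first. A hedged sketch with a made-up vocabulary and sentences (requires `nltk`):

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.nist_score import sentence_nist

# Hypothetical id-to-word dictionary, analogous to the pickled dict.pkl
vocab = {0: 'a', 1: 'man', 2: 'rides', 3: 'the', 4: 'horse'}

def decode(ids, vocab):
    # map a sequence of token ids back to a list of words
    return [vocab[int(i)] for i in ids]

reference = decode([0, 1, 2, 3, 4], vocab)   # ground-truth sentence
hypothesis = decode([0, 1, 2, 0, 4], vocab)  # predicted sentence

# sentence_bleu takes a list of reference token lists and one hypothesis token list
bleu = sentence_bleu([reference], hypothesis,
                     smoothing_function=SmoothingFunction().method1)
nist = sentence_nist([reference], hypothesis, n=4)

print('BLEU: {}'.format(bleu))
print('NIST: {}'.format(nist))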
@@ -20,7 +20,7 @@
 ${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = applyBeamSearch(input, 0, ${length}, ${width}, 1.0, ${networkInstruction?index}, input)
 <#else>
 ${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")?replace("_state_","_state_[0]")})
-<#if !(tc.getStreamOutputNames(networkInstruction.body)[0]?ends_with("_output_"))>
+<#if !(tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]?ends_with("_output_"))>
 outputs.append(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]})
 </#if>
 <#list resolvedBody.elements as element>
...