Commit 96c688b9 authored by Christian Fuß
Browse files

Added BLEU and NIST scores for NLP tasks

parent 06da20d8
Pipeline #191795 failed with stages
in 1 minute
......@@ -25,8 +25,10 @@ class ${tc.fileNameWithoutEnding}:
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_label = {}
index = 0
for output_name in self._output_names_:
train_label[output_name] = train_h5[output_name]
train_label[index] = train_h5[output_name]
index += 1
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
......@@ -40,8 +42,10 @@ class ${tc.fileNameWithoutEnding}:
test_data[input_name] = test_h5[input_name]
test_label = {}
index = 0
for output_name in self._output_names_:
test_label[output_name] = test_h5[output_name]
test_label[index] = test_h5[output_name]
index += 1
test_iter = mx.io.NDArrayIter(data=test_data,
label=test_label,
......
......@@ -4,6 +4,7 @@ import numpy as np
import time
import os
import shutil
import pickle
from mxnet import gluon, autograd, nd
class CrossEntropyLoss(gluon.loss.Loss):
......@@ -31,6 +32,7 @@ class LogCoshLoss(gluon.loss.Loss):
loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
return F.mean(loss, axis=self._batch_axis, exclude=True)
class ${tc.fileNameWithoutEnding}:
def applyBeamSearch(input, length, width, maxLength, currProb, netIndex, bestOutput):
bestProb = 0.0
......@@ -172,6 +174,8 @@ class ${tc.fileNameWithoutEnding}:
${output_name}label = batch.label[${output_name?index}].as_in_context(mx_context)
</#list>
outputs=[]
with autograd.record():
<#include "pythonExecuteWithLoss.ftl">
......@@ -226,8 +230,34 @@ class ${tc.fileNameWithoutEnding}:
else:
predictions.append(output_name)
#print [word[0] for word in predictions]
#print labels[0]
#Compute BLEU and NIST Score if data folder contains a dictionary -> NLP dataset
if(os.path.isfile('${tc.dataPath}/dict.pkl')):
with open('${tc.dataPath}/dict.pkl', 'rb') as f:
dict = pickle.load(f)
import nltk.translate.bleu_score
import nltk.translate.nist_score
prediction = []
for index in range(batch_size):
sentence = ''
for entry in predictions:
sentence += dict[int(entry[index].asscalar())] + ' '
prediction.append(sentence)
for index in range(batch_size):
sentence = ''
for batchEntry in batch.label:
sentence += dict[int(batchEntry[index].asscalar())] + ' '
print "############################"
print "label: ", sentence
print "prediction: ", prediction[index]
BLEUscore = nltk.translate.bleu_score.sentence_bleu([sentence], prediction[index])
NISTscore = nltk.translate.nist_score.sentence_nist([sentence], prediction[index])
print "BLEU: ", BLEUscore
print "NIST: ", NISTscore
print "############################"
metric.update(preds=predictions, labels=labels)
train_metric_score = metric.get()[1]
......
......@@ -20,7 +20,7 @@
${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = applyBeamSearch(input, 0, ${length}, ${width}, 1.0, ${networkInstruction?index}, input)
<#else>
${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")?replace("_state_","_state_[0]")})
<#if !(tc.getStreamOutputNames(networkInstruction.body)[0]?ends_with("_output_"))>
<#if !(tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]?ends_with("_output_"))>
outputs.append(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]})
</#if>
<#list resolvedBody.elements as element>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment