Commit 96c688b9 authored by Christian Fuß
Browse files

Added BLEU and NIST scores for NLP tasks

parent 06da20d8
Pipeline #191795 failed with stages
in 1 minute
......@@ -25,8 +25,10 @@ class ${tc.fileNameWithoutEnding}:
data_std[input_name + '_'] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_label = {}
index = 0
for output_name in self._output_names_:
train_label[output_name] = train_h5[output_name]
train_label[index] = train_h5[output_name]
index += 1
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
......@@ -40,8 +42,10 @@ class ${tc.fileNameWithoutEnding}:
test_data[input_name] = test_h5[input_name]
test_label = {}
index = 0
for output_name in self._output_names_:
test_label[output_name] = test_h5[output_name]
test_label[index] = test_h5[output_name]
index += 1
test_iter = mx.io.NDArrayIter(data=test_data,
label=test_label,
......
......@@ -4,6 +4,7 @@ import numpy as np
import time
import os
import shutil
import pickle
from mxnet import gluon, autograd, nd
class CrossEntropyLoss(gluon.loss.Loss):
......@@ -31,6 +32,7 @@ class LogCoshLoss(gluon.loss.Loss):
loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
return F.mean(loss, axis=self._batch_axis, exclude=True)
class ${tc.fileNameWithoutEnding}:
def applyBeamSearch(input, length, width, maxLength, currProb, netIndex, bestOutput):
bestProb = 0.0
......@@ -172,6 +174,8 @@ class ${tc.fileNameWithoutEnding}:
${output_name}label = batch.label[${output_name?index}].as_in_context(mx_context)
</#list>
outputs=[]
with autograd.record():
<#include "pythonExecuteWithLoss.ftl">
......@@ -226,8 +230,34 @@ class ${tc.fileNameWithoutEnding}:
else:
predictions.append(output_name)
#print [word[0] for word in predictions]
#print labels[0]
#Compute BLEU and NIST Score if data folder contains a dictionary -> NLP dataset
if(os.path.isfile('${tc.dataPath}/dict.pkl')):
with open('${tc.dataPath}/dict.pkl', 'rb') as f:
dict = pickle.load(f)
import nltk.translate.bleu_score
import nltk.translate.nist_score
prediction = []
for index in range(batch_size):
sentence = ''
for entry in predictions:
sentence += dict[int(entry[index].asscalar())] + ' '
prediction.append(sentence)
for index in range(batch_size):
sentence = ''
for batchEntry in batch.label:
sentence += dict[int(batchEntry[index].asscalar())] + ' '
print "############################"
print "label: ", sentence
print "prediction: ", prediction[index]
BLEUscore = nltk.translate.bleu_score.sentence_bleu([sentence], prediction[index])
NISTscore = nltk.translate.nist_score.sentence_nist([sentence], prediction[index])
print "BLEU: ", BLEUscore
print "NIST: ", NISTscore
print "############################"
metric.update(preds=predictions, labels=labels)
train_metric_score = metric.get()[1]
......
......@@ -20,7 +20,7 @@
${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = applyBeamSearch(input, 0, ${length}, ${width}, 1.0, ${networkInstruction?index}, input)
<#else>
${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")?replace("_state_","_state_[0]")})
<#if !(tc.getStreamOutputNames(networkInstruction.body)[0]?ends_with("_output_"))>
<#if !(tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]?ends_with("_output_"))>
outputs.append(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]})
</#if>
<#list resolvedBody.elements as element>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment