fixed some problems in BeamSearch

2cfa59f0 · Christian Fuß · 25b29329 · 2cfa59f0 · 2cfa59f0 · 2cfa59f0
Commit 2cfa59f0 authored 5 years ago by Christian Fuß
--- a/src/main/java/de/monticore/lang/monticar/cnnarch/gluongenerator/CNNArch2GluonTemplateController.java
+++ b/src/main/java/de/monticore/lang/monticar/cnnarch/gluongenerator/CNNArch2GluonTemplateController.java
@@ -320,4 +320,8 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
        return unroll.getIntValue(AllPredefinedLayers.WIDTH_NAME).get();
    }

+    public int getBeamSearchDepth(UnrollInstructionSymbol unroll){
+        return unroll.getIntValue(AllPredefinedLayers.DEPTH_NAME).get();
+    }
+
 }
--- a/src/main/resources/templates/gluon/CNNSupervisedTrainer.ftl
+++ b/src/main/resources/templates/gluon/CNNSupervisedTrainer.ftl
@@ -182,31 +182,40 @@ class ${tc.fileNameWithoutEnding}:

                ]

-                #TODO still needs testing, currently one path will always end up with p ~ 1.0
-                def applyBeamSearch(input, depth, max_width, currProb, netIndex, bestOutput):
+
+                def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
                    bestProb = 0.0
-                    while depth < max_width:
+                    while depth < maxDepth:
                        depth += 1
-                        for beam in input:
-                            top_k = mx.nd.topk(beam, axis=0, k=2)
-                            top_k_values = mx.nd.topk(beam, ret_typ='value', axis=0, k=2)
-                            for index in range(top_k.size):
-                                #print mx.nd.array(top_k[index])
-                                #print mx.nd.array(top_k_values[index])
+                        batchIndex = 0
+                        for batchEntry in input:
+                            top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
+                            top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
+                            for index in range(top_k_indices.size):
+
+                                #print mx.nd.array(top_k_indices[index])
+                                #print top_k_values[index]
                                if depth == 1:
-                                    result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k[index])), depth, max_width, currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k[index])))
+                                    #print mx.nd.array(top_k_indices[index])
+                                    result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
+                                        currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
                                else:
-                                    result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k[index])), depth, max_width, currProb * top_k_values[index], netIndex, bestOutput)
+                                    result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
+                                        currProb * top_k_values[index], netIndex, bestOutput)

-                                if depth == max_width:
+                                if depth == maxDepth:
                                    #print currProb
                                    if currProb > bestProb:
                                        bestProb = currProb
-                                        bestOutput = result
+                                        bestOutput[batchIndex] = result[batchIndex]
+                                        #print "new bestOutput: ", bestOutput
+
+                            batchIndex += 1
                    #print bestOutput
                    #print bestProb
                    return bestOutput

+
                if True: <#-- Fix indentation -->
 <#include "pythonExecute.ftl">


--- a/src/main/resources/templates/gluon/pythonExecute.ftl
+++ b/src/main/resources/templates/gluon/pythonExecute.ftl
@@ -5,25 +5,29 @@
                    ${tc.getName(output)} = mx.nd.zeros((batch_size, ${tc.join(output.ioDeclaration.type.dimensions, ", ")},), ctx=mx_context)
 </#list>

+<#assign instructionCounter = 0>
 <#list tc.architecture.networkInstructions as networkInstruction>
 <#if networkInstruction.isUnroll()>
 <#list networkInstruction.toUnrollInstruction().resolvedBodies as resolvedBody>
-                    ${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")})
                    <#if networkInstruction.name == "BeamSearch">
-                    input = ${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")}
-                    ${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = applyBeamSearch(input, 0, ${tc.getBeamSearchWidth(networkInstruction.toUnrollInstruction())}, 1.0, ${networkInstruction?index}, input)
-                    </#if>
+                    input = ${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")}
+                    <#assign depth = tc.getBeamSearchDepth(networkInstruction.toUnrollInstruction())><#-- passed as maxDepth (4th argument) -->
+                    <#assign width = tc.getBeamSearchWidth(networkInstruction.toUnrollInstruction())><#-- passed as width (3rd argument) -->
+                    ${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = applyBeamSearch(input, 0, ${width}, ${depth}, 1.0, ${networkInstruction?index}, input)
+                    <#else>
+                    ${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")})
                    <#list resolvedBody.elements as element>
                    <#if element.name == "ArgMax">
                    ${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}, axis=1)
                    </#if>
                    </#list>
+                    </#if>
 </#list>
 <#else>
 <#if networkInstruction.body.isTrainable()>
                    ${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body), ", ")})
                    <#list networkInstruction.body.elements as element>
-                    <#if element.name == "ArgMax">
+                    <#if element.name == "ArgMax" && !(networkInstruction?has_next && tc.architecture.networkInstructions[instructionCounter+1].getName() == "BeamSearch")><#-- only inspect the successor when one exists; skip ArgMax when BeamSearch follows -->
                    ${tc.getStreamOutputNames(networkInstruction.body)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body)[0]}, axis=1)
                    </#if>
                    </#list>
@@ -31,4 +35,5 @@
 ${tc.include(networkInstruction.body, "PYTHON_INLINE")}
 </#if>
 </#if>
+<#assign instructionCounter = instructionCounter + 1>
 </#list>
\ No newline at end of file
--- a/src/test/resources/target_code/CNNSupervisedTrainer_Alexnet.py
+++ b/src/test/resources/target_code/CNNSupervisedTrainer_Alexnet.py
@@ -177,6 +177,40 @@ class CNNSupervisedTrainer_Alexnet:
                    batch.label[0].as_in_context(mx_context)
                ]

+
+                def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
+                    bestProb = 0.0
+                    while depth < maxDepth:
+                        depth += 1
+                        batchIndex = 0
+                        for batchEntry in input:
+                            top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
+                            top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
+                            for index in range(top_k_indices.size):
+
+                                #print mx.nd.array(top_k_indices[index])
+                                #print top_k_values[index]
+                                if depth == 1:
+                                    #print mx.nd.array(top_k_indices[index])
+                                    result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
+                                        currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
+                                else:
+                                    result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
+                                        currProb * top_k_values[index], netIndex, bestOutput)
+
+                                if depth == maxDepth:
+                                    #print currProb
+                                    if currProb > bestProb:
+                                        bestProb = currProb
+                                        bestOutput[batchIndex] = result[batchIndex]
+                                        #print "new bestOutput: ", bestOutput
+
+                            batchIndex += 1
+                    #print bestOutput
+                    #print bestProb
+                    return bestOutput
+
+
                if True: 
                    predictions_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context)


--- a/src/test/resources/target_code/CNNSupervisedTrainer_CifarClassifierNetwork.py
+++ b/src/test/resources/target_code/CNNSupervisedTrainer_CifarClassifierNetwork.py
@@ -177,6 +177,40 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
                    batch.label[0].as_in_context(mx_context)
                ]

+
+                def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
+                    bestProb = 0.0
+                    while depth < maxDepth:
+                        depth += 1
+                        batchIndex = 0
+                        for batchEntry in input:
+                            top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
+                            top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
+                            for index in range(top_k_indices.size):
+
+                                #print mx.nd.array(top_k_indices[index])
+                                #print top_k_values[index]
+                                if depth == 1:
+                                    #print mx.nd.array(top_k_indices[index])
+                                    result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
+                                        currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
+                                else:
+                                    result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
+                                        currProb * top_k_values[index], netIndex, bestOutput)
+
+                                if depth == maxDepth:
+                                    #print currProb
+                                    if currProb > bestProb:
+                                        bestProb = currProb
+                                        bestOutput[batchIndex] = result[batchIndex]
+                                        #print "new bestOutput: ", bestOutput
+
+                            batchIndex += 1
+                    #print bestOutput
+                    #print bestProb
+                    return bestOutput
+
+
                if True: 
                    softmax_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context)


--- a/src/test/resources/target_code/CNNSupervisedTrainer_VGG16.py
+++ b/src/test/resources/target_code/CNNSupervisedTrainer_VGG16.py
@@ -177,6 +177,40 @@ class CNNSupervisedTrainer_VGG16:
                    batch.label[0].as_in_context(mx_context)
                ]

+
+                def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
+                    bestProb = 0.0
+                    while depth < maxDepth:
+                        depth += 1
+                        batchIndex = 0
+                        for batchEntry in input:
+                            top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
+                            top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
+                            for index in range(top_k_indices.size):
+
+                                #print mx.nd.array(top_k_indices[index])
+                                #print top_k_values[index]
+                                if depth == 1:
+                                    #print mx.nd.array(top_k_indices[index])
+                                    result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
+                                        currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
+                                else:
+                                    result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
+                                        currProb * top_k_values[index], netIndex, bestOutput)
+
+                                if depth == maxDepth:
+                                    #print currProb
+                                    if currProb > bestProb:
+                                        bestProb = currProb
+                                        bestOutput[batchIndex] = result[batchIndex]
+                                        #print "new bestOutput: ", bestOutput
+
+                            batchIndex += 1
+                    #print bestOutput
+                    #print bestProb
+                    return bestOutput
+
+
                if True: 
                    predictions_ = mx.nd.zeros((batch_size, 1000,), ctx=mx_context)


--- a/src/test/resources/valid_tests/RNNencdec.cnna
+++ b/src/test/resources/valid_tests/RNNencdec.cnna
@@ -14,7 +14,7 @@ architecture RNNencdec(max_length=50, vocabulary_size=30000, hidden_size=1000){

     encoder.state -> decoder.state;

-     timed<t> BeamSearch(max_length=50) {
+     timed<t> BeamSearch(max_length=50, depth=2, width=2) {
         target[t-1] ->
         Embedding(output_dim=hidden_size) ->
         decoder ->