Commit 2cfa59f0 authored by Christian Fuß

fixed some problems in BeamSearch

parent 25b29329
Pipeline #183512 failed with stages
......@@ -320,4 +320,8 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
return unroll.getIntValue(AllPredefinedLayers.WIDTH_NAME).get();
}
/**
 * Reads the beam-search depth configured on an unroll instruction.
 *
 * @param unroll the unroll instruction that is queried for
 *               {@code AllPredefinedLayers.DEPTH_NAME}
 * @return the value obtained by calling {@code get()} on the looked-up entry
 */
public int getBeamSearchDepth(UnrollInstructionSymbol unroll){
    final int beamDepth = unroll.getIntValue(AllPredefinedLayers.DEPTH_NAME).get();
    return beamDepth;
}
}
......@@ -182,31 +182,40 @@ class ${tc.fileNameWithoutEnding}:
]
#TODO still needs testing, currently one path will always end up with p ~ 1.0
def applyBeamSearch(input, depth, max_width, currProb, netIndex, bestOutput):
def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
bestProb = 0.0
while depth < max_width:
while depth < maxDepth:
depth += 1
for beam in input:
top_k = mx.nd.topk(beam, axis=0, k=2)
top_k_values = mx.nd.topk(beam, ret_typ='value', axis=0, k=2)
for index in range(top_k.size):
#print mx.nd.array(top_k[index])
#print mx.nd.array(top_k_values[index])
batchIndex = 0
for batchEntry in input:
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
if depth == 1:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k[index])), depth, max_width, currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k[index])))
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k[index])), depth, max_width, currProb * top_k_values[index], netIndex, bestOutput)
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, bestOutput)
if depth == max_width:
if depth == maxDepth:
#print currProb
if currProb > bestProb:
bestProb = currProb
bestOutput = result
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
if True: <#-- Fix indentation -->
<#include "pythonExecute.ftl">
......
......@@ -5,25 +5,29 @@
${tc.getName(output)} = mx.nd.zeros((batch_size, ${tc.join(output.ioDeclaration.type.dimensions, ", ")},), ctx=mx_context)
</#list>
<#assign instructionCounter = 0>
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.isUnroll()>
<#list networkInstruction.toUnrollInstruction().resolvedBodies as resolvedBody>
${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")})
<#if networkInstruction.name == "BeamSearch">
input = ${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")}
${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = applyBeamSearch(input, 0, ${tc.getBeamSearchWidth(networkInstruction.toUnrollInstruction())}, 1.0, ${networkInstruction?index}, input)
</#if>
input = ${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")}
<#assign depth = tc.getBeamSearchDepth(networkInstruction.toUnrollInstruction())>
<#assign width = tc.getBeamSearchWidth(networkInstruction.toUnrollInstruction())>
<#-- Signature is applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
     the beam width comes before the maximum depth, and the search starts at depth 0. -->
${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = applyBeamSearch(input, 0, ${width}, ${depth}, 1.0, ${networkInstruction?index}, input)
<#else>
${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")})
<#list resolvedBody.elements as element>
<#if element.name == "ArgMax">
${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body, resolvedBody)[0]}, axis=1)
</#if>
</#list>
</#if>
</#list>
<#else>
<#if networkInstruction.body.isTrainable()>
${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ")} = self._networks[${networkInstruction?index}](${tc.join(tc.getStreamInputNames(networkInstruction.body), ", ")})
<#list networkInstruction.body.elements as element>
<#if element.name == "ArgMax">
<#-- Skip argmax only when a following instruction exists and is a BeamSearch.
     The size check keeps the look-ahead from dereferencing a missing element
     when the current instruction is the last one. -->
<#if element.name == "ArgMax" && !((instructionCounter + 1) lt tc.architecture.networkInstructions?size && tc.architecture.networkInstructions[instructionCounter+1].getName() == "BeamSearch")>
${tc.getStreamOutputNames(networkInstruction.body)[0]} = mx.nd.argmax(${tc.getStreamOutputNames(networkInstruction.body)[0]}, axis=1)
</#if>
</#list>
......@@ -31,4 +35,5 @@
${tc.include(networkInstruction.body, "PYTHON_INLINE")}
</#if>
</#if>
<#assign instructionCounter = instructionCounter + 1>
</#list>
\ No newline at end of file
......@@ -177,6 +177,40 @@ class CNNSupervisedTrainer_Alexnet:
batch.label[0].as_in_context(mx_context)
]
# Recursive beam-search helper.
#
# For every entry of `input` it picks the `width` highest-scoring positions
# (mx.nd.topk along axis 0), feeds each selected index through
# self._networks[netIndex], and recurses with the running probability
# multiplied by that position's score, until `depth` reaches `maxDepth`.
#
# Parameters:
#   input      -- iterable of per-entry score arrays (iterated as batch entries)
#   depth      -- current recursion depth; incremented before each expansion
#   width      -- number of top-scoring candidates expanded per entry (topk k)
#   maxDepth   -- depth at which expansion stops and candidates are compared
#   currProb   -- probability accumulated along the path leading to this call
#   netIndex   -- index into self._networks of the network to apply
#   bestOutput -- output buffer updated per batch entry and finally returned
#
# Returns: bestOutput.
#
# NOTE(review): `self` is not a parameter; presumably this function is nested
# inside a method and captures `self` from it -- confirm against the full file.
# NOTE(review): indentation is not preserved in this view, so the nesting of
# the `if depth == maxDepth` check and of `batchIndex += 1` cannot be confirmed here.
def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
bestProb = 0.0
while depth < maxDepth:
depth += 1
batchIndex = 0
for batchEntry in input:
# Two topk calls over the same entry: the first uses the default return type
# (stored as indices), the second requests the matching values.
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
# On the first level the freshly computed network output is also passed as the
# initial bestOutput; deeper levels pass the caller's bestOutput through.
if depth == 1:
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, bestOutput)
if depth == maxDepth:
#print currProb
# NOTE(review): this compares the probability this call was entered with, not
# currProb * top_k_values[index] of the candidate just expanded -- confirm
# that this is the intended ranking criterion.
if currProb > bestProb:
bestProb = currProb
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
if True:
predictions_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context)
......
......@@ -177,6 +177,40 @@ class CNNSupervisedTrainer_CifarClassifierNetwork:
batch.label[0].as_in_context(mx_context)
]
# Recursive beam-search helper.
#
# For every entry of `input` it picks the `width` highest-scoring positions
# (mx.nd.topk along axis 0), feeds each selected index through
# self._networks[netIndex], and recurses with the running probability
# multiplied by that position's score, until `depth` reaches `maxDepth`.
#
# Parameters:
#   input      -- iterable of per-entry score arrays (iterated as batch entries)
#   depth      -- current recursion depth; incremented before each expansion
#   width      -- number of top-scoring candidates expanded per entry (topk k)
#   maxDepth   -- depth at which expansion stops and candidates are compared
#   currProb   -- probability accumulated along the path leading to this call
#   netIndex   -- index into self._networks of the network to apply
#   bestOutput -- output buffer updated per batch entry and finally returned
#
# Returns: bestOutput.
#
# NOTE(review): `self` is not a parameter; presumably this function is nested
# inside a method and captures `self` from it -- confirm against the full file.
# NOTE(review): indentation is not preserved in this view, so the nesting of
# the `if depth == maxDepth` check and of `batchIndex += 1` cannot be confirmed here.
def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
bestProb = 0.0
while depth < maxDepth:
depth += 1
batchIndex = 0
for batchEntry in input:
# Two topk calls over the same entry: the first uses the default return type
# (stored as indices), the second requests the matching values.
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
# On the first level the freshly computed network output is also passed as the
# initial bestOutput; deeper levels pass the caller's bestOutput through.
if depth == 1:
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, bestOutput)
if depth == maxDepth:
#print currProb
# NOTE(review): this compares the probability this call was entered with, not
# currProb * top_k_values[index] of the candidate just expanded -- confirm
# that this is the intended ranking criterion.
if currProb > bestProb:
bestProb = currProb
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
if True:
softmax_ = mx.nd.zeros((batch_size, 10,), ctx=mx_context)
......
......@@ -177,6 +177,40 @@ class CNNSupervisedTrainer_VGG16:
batch.label[0].as_in_context(mx_context)
]
# Recursive beam-search helper.
#
# For every entry of `input` it picks the `width` highest-scoring positions
# (mx.nd.topk along axis 0), feeds each selected index through
# self._networks[netIndex], and recurses with the running probability
# multiplied by that position's score, until `depth` reaches `maxDepth`.
#
# Parameters:
#   input      -- iterable of per-entry score arrays (iterated as batch entries)
#   depth      -- current recursion depth; incremented before each expansion
#   width      -- number of top-scoring candidates expanded per entry (topk k)
#   maxDepth   -- depth at which expansion stops and candidates are compared
#   currProb   -- probability accumulated along the path leading to this call
#   netIndex   -- index into self._networks of the network to apply
#   bestOutput -- output buffer updated per batch entry and finally returned
#
# Returns: bestOutput.
#
# NOTE(review): `self` is not a parameter; presumably this function is nested
# inside a method and captures `self` from it -- confirm against the full file.
# NOTE(review): indentation is not preserved in this view, so the nesting of
# the `if depth == maxDepth` check and of `batchIndex += 1` cannot be confirmed here.
def applyBeamSearch(input, depth, width, maxDepth, currProb, netIndex, bestOutput):
bestProb = 0.0
while depth < maxDepth:
depth += 1
batchIndex = 0
for batchEntry in input:
# Two topk calls over the same entry: the first uses the default return type
# (stored as indices), the second requests the matching values.
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
# On the first level the freshly computed network output is also passed as the
# initial bestOutput; deeper levels pass the caller's bestOutput through.
if depth == 1:
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), depth, width, maxDepth,
currProb * top_k_values[index], netIndex, bestOutput)
if depth == maxDepth:
#print currProb
# NOTE(review): this compares the probability this call was entered with, not
# currProb * top_k_values[index] of the candidate just expanded -- confirm
# that this is the intended ranking criterion.
if currProb > bestProb:
bestProb = currProb
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
if True:
predictions_ = mx.nd.zeros((batch_size, 1000,), ctx=mx_context)
......
......@@ -14,7 +14,7 @@ architecture RNNencdec(max_length=50, vocabulary_size=30000, hidden_size=1000){
encoder.state -> decoder.state;
timed<t> BeamSearch(max_length=50) {
timed<t> BeamSearch(max_length=50, depth=2, width=2) {
target[t-1] ->
Embedding(output_dim=hidden_size) ->
decoder ->
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment