Commit 35854e1b authored by Sebastian N.'s avatar Sebastian N.
Browse files

Removed inline mode, changed generation of execute and python execute and...

Removed inline mode, changed generation of execute and Python execute, and implemented beam search for both Python and C++
parent 23631e61
......@@ -30,6 +30,8 @@ import de.se_rwth.commons.logging.Log;
import java.io.Writer;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
public static final String NET_DEFINITION_MODE_KEY = "mode";
......@@ -39,6 +41,8 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
super(architecture, templateConfiguration);
}
public void include(String relativePath, String templateWithoutFileEnding, Writer writer, NetDefinitionMode netDefinitionMode){
String templatePath = relativePath + templateWithoutFileEnding + FTL_FILE_ENDING;
Map<String, Object> ftlContext = new HashMap<>();
......@@ -128,14 +132,13 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
include(architectureElement, getWriter(), netDefinitionMode);
}
public Set<String> getStreamInputNames(SerialCompositeElementSymbol stream) {
return getStreamInputs(stream).keySet();
public Set<String> getStreamInputNames(SerialCompositeElementSymbol stream, boolean outputAsArray) {
return getStreamInputs(stream, outputAsArray).keySet();
}
// used for unroll
public List<String> getStreamInputNames(SerialCompositeElementSymbol stream, SerialCompositeElementSymbol currentStream) {
List<String> inputNames = new LinkedList<>(getStreamInputNames(stream));
Map<String, String> pairs = getUnrollPairs(stream, currentStream);
public List<String> getUnrollInputNames(UnrollInstructionSymbol unroll, String variable) {
List<String> inputNames = new LinkedList<>(getStreamInputNames(unroll.getBody(), true));
Map<String, String> pairs = getUnrollPairs(unroll.getBody(), unroll.getResolvedBodies().get(0), variable);
for (int i = 0; i != inputNames.size(); ++i) {
if (pairs.containsKey(inputNames.get(i))) {
......@@ -147,15 +150,44 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
}
public Collection<List<String>> getStreamInputDimensions(SerialCompositeElementSymbol stream) {
return getStreamInputs(stream).values();
return getStreamInputs(stream, false).values();
}
public String getOutputName() {
return getNameWithoutIndex(getName(getArchitectureOutputSymbols().get(0)));
}
// Rewrites a trailing numeric suffix of the form "<n>_" into array notation "[<n>]",
// e.g. "state1_" becomes "state[1]". Names without such a suffix are returned unchanged.
public String getNameAsArray(String name) {
    Matcher trailingIndex = Pattern.compile("([0-9]+)_$").matcher(name);
    return trailingIndex.replaceAll("[$1]");
}
// Strips either a trailing numeric suffix ("<n>_") or a trailing bracketed index
// ("[...]") from the given name, e.g. "state1_" -> "state", "state[1]" -> "state".
public String getNameWithoutIndex(String name) {
    String withoutNumericSuffix = name.replaceAll("([0-9]+)_$", "");
    return withoutNumericSuffix.replaceAll("\\[[^\\]]+\\]$", "");
}
public Set<String> getStreamOutputNames(SerialCompositeElementSymbol stream) {
// Compiled once instead of on every call (Pattern.compile is relatively expensive
// and the pattern is constant); matches a trailing "[...]" suffix, capturing its content.
private static final Pattern INDEX_SUFFIX_PATTERN = Pattern.compile("\\[([^\\]]+)\\]$");

/**
 * Returns the index contained in a trailing "[index]" suffix of the given name,
 * e.g. "output[2]" yields "2".
 *
 * @param name          the (possibly indexed) variable name to inspect
 * @param defaultToZero when true, return "0" for names without an index suffix;
 *                      when false, return the empty string instead
 * @return the captured index text, or the default described above
 */
public String getIndex(String name, boolean defaultToZero) {
    Matcher matcher = INDEX_SUFFIX_PATTERN.matcher(name);
    if (matcher.find()) {
        return matcher.group(1);
    }
    return defaultToZero ? "0" : "";
}
public Set<String> getStreamOutputNames(SerialCompositeElementSymbol stream, boolean asArray) {
Set<String> outputNames = new LinkedHashSet<>();
for (ArchitectureElementSymbol element : stream.getLastAtomicElements()) {
if (element.isOutput()) {
outputNames.add(getName(element));
String name = getName(element);
if (asArray) {
name = getNameAsArray(name);
}
outputNames.add(name);
}
}
......@@ -164,10 +196,9 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
return outputNames;
}
// used for unroll
public List<String> getStreamOutputNames(SerialCompositeElementSymbol stream, SerialCompositeElementSymbol currentStream) {
List<String> outputNames = new LinkedList<>(getStreamOutputNames(stream));
Map<String, String> pairs = getUnrollPairs(stream, currentStream);
public List<String> getUnrollOutputNames(UnrollInstructionSymbol unroll, String variable) {
List<String> outputNames = new LinkedList<>(getStreamOutputNames(unroll.getBody(), true));
Map<String, String> pairs = getUnrollPairs(unroll.getBody(), unroll.getResolvedBodies().get(0), variable);
for (int i = 0; i != outputNames.size(); ++i) {
if (pairs.containsKey(outputNames.get(i))) {
......@@ -178,6 +209,19 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
return outputNames;
}
// Reports whether the layer directly feeding the stream's output is an ArgMax layer.
// The last element of the stream is always the output itself, so the element at
// size() - 2 is inspected; streams with at most one element cannot end in ArgMax.
public boolean endsWithArgmax(SerialCompositeElementSymbol stream) {
    List<ArchitectureElementSymbol> elements = stream.getElements();
    if (elements.size() <= 1) {
        return false;
    }
    // Check second last element because last element is output
    ArchitectureElementSymbol beforeOutput = elements.get(elements.size() - 2);
    return beforeOutput.getName().equals(AllPredefinedLayers.ARG_MAX_NAME);
}
// Used to initialize all layer variable members which are passed through the networks
public Map<String, List<String>> getLayerVariableMembers() {
Map<String, List<String>> members = new LinkedHashMap<>();
......@@ -191,7 +235,7 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
// Calculate differently named VariableSymbol elements in two streams, currently used for the UnrollInstructionSymbol
// body which is resolved with t = CONST_OFFSET and the current body of the actual timestep t
public Map<String, String> getUnrollPairs(ArchitectureElementSymbol element, ArchitectureElementSymbol current) {
public Map<String, String> getUnrollPairs(ArchitectureElementSymbol element, ArchitectureElementSymbol current, String variable) {
Map<String, String> pairs = new HashMap<>();
if (element instanceof CompositeElementSymbol && current instanceof CompositeElementSymbol) {
......@@ -203,13 +247,24 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
String name = getName(elements.get(i));
String currentName = getName(currentElements.get(i));
if (elements.get(i).isOutput()) {
name = getNameAsArray(name);
}
if (currentElements.get(i).isOutput()) {
currentName = getNameAsArray(currentName);
}
if (elements.get(i) instanceof VariableSymbol && currentElements.get(i) instanceof VariableSymbol) {
if (name != null && currentName != null && !name.equals(currentName)) {
String newIndex = variable + "-1+" + getIndex(currentName, true);
currentName = currentName.replaceAll("\\[([0-9]+)\\]$", "[" + newIndex + "]");
pairs.put(name, currentName);
}
}
pairs.putAll(getUnrollPairs(elements.get(i), currentElements.get(i)));
pairs.putAll(getUnrollPairs(elements.get(i), currentElements.get(i), variable));
}
}
}
......@@ -217,7 +272,7 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
return pairs;
}
private Map<String, List<String>> getStreamInputs(SerialCompositeElementSymbol stream) {
private Map<String, List<String>> getStreamInputs(SerialCompositeElementSymbol stream, boolean outputAsArray) {
Map<String, List<String>> inputs = new LinkedHashMap<>();
for (ArchitectureElementSymbol element : stream.getFirstAtomicElements()) {
......@@ -229,7 +284,13 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
dimensions.add(intDimension.toString());
}
inputs.put(getName(element), dimensions);
String name = getName(element);
if (element.isOutput() && outputAsArray) {
name = getNameAsArray(name);
}
inputs.put(name, dimensions);
}
else if (element instanceof ConstantSymbol) {
inputs.put(getName(element), Arrays.asList("1"));
......@@ -321,12 +382,23 @@ public class CNNArch2GluonTemplateController extends CNNArchTemplateController {
return dimensions;
}
public int getBeamSearchWidth(UnrollInstructionSymbol unroll){
return unroll.getIntValue(AllPredefinedLayers.WIDTH_NAME).get();
// Reports whether any network instruction of the architecture is an unroll
// instruction (i.e. whether beam-search/unroll code paths need to be generated).
public boolean hasUnrollInstructions() {
    return getArchitecture().getNetworkInstructions().stream()
            .anyMatch(NetworkInstructionSymbol::isUnroll);
}
public int getBeamSearchLength(UnrollInstructionSymbol unroll){
// Maximum sequence length used when unrolling beam search for this instruction.
// NOTE(review): Optional.get() without a presence check — presumably the
// 'max_length' attribute is mandatory on unroll instructions; confirm, otherwise
// this throws NoSuchElementException.
public int getBeamSearchMaxLength(UnrollInstructionSymbol unroll){
return unroll.getIntValue(AllPredefinedLayers.MAX_LENGTH_NAME).get();
}
// Beam width used when unrolling beam search; the 'width' attribute is optional
// and falls back to 1 when absent.
public int getBeamSearchWidth(UnrollInstructionSymbol unroll){
// Beam search with width 1 is greedy search
return unroll.getIntValue(AllPredefinedLayers.WIDTH_NAME).orElse(1);
}
}
......@@ -5,9 +5,7 @@ package de.monticore.lang.monticar.cnnarch.gluongenerator;
*/
public enum NetDefinitionMode {
ARCHITECTURE_DEFINITION,
FORWARD_FUNCTION,
PYTHON_INLINE,
CPP_INLINE;
FORWARD_FUNCTION;
public static NetDefinitionMode fromString(final String netDefinitionMode) {
switch(netDefinitionMode) {
......@@ -15,10 +13,6 @@ public enum NetDefinitionMode {
return ARCHITECTURE_DEFINITION;
case "FORWARD_FUNCTION":
return FORWARD_FUNCTION;
case "PYTHON_INLINE":
return PYTHON_INLINE;
case "CPP_INLINE":
return CPP_INLINE;
default:
throw new IllegalArgumentException("Unknown Net Definition Mode");
}
......
......@@ -3,9 +3,7 @@ import logging
import os
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.body.isTrainable()>
from CNNNet_${tc.fullArchitectureName} import Net_${networkInstruction?index}
</#if>
</#list>
class ${tc.fileNameWithoutEnding}:
......@@ -53,12 +51,10 @@ class ${tc.fileNameWithoutEnding}:
def construct(self, context, data_mean=None, data_std=None):
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.body.isTrainable()>
self.networks[${networkInstruction?index}] = Net_${networkInstruction?index}(data_mean=data_mean, data_std=data_std)
self.networks[${networkInstruction?index}].collect_params().initialize(self.weight_initializer, ctx=context)
self.networks[${networkInstruction?index}].hybridize()
self.networks[${networkInstruction?index}](<#list tc.getStreamInputDimensions(networkInstruction.body) as dimensions>mx.nd.zeros((1, ${tc.join(tc.cutDimensions(dimensions), ",")},), ctx=context)<#sep>, </#list>)
</#if>
</#list>
if not os.path.exists(self._model_dir_):
......
......@@ -12,7 +12,7 @@ class ${tc.fileNameWithoutEnding}:
def __init__(self):
self._data_dir = "${tc.dataPath}/"
def load_data(self, batch_size):
def load_data(self, train_batch_size, test_batch_size):
train_h5, test_h5 = self.load_h5_files()
train_data = {}
......@@ -32,7 +32,11 @@ class ${tc.fileNameWithoutEnding}:
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
batch_size=train_batch_size)
train_test_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=test_batch_size)
test_iter = None
......@@ -47,12 +51,11 @@ class ${tc.fileNameWithoutEnding}:
test_label[index] = test_h5[output_name]
index += 1
test_iter = mx.io.NDArrayIter(data=test_data,
label=test_label,
batch_size=batch_size)
batch_size=test_batch_size)
return train_iter, test_iter, data_mean, data_std
return train_iter, train_test_iter, test_iter, data_mean, data_std
def load_h5_files(self):
train_h5 = None
......
......@@ -87,18 +87,15 @@ class CustomGRU(gluon.HybridBlock):
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.body.isTrainable()>
class Net_${networkInstruction?index}(gluon.HybridBlock):
def __init__(self, data_mean=None, data_std=None, **kwargs):
super(Net_${networkInstruction?index}, self).__init__(**kwargs)
self.last_layers = {}
with self.name_scope():
${tc.include(networkInstruction.body, "ARCHITECTURE_DEFINITION")}
pass
def hybrid_forward(self, F, ${tc.join(tc.getStreamInputNames(networkInstruction.body), ", ")}):
def hybrid_forward(self, F, ${tc.join(tc.getStreamInputNames(networkInstruction.body, false), ", ")}):
${tc.include(networkInstruction.body, "FORWARD_FUNCTION")}
return ${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ")}
return ${tc.join(tc.getStreamOutputNames(networkInstruction.body, false), ", ")}
</#if>
</#list>
......@@ -10,16 +10,15 @@
#include <CNNBufferFile.h>
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.body.isTrainable()>
class ${tc.fileNameWithoutEnding}_${networkInstruction?index}{
public:
const std::string json_file = "model/${tc.componentName}/model_${networkInstruction?index}_newest-symbol.json";
const std::string param_file = "model/${tc.componentName}/model_${networkInstruction?index}_newest-0000.params";
const std::vector<std::string> input_keys = {
<#if tc.getStreamInputNames(networkInstruction.body)?size == 1>
<#if tc.getStreamInputNames(networkInstruction.body, true)?size == 1>
"data"
<#else>
<#list tc.getStreamInputNames(networkInstruction.body) as variable>"data${variable?index}"<#sep>, </#list>
<#list tc.getStreamInputNames(networkInstruction.body, true) as variable>"data${variable?index}"<#sep>, </#list>
</#if>
};
const std::vector<std::vector<mx_uint>> input_shapes = {<#list tc.getStreamInputDimensions(networkInstruction.body) as dimensions>{${tc.join(dimensions, ", ")}}<#sep>, </#list>};
......@@ -35,9 +34,9 @@ public:
if(handle) MXPredFree(handle);
}
void predict(${tc.join(tc.getStreamInputNames(networkInstruction.body), ", ", "const std::vector<float> &in_", "")},
${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ", "std::vector<float> &out_", "")}){
<#list tc.getStreamInputNames(networkInstruction.body) as variable>
void predict(${tc.join(tc.getStreamInputNames(networkInstruction.body, false), ", ", "const std::vector<float> &in_", "")},
${tc.join(tc.getStreamOutputNames(networkInstruction.body, false), ", ", "std::vector<float> &out_", "")}){
<#list tc.getStreamInputNames(networkInstruction.body, false) as variable>
MXPredSetInput(handle, input_keys[${variable?index}].c_str(), in_${variable}.data(), static_cast<mx_uint>(in_${variable}.size()));
</#list>
......@@ -48,7 +47,7 @@ public:
mx_uint shape_len;
size_t size;
<#list tc.getStreamOutputNames(networkInstruction.body) as variable>
<#list tc.getStreamOutputNames(networkInstruction.body, false) as variable>
output_index = ${variable?index?c};
MXPredGetOutputShape(handle, output_index, &shape, &shape_len);
size = 1;
......@@ -113,7 +112,6 @@ public:
assert(handle);
}
};
</#if>
</#list>
#endif // ${tc.fileNameWithoutEnding?upper_case}
......@@ -171,39 +171,6 @@ class BLEU(mx.metric.EvalMetric):
class ${tc.fileNameWithoutEnding}:
def applyBeamSearch(input, length, width, maxLength, currProb, netIndex, bestOutput):
bestProb = 0.0
while length < maxLength:
length += 1
batchIndex = 0
for batchEntry in input:
top_k_indices = mx.nd.topk(batchEntry, axis=0, k=width)
top_k_values = mx.nd.topk(batchEntry, ret_typ='value', axis=0, k=width)
for index in range(top_k_indices.size):
#print mx.nd.array(top_k_indices[index])
#print top_k_values[index]
if length == 1:
#print mx.nd.array(top_k_indices[index])
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), length, width, maxLength,
currProb * top_k_values[index], netIndex, self._networks[netIndex](mx.nd.array(top_k_indices[index])))
else:
result = applyBeamSearch(self._networks[netIndex](mx.nd.array(top_k_indices[index])), length, width, maxLength,
currProb * top_k_values[index], netIndex, bestOutput)
if length == maxLength:
#print currProb
if currProb > bestProb:
bestProb = currProb
bestOutput[batchIndex] = result[batchIndex]
#print "new bestOutput: ", bestOutput
batchIndex += 1
#print bestOutput
#print bestProb
return bestOutput
def __init__(self, data_loader, net_constructor):
self._data_loader = data_loader
self._net_creator = net_constructor
......@@ -243,8 +210,10 @@ class ${tc.fileNameWithoutEnding}:
del optimizer_params['step_size']
del optimizer_params['learning_rate_decay']
train_batch_size = batch_size
test_batch_size = ${tc.hasUnrollInstructions()?then('1', 'batch_size')}
train_iter, test_iter, data_mean, data_std = self._data_loader.load_data(batch_size)
train_iter, train_test_iter, test_iter, data_mean, data_std = self._data_loader.load_data(train_batch_size, test_batch_size)
if normalize:
self._net_creator.construct(context=mx_context, data_mean=data_mean, data_std=data_std)
......@@ -305,17 +274,8 @@ class ${tc.fileNameWithoutEnding}:
for epoch in range(begin_epoch, begin_epoch + num_epoch):
train_iter.reset()
for batch_i, batch in enumerate(train_iter):
<#list tc.architectureInputs as input_name>
${input_name} = batch.data[0].as_in_context(mx_context)
</#list>
<#list tc.architectureOutputs as output_name>
${output_name}label = batch.label[${output_name?index}].as_in_context(mx_context)
</#list>
outputs=[]
with autograd.record():
<#include "pythonExecuteWithLoss.ftl">
<#include "pythonExecuteTrain.ftl">
loss = 0
for element in lossList:
......@@ -341,30 +301,16 @@ class ${tc.fileNameWithoutEnding}:
tic = None
train_iter.reset()
train_test_iter.reset()
metric = mx.metric.create(eval_metric, **eval_metric_params)
for batch_i, batch in enumerate(train_iter):
<#list tc.architectureInputs as input_name>
${input_name} = batch.data[0].as_in_context(mx_context)
</#list>
labels = [
<#list tc.architectureOutputs as output_name>
batch.label[${output_name?index}].as_in_context(mx_context)<#sep>,
</#list>
]
outputs=[]
for batch_i, batch in enumerate(train_test_iter):
if True: <#-- Fix indentation -->
<#include "pythonExecute.ftl">
<#include "pythonExecuteTest.ftl">
predictions = []
for output_name in outputs:
if mx.nd.shape_array(mx.nd.squeeze(output_name)).size > 1:
predictions.append(mx.nd.argmax(output_name, axis=1))
#ArgMax already applied
else:
predictions.append(output_name)
......@@ -374,21 +320,8 @@ class ${tc.fileNameWithoutEnding}:
test_iter.reset()
metric = mx.metric.create(eval_metric, **eval_metric_params)
for batch_i, batch in enumerate(test_iter):
<#list tc.architectureInputs as input_name>
${input_name} = batch.data[0].as_in_context(mx_context)
</#list>
labels = [
<#list tc.architectureOutputs as output_name>
batch.label[${output_name?index}].as_in_context(mx_context)<#sep>,
</#list>
]
outputs=[]
if True: <#-- Fix indentation -->
<#include "pythonExecute.ftl">
<#include "pythonExecuteTest.ftl">
predictions = []
for output_name in outputs:
......
<#if mode == "FORWARD_FUNCTION">
${element.name} = ${tc.join(element.inputs, " + ")}
<#elseif mode == "PYTHON_INLINE">
${element.name} = ${tc.join(element.inputs, " + ")}
<#elseif mode == "CPP_INLINE">
vector<float> ${element.name}(${element.inputs[0]}.size());
for (size_t i = 0; i != ${element.name}.size(); ++i) {
${element.name}[i] = ${tc.join(element.inputs, " + ", "", "[i]")};
}
</#if>
\ No newline at end of file
<#if mode == "FORWARD_FUNCTION">
${element.name} = F.broadcast_add(${tc.join(element.inputs, ",")})
<#elseif mode == "PYTHON_INLINE">
self.${element.name} = mx.nd.broadcast_add(${tc.join(element.inputs, ",")})
</#if>
\ No newline at end of file
<#if mode == "FORWARD_FUNCTION">
${element.name} = ${element.inputs[element.index]}
<#elseif mode == "PYTHON_INLINE">
${element.name} = ${element.inputs[element.index]}
<#elseif mode == "CPP_INLINE">
vector<float> ${element.name} = ${element.inputs[element.index]};
</#if>
......@@ -2,9 +2,5 @@
<#assign input = element.inputs[0]>
<#if mode == "FORWARD_FUNCTION">
${element.name} = ${input}
<#elseif mode == "PYTHON_INLINE">
${element.name} = ${input}
<#elseif mode == "CPP_INLINE">
${element.name} = ${input};
</#if>
</#if>
\ No newline at end of file
<#list tc.architectureInputSymbols as input>
vector<float> ${tc.getName(input)} = CNNTranslator::translate(${input.name}<#if input.arrayAccess.isPresent()>[${input.arrayAccess.get().intValue.get()?c}]</#if>);
</#list>
<#if tc.architectureOutputSymbols?size gt 1>
<#assign outputName = tc.getNameWithoutIndex(tc.getName(tc.architectureOutputSymbols[0]))>
vector<vector<float>> ${outputName}(${tc.architectureOutputSymbols?size});
for (size_t i = 0; i < ${outputName}.size(); ++i) {
${outputName}[i].emplace_back(${tc.join(tc.architectureOutputSymbols[0].ioDeclaration.type.dimensions, " * ")});
}
<#else>
<#list tc.architectureOutputSymbols as output>
vector<float> ${tc.getName(output)}(${tc.join(tc.architectureOutputSymbols[0].ioDeclaration.type.dimensions, " * ")});<#sep>,
</#list>
</#if>
<#list tc.getLayerVariableMembers()?keys as member>
vector<float> ${member}(${tc.join(tc.getLayerVariableMembers()[member], " * ")});
</#list>
<#list tc.architectureOutputSymbols as output>
vector<float> ${tc.getName(output)}(${tc.join(output.ioDeclaration.type.dimensions, " * ")});
</#list>
<#list tc.architecture.constants as constant>
vector<float> ${tc.getName(constant)}{${constant.intValue?c}};
</#list>
<#list tc.architecture.networkInstructions as networkInstruction>
<#if networkInstruction.isUnroll()>
<#list networkInstruction.toUnrollInstruction().resolvedBodies as resolvedBody>
_predictor_${networkInstruction?index}_.predict(${tc.join(tc.getStreamInputNames(networkInstruction.body, resolvedBody), ", ")}, ${tc.join(tc.getStreamOutputNames(networkInstruction.body, resolvedBody), ", ")});
{
int k = ${tc.getBeamSearchWidth(networkInstruction)};
<#list tc.getUnrollInputNames(networkInstruction, "1") as inputName>
<#if tc.getNameWithoutIndex(inputName) == tc.outputName>
vector<pair<vector<vector<float>>, double>> sequences{make_pair(vector<float>{${inputName}}, 1.0)};
</#if>
</#list>
<#else>
<#if networkInstruction.body.isTrainable()>
_predictor_${networkInstruction?index}_.predict(${tc.join(tc.getStreamInputNames(networkInstruction.body), ", ")}, ${tc.join(tc.getStreamOutputNames(networkInstruction.body), ", ")});
<#else>
${tc.include(networkInstruction.body, "CPP_INLINE")}
for (size_t i = 1; i < ${tc.getBeamSearchMaxLength(networkInstruction)}; ++i) {
vector<pair<vector<vector<float>>, double>> allCandidates;
for (const pair<vector<vector<float>>& pair : sequences) {
vector<vector<float>> seq = pair.first;
double score = pair.second;
<#list tc.getUnrollInputNames(networkInstruction, "i") as inputName>
<#if tc.getNameWithoutIndex(inputName) == tc.outputName>
${inputName} = seq.back();
</#if>
</#list>
_predictor_${networkInstruction?index}_.predict(${tc.join(tc.getUnrollInputNames(networkInstruction, "i"), ", ")}, ${tc.join(tc.getUnrollOutputNames(networkInstruction, "i"), ", ")});
<#list tc.getUnrollOutputNames(networkInstruction, "i") as outputName>
<#if tc.getNameWithoutIndex(outputName) == tc.outputName>
vector<float>& out = ${outputName};
</#if>
</#list>
vector<pair<int, float>> topk;
for (size_t i = 0; i < out.size(); ++i) {
topk.emplace_back(i, out[i]);