Commit f0801126 authored by Evgeny Kusmenko

Merge branch 'adapt_pending_predefined_layers' into 'master'

Adapt pending predefined layers

See merge request !21
parents 59c74918 4ec7308a
Pipeline #101955 passed with stages
in 7 minutes and 44 seconds
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
<groupId>de.monticore.lang.monticar</groupId> <groupId>de.monticore.lang.monticar</groupId>
<artifactId>cnnarch-caffe2-generator</artifactId> <artifactId>cnnarch-caffe2-generator</artifactId>
<version>0.2.7-SNAPSHOT</version> <version>0.2.8-SNAPSHOT</version>
<!-- == PROJECT DEPENDENCIES ============================================= --> <!-- == PROJECT DEPENDENCIES ============================================= -->
......
...@@ -165,31 +165,22 @@ public class ArchitectureElementData { ...@@ -165,31 +165,22 @@ public class ArchitectureElementData {
} }
@Nullable @Nullable
public List<Integer> getPadding(){ public Integer getPadding(){
return getPadding((LayerSymbol) getElement()); return getPadding((LayerSymbol) getElement());
} }
@Nullable @Nullable
public List<Integer> getPadding(LayerSymbol layer){ public Integer getPadding(LayerSymbol layer){
List<Integer> kernel = layer.getIntTupleValue(AllPredefinedLayers.KERNEL_NAME).get(); String padding_type = ((LayerSymbol) getElement()).getStringValue(AllPredefinedLayers.PADDING_NAME).get();
List<Integer> stride = layer.getIntTupleValue(AllPredefinedLayers.STRIDE_NAME).get(); Integer pad=0;
ArchTypeSymbol inputType = layer.getInputTypes().get(0);
ArchTypeSymbol outputType = layer.getOutputTypes().get(0); if (padding_type.equals(AllPredefinedLayers.PADDING_VALID)){
pad = 0;
int heightWithPad = kernel.get(0) + stride.get(0)*(outputType.getHeight() - 1); }
int widthWithPad = kernel.get(1) + stride.get(1)*(outputType.getWidth() - 1); else if (padding_type.equals(AllPredefinedLayers.PADDING_SAME)){
int heightPad = Math.max(0, heightWithPad - inputType.getHeight()); pad = 1;
int widthPad = Math.max(0, widthWithPad - inputType.getWidth());
int topPad = (int)Math.ceil(heightPad / 2.0);
int bottomPad = (int)Math.floor(heightPad / 2.0);
int leftPad = (int)Math.ceil(widthPad / 2.0);
int rightPad = (int)Math.floor(widthPad / 2.0);
if (topPad == 0 && bottomPad == 0 && leftPad == 0 && rightPad == 0){
return null;
} }
return Arrays.asList(0,0,0,0,topPad,bottomPad,leftPad,rightPad); return pad;
} }
} }
...@@ -76,10 +76,7 @@ public class TrainParamSupportChecker implements CNNTrainVisitor { ...@@ -76,10 +76,7 @@ public class TrainParamSupportChecker implements CNNTrainVisitor {
public void visit(ASTWeightDecayEntry node){} public void visit(ASTWeightDecayEntry node){}
public void visit(ASTLRDecayEntry node){ public void visit(ASTLRDecayEntry node){}
printUnsupportedOptimizerParam(node.getName());
this.unsupportedElemList.add(node.getName());
}
public void visit(ASTLRPolicyEntry node){} public void visit(ASTLRPolicyEntry node){}
......
...@@ -7,6 +7,7 @@ import logging ...@@ -7,6 +7,7 @@ import logging
import os import os
import sys import sys
import lmdb import lmdb
class ${tc.fileNameWithoutEnding}: class ${tc.fileNameWithoutEnding}:
module = None module = None
...@@ -58,7 +59,7 @@ class ${tc.fileNameWithoutEnding}: ...@@ -58,7 +59,7 @@ class ${tc.fileNameWithoutEnding}:
return data, label, dataset_size return data, label, dataset_size
def create_model(self, model, data, device_opts): def create_model(self, model, data, device_opts, is_test):
with core.DeviceScope(device_opts): with core.DeviceScope(device_opts):
${tc.include(tc.architecture.body)} ${tc.include(tc.architecture.body)}
...@@ -118,7 +119,7 @@ ${tc.include(tc.architecture.body)} ...@@ -118,7 +119,7 @@ ${tc.include(tc.architecture.body)}
# == Training model == # == Training model ==
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope) train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts) data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(train_model, data, device_opts=device_opts) ${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
self.add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) self.add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, eval_metric) self.add_accuracy(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, eval_metric)
with core.DeviceScope(device_opts): with core.DeviceScope(device_opts):
...@@ -141,7 +142,7 @@ ${tc.include(tc.architecture.body)} ...@@ -141,7 +142,7 @@ ${tc.include(tc.architecture.body)}
# == Testing model. == # == Testing model. ==
test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False) test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts) data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(test_model, data, device_opts=device_opts) ${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
self.add_accuracy(test_model, predictions, label, device_opts, eval_metric) self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
workspace.RunNetOnce(test_model.param_init_net) workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True) workspace.CreateNet(test_model.net, overwrite=True)
...@@ -159,7 +160,7 @@ ${tc.include(tc.architecture.body)} ...@@ -159,7 +160,7 @@ ${tc.include(tc.architecture.body)}
# == Deployment model. == # == Deployment model. ==
# We simply need the main AddModel part. # We simply need the main AddModel part.
deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False) deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
self.create_model(deploy_model, "data", device_opts) self.create_model(deploy_model, "data", device_opts, is_test=True)
print("Saving deploy model") print("Saving deploy model")
self.save_net(self._init_net_, self._predict_net_, deploy_model) self.save_net(self._init_net_, self._predict_net_, deploy_model)
......
...@@ -41,6 +41,8 @@ if __name__ == "__main__": ...@@ -41,6 +41,8 @@ if __name__ == "__main__":
<#elseif param == "step_size"> <#elseif param == "step_size">
<#assign paramName = "stepsize"> <#assign paramName = "stepsize">
<#elseif param == "gamma1"> <#elseif param == "gamma1">
<#assign paramName = "gamma1">
<#elseif param == "learning_rate_decay">
<#assign paramName = "gamma"> <#assign paramName = "gamma">
</#if> </#if>
${paramName}=${config.optimizerParams[param]}<#sep>, ${paramName}=${config.optimizerParams[param]}<#sep>,
......
${element.name} = ${tc.join(element.inputs, " + ")} <#-- This layer is currently not supported -->
<#include "OutputShape.ftl">
\ No newline at end of file
${element.name} = mx.symbol.BatchNorm(data=${element.inputs[0]}, <#-- This layer is currently not supported -->
fix_gamma=${element.fixGamma?string("True","False")},
name="${element.name}")
${element.name} = mx.symbol.concat(${tc.join(element.inputs, ", ")}, <#-- This layer is currently not supported -->
dim=1,
name="${element.name}")
<#include "OutputShape.ftl">
\ No newline at end of file
...@@ -3,8 +3,14 @@ ...@@ -3,8 +3,14 @@
<#assign strideWidth = element.stride[1]> <#assign strideWidth = element.stride[1]>
<#assign kernelHeight = element.kernel[0]> <#assign kernelHeight = element.kernel[0]>
<#assign kernelWidth = element.kernel[1]> <#assign kernelWidth = element.kernel[1]>
<#if element.padding??> <#-- Check whether padding is null. --> <#if element.padding??>
<#-- TODO: check how to adapt CNNArchLang argument pad_width=${element.padding[0]} --> <#if element.padding == 0>
<#assign padParameter = ""><#--Don't add anything since "valid" is the default padding of Caffe2-->
<#elseif element.padding == 1>
<#assign padParameter = ", pad=1">
</#if>
<#else>
<#assign padParameter = ", pad=1">
</#if> </#if>
<#if strideHeight == strideWidth> <#if strideHeight == strideWidth>
<#assign strideParameter = "stride=${strideHeight}"> <#assign strideParameter = "stride=${strideHeight}">
...@@ -16,6 +22,5 @@ ...@@ -16,6 +22,5 @@
<#else> <#else>
<#assign kernelParameter = "kernel=[${kernelHeight},${kernelWidth}]"> <#assign kernelParameter = "kernel=[${kernelHeight},${kernelWidth}]">
</#if> </#if>
${element.name} = brew.conv(model, ${input}, '${element.name}', dim_in=${element.element.inputTypes[0].channels?c}, dim_out=${element.channels?c}, ${kernelParameter}, ${strideParameter}) ${element.name} = brew.conv(model, ${input}, '${element.name}', dim_in=${element.element.inputTypes[0].channels?c}, dim_out=${element.channels?c}, ${kernelParameter}, ${strideParameter}${padParameter})
<#-- TODO: check how to adapt CNNArchLang argument no_bias=${element.noBias?string("True","False")} -->
<#include "OutputShape.ftl"> <#include "OutputShape.ftl">
\ No newline at end of file
${element.name} = mx.symbol.Dropout(data=${element.inputs[0]}, <#assign input = element.inputs[0]>
p=${element.p?c}, <#assign ratio = element.p?c?string>
name="${element.name}") ${element.name} = brew.dropout(model, ${input}, '${element.name}', ratio=${ratio}, is_test=False)
${element.name} = mx.symbol.Flatten(data=${element.inputs[0]}, ${element.name} = model.net.Flatten(${element.inputs[0]}, "${element.name}")
name="${element.name}") \ No newline at end of file
\ No newline at end of file
...@@ -4,13 +4,10 @@ ...@@ -4,13 +4,10 @@
<#assign inputChannels = element.element.inputTypes[0].channels?c> <#assign inputChannels = element.element.inputTypes[0].channels?c>
<#assign inputHeight = element.element.inputTypes[0].height> <#assign inputHeight = element.element.inputTypes[0].height>
<#assign inputWidth = element.element.inputTypes[0].width> <#assign inputWidth = element.element.inputTypes[0].width>
<#if flatten> <#--flatten is not needed since the fc layer applies it automatically-->
<#-- TODO: check how to adapt CNNArchLang flatten #${element.name} = mx.symbol.flatten(data=${input}) -->
</#if>
<#if inputLayerType?matches("FullyConnected") || (inputHeight == 1 && inputWidth == 1)> <#if inputLayerType?matches("FullyConnected") || (inputHeight == 1 && inputWidth == 1)>
${element.name} = brew.fc(model, ${input}, '${element.name}', dim_in=${inputChannels}, dim_out=${element.units?c}) ${element.name} = brew.fc(model, ${input}, '${element.name}', dim_in=${inputChannels}, dim_out=${element.units?c})
<#else> <#else>
${element.name} = brew.fc(model, ${input}, '${element.name}', dim_in=${inputChannels} * ${inputHeight} * ${inputWidth}, dim_out=${element.units?c}) ${element.name} = brew.fc(model, ${input}, '${element.name}', dim_in=${inputChannels} * ${inputHeight} * ${inputWidth}, dim_out=${element.units?c})
</#if> </#if>
<#-- TODO: check how to adapt CNNArchLang argument no_bias=${element.noBias?string("True","False")} -->
<#include "OutputShape.ftl"> <#include "OutputShape.ftl">
\ No newline at end of file
${element.name} = ${element.inputs[element.index]} <#-- This layer is currently not supported -->
${element.name} = mx.symbol.Pooling(data=${element.inputs[0]}, <#assign input = element.inputs[0]>
global_pool=True, <#if element.poolType == "max">
kernel=(1,1), ${element.name} = brew.max_pool(model, ${input}, '${element.name}', global_pooling=True)
pool_type="${element.poolType}", <#elseif element.poolType == "avg">
name="${element.name}") ${element.name} = brew.average_pool(model, ${input}, '${element.name}', global_pooling=True)
</#if>
<#include "OutputShape.ftl"> <#include "OutputShape.ftl">
\ No newline at end of file
...@@ -9,11 +9,10 @@ ...@@ -9,11 +9,10 @@
${element.name} = data ${element.name} = data
<#include "OutputShape.ftl"> <#include "OutputShape.ftl">
<#if heightIndex != channelIndex + 1 || widthIndex != heightIndex + 1> <#if heightIndex != channelIndex + 1 || widthIndex != heightIndex + 1>
${element.name} = mx.symbol.transpose(data=${element.name},mx.sym.var <#-- TODO: check how to adapt CNNArchLang transpose case --> ${element.name} = model.net.Transpose(${element.name}, '${element.name}', axes=[0,${tc.join(indexList, ",")}])
axes=(0,${tc.join(indexList, ",")}))
</#if> </#if>
<#if indexList?size != 3> <#if indexList?size != 3>
${element.name} = mx.symbol.reshape(data=${element.name}, <#-- TODO: check how to adapt CNNArchLang transpose case --> ${element.name}, _ = model.net.Reshape('${element.name}', ['${element.name}', '${element.name}_old_shape'],
shape=(0,${element.element.outputTypes[0].channels?c},${element.element.outputTypes[0].height?c},${element.element.outputTypes[0].width?c})) shape=(0,${element.element.outputTypes[0].channels?c},${element.element.outputTypes[0].height?c},${element.element.outputTypes[0].width?c}))
</#if> </#if>
${element.name} = mx.symbol.LRN(data=${element.inputs[0]}, <#assign input = element.inputs[0]>
alpha=${element.alpha?c}, <#if !element.knorm?string?contains(".")>
beta=${element.beta?c}, <#assign bias = element.knorm?string["0.0"]>
knorm=${element.knorm?c}, <#else>
nsize=${element.nsize?c}, <#assign bias = element.knorm?c>
name="${element.name}") </#if>
${element.name} = brew.lrn(model, ${input}, '${element.name}', size=${element.nsize?c}, alpha=${element.alpha?c}, beta=${element.beta?c}, bias=${bias})
...@@ -2,11 +2,10 @@ ...@@ -2,11 +2,10 @@
<#if element.softmaxOutput> <#if element.softmaxOutput>
${element.name} = brew.softmax(model, ${input}, '${element.name}') ${element.name} = brew.softmax(model, ${input}, '${element.name}')
<#elseif element.logisticRegressionOutput> <#elseif element.logisticRegressionOutput>
${element.name} = mx.symbol.LogisticRegressionOutput(data=${element.inputs[0]}, <#-- TODO: check how to adapt LogisticRegressionOutput --> ${element.name} = model.net.Sigmoid(${input}, '${element.name}')
name="${element.name}")
<#elseif element.linearRegressionOutput> <#elseif element.linearRegressionOutput>
${element.name} = mx.symbol.LinearRegressionOutput(data=${element.inputs[0]}, <#-- TODO: check how to adapt linearRegressionOutput --> <#--Don't add L2 loss here but within the function "add_training_operators" from CNNCreator.ftl-->
name="${element.name}") ${element.name} = ${input}
</#if> </#if>
return ${element.name} return ${element.name}
\ No newline at end of file
...@@ -4,7 +4,13 @@ ...@@ -4,7 +4,13 @@
<#assign kernelHeight = element.kernel[0]> <#assign kernelHeight = element.kernel[0]>
<#assign kernelWidth = element.kernel[1]> <#assign kernelWidth = element.kernel[1]>
<#if element.padding??> <#if element.padding??>
<#-- TODO: check how to adapt CNNArchLang argument pad_width=${element.padding[0]} --> <#if element.padding == 0>
<#assign padParameter = ""><#--Don't add anything since "valid" is the default padding of Caffe2-->
<#elseif element.padding == 1>
<#assign padParameter = ", pad=1">
</#if>
<#else>
<#assign padParameter = ", pad=1">
</#if> </#if>
<#if strideHeight == strideWidth> <#if strideHeight == strideWidth>
<#assign strideParameter = "stride=${strideHeight}"> <#assign strideParameter = "stride=${strideHeight}">
...@@ -19,6 +25,6 @@ ...@@ -19,6 +25,6 @@
<#if element.poolType == "max"> <#if element.poolType == "max">
${element.name} = brew.max_pool(model, ${input}, '${element.name}', ${kernelParameter}, ${strideParameter}) ${element.name} = brew.max_pool(model, ${input}, '${element.name}', ${kernelParameter}, ${strideParameter})
<#elseif element.poolType == "avg"> <#elseif element.poolType == "avg">
${element.name} = brew.average_pool(model, ${input}, '${element.name}', ${kernelParameter}, ${strideParameter}) ${element.name} = brew.average_pool(model, ${input}, '${element.name}', ${kernelParameter}, ${strideParameter}${padParameter})
</#if> </#if>
<#include "OutputShape.ftl"> <#include "OutputShape.ftl">
\ No newline at end of file
<#-- This template is not used if the following architecture element is an output. See Output.ftl --> <#-- This template is not used if the following architecture element is an output. See Output.ftl -->
${element.name} = mx.symbol.softmax(data=${element.inputs[0]}, <#assign input = element.inputs[0]>
axis=1, ${element.name} = brew.softmax(model, ${input}, '${element.name}')
name="${element.name}")
${element.name} = mx.symbol.split(data=${element.inputs[0]}, <#-- This layer is currently not supported -->
num_outputs=${element.numOutputs?c},
axis=1,
name="${element.name}")
<#include "OutputShape.ftl">
\ No newline at end of file
...@@ -130,7 +130,7 @@ public class GenerationTest extends AbstractSymtabTest{ ...@@ -130,7 +130,7 @@ public class GenerationTest extends AbstractSymtabTest{
CNNTrain2Caffe2 trainGenerator = new CNNTrain2Caffe2(); CNNTrain2Caffe2 trainGenerator = new CNNTrain2Caffe2();
trainGenerator.generate(Paths.get(sourcePath), "FullConfig"); trainGenerator.generate(Paths.get(sourcePath), "FullConfig");
assertTrue(Log.getFindings().size() == 9); assertTrue(Log.getFindings().size() == 8);
checkFilesAreEqual( checkFilesAreEqual(
Paths.get("./target/generated-sources-cnnarch"), Paths.get("./target/generated-sources-cnnarch"),
Paths.get("./src/test/resources/target_code"), Paths.get("./src/test/resources/target_code"),
......
...@@ -7,6 +7,7 @@ import logging ...@@ -7,6 +7,7 @@ import logging
import os import os
import sys import sys
import lmdb import lmdb
class CNNCreator_Alexnet: class CNNCreator_Alexnet:
module = None module = None
...@@ -58,97 +59,56 @@ class CNNCreator_Alexnet: ...@@ -58,97 +59,56 @@ class CNNCreator_Alexnet:
return data, label, dataset_size return data, label, dataset_size
def create_model(self, model, data, device_opts): def create_model(self, model, data, device_opts, is_test):
with core.DeviceScope(device_opts): with core.DeviceScope(device_opts):
data = data data = data
# data, output shape: {[3,224,224]} # data, output shape: {[3,224,224]}
conv1_ = brew.conv(model, data, 'conv1_', dim_in=3, dim_out=96, kernel=11, stride=4) conv1_ = brew.conv(model, data, 'conv1_', dim_in=3, dim_out=96, kernel=11, stride=4)
# conv1_, output shape: {[96,55,55]} # conv1_, output shape: {[96,55,55]}
lrn1_ = mx.symbol.LRN(data=conv1_, lrn1_ = brew.lrn(model, conv1_, 'lrn1_', size=5, alpha=0.0001, beta=0.75, bias=2.0)
alpha=0.0001,
beta=0.75,
knorm=2,
nsize=5,
name="lrn1_")
pool1_ = brew.max_pool(model, lrn1_, 'pool1_', kernel=3, stride=2) pool1_ = brew.max_pool(model, lrn1_, 'pool1_', kernel=3, stride=2)
# pool1_, output shape: {[96,27,27]} # pool1_, output shape: {[96,27,27]}
relu1_ = brew.relu(model, pool1_, pool1_) relu1_ = brew.relu(model, pool1_, pool1_)
split1_ = mx.symbol.split(data=relu1_, conv2_1_ = brew.conv(model, get2_1_, 'conv2_1_', dim_in=48, dim_out=128, kernel=5, stride=1, pad=1)
num_outputs=2,
axis=1,
name="split1_")
# split1_, output shape: {[48,27,27][48,27,27]}
get2_1_ = split1_[0]
conv2_1_ = brew.conv(model, get2_1_, 'conv2_1_', dim_in=48, dim_out=128, kernel=5, stride=1)
# conv2_1_, output shape: {[128,27,27]} # conv2_1_, output shape: {[128,27,27]}
lrn2_1_ = mx.symbol.LRN(data=conv2_1_, lrn2_1_ = brew.lrn(model, conv2_1_, 'lrn2_1_', size=5, alpha=0.0001, beta=0.75, bias=2.0)
alpha=0.0001,
beta=0.75,
knorm=2,
nsize=5,
name="lrn2_1_")
pool2_1_ = brew.max_pool(model, lrn2_1_, 'pool2_1_', kernel=3, stride=2) pool2_1_ = brew.max_pool(model, lrn2_1_, 'pool2_1_', kernel=3, stride=2)
# pool2_1_, output shape: {[128,13,13]} # pool2_1_, output shape: {[128,13,13]}
relu2_1_ = brew.relu(model, pool2_1_, pool2_1_) relu2_1_ = brew.relu(model, pool2_1_, pool2_1_)
get2_2_ = split1_[1] conv2_2_ = brew.conv(model, get2_2_, 'conv2_2_', dim_in=48, dim_out=128, kernel=5, stride=1, pad=1)
conv2_2_ = brew.conv(model, get2_2_, 'conv2_2_', dim_in=48, dim_out=128, kernel=5, stride=1)
# conv2_2_, output shape: {[128,27,27]} # conv2_2_, output shape: {[128,27,27]}
lrn2_2_ = mx.symbol.LRN(data=conv2_2_, lrn2_2_ = brew.lrn(model, conv2_2_, 'lrn2_2_', size=5, alpha=0.0001, beta=0.75, bias=2.0)
alpha=0.0001,
beta=0.75,
knorm=2,
nsize=5,
name="lrn2_2_")
pool2_2_ = brew.max_pool(model, lrn2_2_, 'pool2_2_', kernel=3, stride=2) pool2_2_ = brew.max_pool(model, lrn2_2_, 'pool2_2_', kernel=3, stride=2)
# pool2_2_, output shape: {[128,13,13]} # pool2_2_, output shape: {[128,13,13]}
relu2_2_ = brew.relu(model, pool2_2_, pool2_2_) relu2_2_ = brew.relu(model, pool2_2_, pool2_2_)
concatenate3_ = mx.symbol.concat(relu2_1_, relu2_2_, conv3_ = brew.conv(model, concatenate3_, 'conv3_', dim_in=256, dim_out=384, kernel=3, stride=1, pad=1)
dim=1,
name="concatenate3_")
# concatenate3_, output shape: {[256,13,13]}
conv3_ = brew.conv(model, concatenate3_, 'conv3_', dim_in=256, dim_out=384, kernel=3, stride=1)
# conv3_, output shape: {[384,13,13]} # conv3_, output shape: {[384,13,13]}
relu3_ = brew.relu(model, conv3_, conv3_) relu3_ = brew.relu(model, conv3_, conv3_)
split3_ = mx.symbol.split(data=relu3_, conv4_1_ = brew.conv(model, get4_1_, 'conv4_1_', dim_in=192, dim_out=192, kernel=3, stride=1, pad=1)
num_outputs=2,
axis=1,
name="split3_")
# split3_, output shape: {[192,13,13][192,13,13]}
get4_1_ = split3_[0]
conv4_1_ = brew.conv(model, get4_1_, 'conv4_1_', dim_in=192, dim_out=192, kernel=3, stride=1)
# conv4_1_, output shape: {[192,13,13]} # conv4_1_, output shape: {[192,13,13]}
relu4_1_ = brew.relu(model, conv4_1_, conv4_1_) relu4_1_ = brew.relu(model, conv4_1_, conv4_1_)
conv5_1_ = brew.conv(model, relu4_1_, 'conv5_1_', dim_in=192, dim_out=128, kernel=3, stride=1) conv5_1_ = brew.conv(model, relu4_1_, 'conv5_1_', dim_in=192, dim_out=128, kernel=3, stride=1, pad=1)
# conv5_1_, output shape: {[128,13,13]} # conv5_1_, output shape: {[128,13,13]}
pool5_1_ = brew.max_pool(model, conv5_1_, 'pool5_1_', kernel=3, stride=2) pool5_1_ = brew.max_pool(model, conv5_1_, 'pool5_1_', kernel=3, stride=2)
# pool5_1_, output shape: {[128,6,6]} # pool5_1_, output shape: {[128,6,6]}
relu5_1_ = brew.relu(model, pool5_1_, pool5_1_) relu5_1_ = brew.relu(model, pool5_1_, pool5_1_)
get4_2_ = split3_[1] conv4_2_ = brew.conv(model, get4_2_, 'conv4_2_', dim_in=192, dim_out=192, kernel=3, stride=1, pad=1)
conv4_2_ = brew.conv(model, get4_2_, 'conv4_2_', dim_in=192, dim_out=192, kernel=3, stride=1)
# conv4_2_, output shape: {[192,13,13]} # conv4_2_, output shape: {[192,13,13]}
relu4_2_ = brew.relu(model, conv4_2_, conv4_2_) relu4_2_ = brew.relu(model, conv4_2_, conv4_2_)
conv5_2_ = brew.conv(model, relu4_2_, 'conv5_2_', dim_in=192, dim_out=128, kernel=3, stride=1) conv5_2_ = brew.conv(model, relu4_2_, 'conv5_2_', dim_in=192, dim_out=128, kernel=3, stride=1, pad=1)
# conv5_2_, output shape: {[128,13,13]} # conv5_2_, output shape: {[128,13,13]}
pool5_2_ = brew.max_pool(model, conv5_2_, 'pool5_2_', kernel=3, stride=2) pool5_2_ = brew.max_pool(model, conv5_2_, 'pool5_2_', kernel=3, stride=2)
# pool5_2_, output shape: {[128,6,6]} # pool5_2_, output shape: {[128,6,6]}
relu5_2_ = brew.relu(model, pool5_2_, pool5_2_) relu5_2_ = brew.relu(model, pool5_2_, pool5_2_)
concatenate6_ = mx.symbol.concat(relu5_1_, relu5_2_,
dim=1,
name="concatenate6_")
# concatenate6_, output shape: {[256,6,6]}
fc6_ = brew.fc(model, concatenate6_, 'fc6_', dim_in=256 * 6 * 6, dim_out=4096) fc6_ = brew.fc(model, concatenate6_, 'fc6_', dim_in=256 * 6 * 6, dim_out=4096)
# fc6_, output shape: {[4096,1,1]} # fc6_, output shape: {[4096,1,1]}
relu6_ = brew.relu(model, fc6_, fc6_) relu6_ = brew.relu(model, fc6_, fc6_)
dropout6_ = mx.symbol.Dropout(data=relu6_, dropout6_ = brew.dropout(model, relu6_, 'dropout6_', ratio=0.5, is_test=False)
p=0.5,
name="dropout6_")
fc7_ = brew.fc(model, dropout6_, 'fc7_', dim_in=4096, dim_out=4096) fc7_ = brew.fc(model, dropout6_, 'fc7_', dim_in=4096, dim_out=4096)
# fc7_, output shape: {[4096,1,1]} # fc7_, output shape: {[4096,1,1]}
relu7_ = brew.relu(model, fc7_, fc7_) relu7_ = brew.relu(model, fc7_, fc7_)
dropout7_ = mx.symbol.Dropout(data=relu7_, dropout7_ = brew.dropout(model, relu7_, 'dropout7_', ratio=0.5, is_test=False)
p=0.5,
name="dropout7_")
fc8_ = brew.fc(model, dropout7_, 'fc8_', dim_in=4096, dim_out=10) fc8_ = brew.fc(model, dropout7_, 'fc8_', dim_in=4096, dim_out=10)
# fc8_, output shape: {[10,1,1]} # fc8_, output shape: {[10,1,1]}
predictions = brew.softmax(model, fc8_, 'predictions') predictions = brew.softmax(model, fc8_, 'predictions')
...@@ -210,7 +170,7 @@ class CNNCreator_Alexnet: ...@@ -210,7 +170,7 @@ class CNNCreator_Alexnet:
# == Training model == # == Training model ==
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope) train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts) data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts) predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum) self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric) self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts): with core.DeviceScope(device_opts):
...@@ -233,7 +193,7 @@ class CNNCreator_Alexnet: ...@@ -233,7 +193,7 @@ class CNNCreator_Alexnet:
# == Testing model. == # == Testing model. ==
test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False) test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts) data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(test_model, data, device_opts=device_opts) predictions = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
self.add_accuracy(test_model, predictions, label, device_opts, eval_metric) self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
workspace.RunNetOnce(test_model.param_init_net) workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True) workspace.CreateNet(test_model.net, overwrite=True)
...@@ -251,7 +211,7 @@ class CNNCreator_Alexnet: ...@@ -251,7 +211,7 @@ class CNNCreator_Alexnet:
# == Deployment model. == # == Deployment model. ==
# We simply need the main AddModel part. # We simply need the main AddModel part.
deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False) deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
self.create_model(deploy_model, "data", device_opts) self.create_model(deploy_model, "data", device_opts, is_test=True)
print("Saving deploy model") print("Saving deploy model")
self.save_net(self._init_net_, self._predict_net_, deploy_model) self.save_net(self._init_net_, self._predict_net_, deploy_model)
......
...@@ -7,6 +7,7 @@ import logging ...@@ -7,6 +7,7 @@ import logging
import os import os
import sys import sys
import lmdb import lmdb
class CNNCreator_LeNet: class CNNCreator_LeNet: