Commit f0801126 authored by Evgeny Kusmenko's avatar Evgeny Kusmenko

Merge branch 'adapt_pending_predefined_layers' into 'master'

Adapt pending predefined layers

See merge request !21
parents 59c74918 4ec7308a
Pipeline #101955 passed with stages in 7 minutes and 44 seconds
@@ -8,7 +8,7 @@
<groupId>de.monticore.lang.monticar</groupId>
<artifactId>cnnarch-caffe2-generator</artifactId>
<version>0.2.7-SNAPSHOT</version>
<version>0.2.8-SNAPSHOT</version>
<!-- == PROJECT DEPENDENCIES ============================================= -->
......
@@ -165,31 +165,22 @@ public class ArchitectureElementData {
}
@Nullable
public List<Integer> getPadding(){
public Integer getPadding(){
return getPadding((LayerSymbol) getElement());
}
@Nullable
public List<Integer> getPadding(LayerSymbol layer){
List<Integer> kernel = layer.getIntTupleValue(AllPredefinedLayers.KERNEL_NAME).get();
List<Integer> stride = layer.getIntTupleValue(AllPredefinedLayers.STRIDE_NAME).get();
ArchTypeSymbol inputType = layer.getInputTypes().get(0);
ArchTypeSymbol outputType = layer.getOutputTypes().get(0);
int heightWithPad = kernel.get(0) + stride.get(0)*(outputType.getHeight() - 1);
int widthWithPad = kernel.get(1) + stride.get(1)*(outputType.getWidth() - 1);
int heightPad = Math.max(0, heightWithPad - inputType.getHeight());
int widthPad = Math.max(0, widthWithPad - inputType.getWidth());
int topPad = (int)Math.ceil(heightPad / 2.0);
int bottomPad = (int)Math.floor(heightPad / 2.0);
int leftPad = (int)Math.ceil(widthPad / 2.0);
int rightPad = (int)Math.floor(widthPad / 2.0);
if (topPad == 0 && bottomPad == 0 && leftPad == 0 && rightPad == 0){
return null;
public Integer getPadding(LayerSymbol layer){
String padding_type = ((LayerSymbol) getElement()).getStringValue(AllPredefinedLayers.PADDING_NAME).get();
Integer pad=0;
if (padding_type.equals(AllPredefinedLayers.PADDING_VALID)){
pad = 0;
}
else if (padding_type.equals(AllPredefinedLayers.PADDING_SAME)){
pad = 1;
}
return Arrays.asList(0,0,0,0,topPad,bottomPad,leftPad,rightPad);
return pad;
}
}
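The reworked getPadding collapses the old per-side padding list into a single symmetric Caffe2 pad value: "valid" maps to 0 and "same" maps to 1, consumed by the templates further below. A minimal sketch of the resulting geometry, using the standard convolution output-size formula (the helper is illustrative, not part of the generator):

# Illustrative only: spatial output size of a Caffe2 conv/pool
# with symmetric padding.
def conv_out(size, kernel, stride, pad):
    return (size + 2 * pad - kernel) // stride + 1

print(conv_out(224, 3, 1, 1))  # 224 -> pad=1 preserves the size for 3x3 kernels at stride 1 ("same")
print(conv_out(224, 3, 1, 0))  # 222 -> pad=0 trims the border ("valid")
print(conv_out(27, 5, 1, 1))   # 25  -> for larger kernels a fixed pad=1 no longer preserves the size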
@@ -76,10 +76,7 @@ public class TrainParamSupportChecker implements CNNTrainVisitor {
public void visit(ASTWeightDecayEntry node){}
public void visit(ASTLRDecayEntry node){
printUnsupportedOptimizerParam(node.getName());
this.unsupportedElemList.add(node.getName());
}
public void visit(ASTLRDecayEntry node){}
public void visit(ASTLRPolicyEntry node){}
......
@@ -7,6 +7,7 @@ import logging
import os
import sys
import lmdb
class ${tc.fileNameWithoutEnding}:
module = None
@@ -58,7 +59,7 @@ class ${tc.fileNameWithoutEnding}:
return data, label, dataset_size
def create_model(self, model, data, device_opts):
def create_model(self, model, data, device_opts, is_test):
with core.DeviceScope(device_opts):
${tc.include(tc.architecture.body)}
@@ -118,7 +119,7 @@ ${tc.include(tc.architecture.body)}
# == Training model ==
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(train_model, data, device_opts=device_opts)
${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
self.add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
@@ -141,7 +142,7 @@ ${tc.include(tc.architecture.body)}
# == Testing model. ==
test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(test_model, data, device_opts=device_opts)
${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)
@@ -159,7 +160,7 @@ ${tc.include(tc.architecture.body)}
# == Deployment model. ==
# We simply need the main AddModel part.
deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
self.create_model(deploy_model, "data", device_opts)
self.create_model(deploy_model, "data", device_opts, is_test=True)
print("Saving deploy model")
self.save_net(self._init_net_, self._predict_net_, deploy_model)
......
@@ -41,6 +41,8 @@ if __name__ == "__main__":
<#elseif param == "step_size">
<#assign paramName = "stepsize">
<#elseif param == "gamma1">
<#assign paramName = "gamma1">
<#elseif param == "learning_rate_decay">
<#assign paramName = "gamma">
</#if>
${paramName}=${config.optimizerParams[param]}<#sep>,
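The two new branches route CNNTrain's gamma1 and learning_rate_decay settings to the names the Caffe2 optimizer builders expect (gamma1 is kept as-is, learning_rate_decay becomes gamma). A hedged sketch of where such parameters typically end up; train_model and all values are placeholders:

from caffe2.python import optimizer

# With a "step" policy, Caffe2 multiplies the learning rate by gamma
# every stepsize iterations, so learning_rate_decay maps onto gamma.
optimizer.build_sgd(train_model,
                    base_learning_rate=0.01,
                    policy="step",
                    stepsize=1000,
                    gamma=0.999,
                    momentum=0.9)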
......
${element.name} = ${tc.join(element.inputs, " + ")}
<#include "OutputShape.ftl">
\ No newline at end of file
<#-- This layer is currently not supported -->
${element.name} = mx.symbol.BatchNorm(data=${element.inputs[0]},
fix_gamma=${element.fixGamma?string("True","False")},
name="${element.name}")
<#-- This layer is currently not supported -->
${element.name} = mx.symbol.concat(${tc.join(element.inputs, ", ")},
dim=1,
name="${element.name}")
<#include "OutputShape.ftl">
\ No newline at end of file
<#-- This layer is currently not supported -->
@@ -3,8 +3,14 @@
<#assign strideWidth = element.stride[1]>
<#assign kernelHeight = element.kernel[0]>
<#assign kernelWidth = element.kernel[1]>
<#if element.padding??> <#-- Check whether padding is null. -->
<#-- TODO: check how to adapt CNNArchLang argument pad_width=${element.padding[0]} -->
<#if element.padding??>
<#if element.padding == 0>
<#assign padParameter = ""><#--Don't add anything since "valid" is the default padding of Caffe2-->
<#elseif element.padding == 1>
<#assign padParameter = ", pad=1">
</#if>
<#else>
<#assign padParameter = ", pad=1">
</#if>
<#if strideHeight == strideWidth>
<#assign strideParameter = "stride=${strideHeight}">
@@ -16,6 +22,5 @@
<#else>
<#assign kernelParameter = "kernel=[${kernelHeight},${kernelWidth}]">
</#if>
${element.name} = brew.conv(model, ${input}, '${element.name}', dim_in=${element.element.inputTypes[0].channels?c}, dim_out=${element.channels?c}, ${kernelParameter}, ${strideParameter})
<#-- TODO: check how to adapt CNNArchLang argument no_bias=${element.noBias?string("True","False")} -->
${element.name} = brew.conv(model, ${input}, '${element.name}', dim_in=${element.element.inputTypes[0].channels?c}, dim_out=${element.channels?c}, ${kernelParameter}, ${strideParameter}${padParameter})
<#include "OutputShape.ftl">
\ No newline at end of file
${element.name} = mx.symbol.Dropout(data=${element.inputs[0]},
p=${element.p?c},
name="${element.name}")
<#assign input = element.inputs[0]>
<#assign ratio = element.p?c?string>
${element.name} = brew.dropout(model, ${input}, '${element.name}', ratio=${ratio}, is_test=False)
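Caffe2 applies dropout only while training: with is_test set, the op passes its input through unchanged, which is the distinction the new is_test argument of create_model exposes. A short sketch of both variants; the model and blob names are illustrative:

from caffe2.python import brew

# Training net: activations are zeroed with probability ratio.
drop_train = brew.dropout(train_model, fc6_, 'dropout6_', ratio=0.5, is_test=False)
# Test/deploy net: the op degenerates to an identity.
drop_test = brew.dropout(test_model, fc6_, 'dropout6_', ratio=0.5, is_test=True)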
${element.name} = mx.symbol.Flatten(data=${element.inputs[0]},
name="${element.name}")
\ No newline at end of file
${element.name} = model.net.Flatten(${element.inputs[0]}, "${element.name}")
\ No newline at end of file
@@ -4,13 +4,10 @@
<#assign inputChannels = element.element.inputTypes[0].channels?c>
<#assign inputHeight = element.element.inputTypes[0].height>
<#assign inputWidth = element.element.inputTypes[0].width>
<#if flatten>
<#-- TODO: check how to adapt CNNArchLang flatten #${element.name} = mx.symbol.flatten(data=${input}) -->
</#if>
<#--flatten is not needed since the fc layer applies it automatically-->
<#if inputLayerType?matches("FullyConnected") || (inputHeight == 1 && inputWidth == 1)>
${element.name} = brew.fc(model, ${input}, '${element.name}', dim_in=${inputChannels}, dim_out=${element.units?c})
<#else>
${element.name} = brew.fc(model, ${input}, '${element.name}', dim_in=${inputChannels} * ${inputHeight} * ${inputWidth}, dim_out=${element.units?c})
</#if>
<#-- TODO: check how to adapt CNNArchLang argument no_bias=${element.noBias?string("True","False")} -->
<#include "OutputShape.ftl">
\ No newline at end of file
${element.name} = ${element.inputs[element.index]}
<#-- This layer is currently not supported -->
${element.name} = mx.symbol.Pooling(data=${element.inputs[0]},
global_pool=True,
kernel=(1,1),
pool_type="${element.poolType}",
name="${element.name}")
<#assign input = element.inputs[0]>
<#if element.poolType == "max">
${element.name} = brew.max_pool(model, ${input}, '${element.name}', global_pooling=True)
<#elseif element.poolType == "avg">
${element.name} = brew.average_pool(model, ${input}, '${element.name}', global_pooling=True)
</#if>
<#include "OutputShape.ftl">
\ No newline at end of file
@@ -9,11 +9,10 @@
${element.name} = data
<#include "OutputShape.ftl">
<#if heightIndex != channelIndex + 1 || widthIndex != heightIndex + 1>
${element.name} = mx.symbol.transpose(data=${element.name},mx.sym.var <#-- TODO: check how to adapt CNNArchLang transpose case -->
axes=(0,${tc.join(indexList, ",")}))
${element.name} = model.net.Transpose(${element.name}, '${element.name}', axes=[0,${tc.join(indexList, ",")}])
</#if>
<#if indexList?size != 3>
${element.name} = mx.symbol.reshape(data=${element.name}, <#-- TODO: check how to adapt CNNArchLang transpose case -->
${element.name}, _ = model.net.Reshape('${element.name}', ['${element.name}', '${element.name}_old_shape'],
shape=(0,${element.element.outputTypes[0].channels?c},${element.element.outputTypes[0].height?c},${element.element.outputTypes[0].width?c}))
</#if>
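Caffe2's Reshape operator returns two blobs, the reshaped data and the input's old shape, which is why the generated call binds two names and discards the second. A standalone sketch with illustrative blob names:

from caffe2.python import model_helper

m = model_helper.ModelHelper(name="reshape_demo")
# A 0 in shape keeps the corresponding input dimension (here the batch size);
# the second output records the pre-reshape shape and is ignored.
data, _ = m.net.Reshape('data', ['data', 'data_old_shape'],
                        shape=(0, 3, 224, 224))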
${element.name} = mx.symbol.LRN(data=${element.inputs[0]},
alpha=${element.alpha?c},
beta=${element.beta?c},
knorm=${element.knorm?c},
nsize=${element.nsize?c},
name="${element.name}")
<#assign input = element.inputs[0]>
<#if !element.knorm?string?contains(".")>
<#assign bias = element.knorm?string["0.0"]>
<#else>
<#assign bias = element.knorm?c>
</#if>
${element.name} = brew.lrn(model, ${input}, '${element.name}', size=${element.nsize?c}, alpha=${element.alpha?c}, beta=${element.beta?c}, bias=${bias})
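The knorm handling forces an integer value such as 2 to be rendered as 2.0 (?c alone would emit an integer literal), presumably because Caffe2 reads LRN's bias as a float argument. The resulting call, as it also appears in the regenerated Alexnet further below:

from caffe2.python import brew

# size/alpha/beta/bias values taken from the Alexnet test resource in this diff.
lrn1_ = brew.lrn(model, conv1_, 'lrn1_', size=5, alpha=0.0001, beta=0.75, bias=2.0)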
@@ -2,11 +2,10 @@
<#if element.softmaxOutput>
${element.name} = brew.softmax(model, ${input}, '${element.name}')
<#elseif element.logisticRegressionOutput>
${element.name} = mx.symbol.LogisticRegressionOutput(data=${element.inputs[0]}, <#-- TODO: check how to adapt LogisticRegressionOutput -->
name="${element.name}")
${element.name} = model.net.Sigmoid(${input}, '${element.name}')
<#elseif element.linearRegressionOutput>
${element.name} = mx.symbol.LinearRegressionOutput(data=${element.inputs[0]}, <#-- TODO: check how to adapt linearRegressionOutput -->
name="${element.name}")
<#--Don't add L2 loss here but within the function "add_training_operators" from CNNCreator.ftl-->
${element.name} = ${input}
</#if>
return ${element.name}
\ No newline at end of file
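For linearRegressionOutput the template now emits a plain identity and defers the loss to add_training_operators in CNNCreator.ftl, as the comment above notes. A hedged sketch of how an L2 regression loss is typically attached there in Caffe2; the blob names are illustrative:

# Squared L2 distance between predictions and label, averaged into a
# scalar loss from which the gradient operators are built.
dist = model.net.SquaredL2Distance([label, predictions], 'dist')
loss = model.net.AveragedLoss(dist, 'loss')
model.AddGradientOperators([loss])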
@@ -4,7 +4,13 @@
<#assign kernelHeight = element.kernel[0]>
<#assign kernelWidth = element.kernel[1]>
<#if element.padding??>
<#-- TODO: check how to adapt CNNArchLang argument pad_width=${element.padding[0]} -->
<#if element.padding == 0>
<#assign padParameter = ""><#--Don't add anything since "valid" is the default padding of Caffe2-->
<#elseif element.padding == 1>
<#assign padParameter = ", pad=1">
</#if>
<#else>
<#assign padParameter = ", pad=1">
</#if>
<#if strideHeight == strideWidth>
<#assign strideParameter = "stride=${strideHeight}">
@@ -19,6 +25,6 @@
<#if element.poolType == "max">
${element.name} = brew.max_pool(model, ${input}, '${element.name}', ${kernelParameter}, ${strideParameter})
<#elseif element.poolType == "avg">
${element.name} = brew.average_pool(model, ${input}, '${element.name}', ${kernelParameter}, ${strideParameter})
${element.name} = brew.average_pool(model, ${input}, '${element.name}', ${kernelParameter}, ${strideParameter}${padParameter})
</#if>
<#include "OutputShape.ftl">
\ No newline at end of file
<#-- This template is not used if the following architecture element is an output. See Output.ftl -->
${element.name} = mx.symbol.softmax(data=${element.inputs[0]},
axis=1,
name="${element.name}")
<#assign input = element.inputs[0]>
${element.name} = brew.softmax(model, ${input}, '${element.name}')
${element.name} = mx.symbol.split(data=${element.inputs[0]},
num_outputs=${element.numOutputs?c},
axis=1,
name="${element.name}")
<#include "OutputShape.ftl">
\ No newline at end of file
<#-- This layer is currently not supported -->
@@ -130,7 +130,7 @@ public class GenerationTest extends AbstractSymtabTest{
CNNTrain2Caffe2 trainGenerator = new CNNTrain2Caffe2();
trainGenerator.generate(Paths.get(sourcePath), "FullConfig");
assertTrue(Log.getFindings().size() == 9);
assertTrue(Log.getFindings().size() == 8);
checkFilesAreEqual(
Paths.get("./target/generated-sources-cnnarch"),
Paths.get("./src/test/resources/target_code"),
......
@@ -7,6 +7,7 @@ import logging
import os
import sys
import lmdb
class CNNCreator_Alexnet:
module = None
@@ -58,97 +59,56 @@ class CNNCreator_Alexnet:
return data, label, dataset_size
def create_model(self, model, data, device_opts):
def create_model(self, model, data, device_opts, is_test):
with core.DeviceScope(device_opts):
data = data
# data, output shape: {[3,224,224]}
conv1_ = brew.conv(model, data, 'conv1_', dim_in=3, dim_out=96, kernel=11, stride=4)
conv1_ = brew.conv(model, data, 'conv1_', dim_in=3, dim_out=96, kernel=11, stride=4)
# conv1_, output shape: {[96,55,55]}
lrn1_ = mx.symbol.LRN(data=conv1_,
alpha=0.0001,
beta=0.75,
knorm=2,
nsize=5,
name="lrn1_")
lrn1_ = brew.lrn(model, conv1_, 'lrn1_', size=5, alpha=0.0001, beta=0.75, bias=2.0)
pool1_ = brew.max_pool(model, lrn1_, 'pool1_', kernel=3, stride=2)
# pool1_, output shape: {[96,27,27]}
relu1_ = brew.relu(model, pool1_, pool1_)
split1_ = mx.symbol.split(data=relu1_,
num_outputs=2,
axis=1,
name="split1_")
# split1_, output shape: {[48,27,27][48,27,27]}
get2_1_ = split1_[0]
conv2_1_ = brew.conv(model, get2_1_, 'conv2_1_', dim_in=48, dim_out=128, kernel=5, stride=1)
conv2_1_ = brew.conv(model, get2_1_, 'conv2_1_', dim_in=48, dim_out=128, kernel=5, stride=1, pad=1)
# conv2_1_, output shape: {[128,27,27]}
lrn2_1_ = mx.symbol.LRN(data=conv2_1_,
alpha=0.0001,
beta=0.75,
knorm=2,
nsize=5,
name="lrn2_1_")
lrn2_1_ = brew.lrn(model, conv2_1_, 'lrn2_1_', size=5, alpha=0.0001, beta=0.75, bias=2.0)
pool2_1_ = brew.max_pool(model, lrn2_1_, 'pool2_1_', kernel=3, stride=2)
# pool2_1_, output shape: {[128,13,13]}
relu2_1_ = brew.relu(model, pool2_1_, pool2_1_)
get2_2_ = split1_[1]
conv2_2_ = brew.conv(model, get2_2_, 'conv2_2_', dim_in=48, dim_out=128, kernel=5, stride=1)
conv2_2_ = brew.conv(model, get2_2_, 'conv2_2_', dim_in=48, dim_out=128, kernel=5, stride=1, pad=1)
# conv2_2_, output shape: {[128,27,27]}
lrn2_2_ = mx.symbol.LRN(data=conv2_2_,
alpha=0.0001,
beta=0.75,
knorm=2,
nsize=5,
name="lrn2_2_")
lrn2_2_ = brew.lrn(model, conv2_2_, 'lrn2_2_', size=5, alpha=0.0001, beta=0.75, bias=2.0)
pool2_2_ = brew.max_pool(model, lrn2_2_, 'pool2_2_', kernel=3, stride=2)
# pool2_2_, output shape: {[128,13,13]}
relu2_2_ = brew.relu(model, pool2_2_, pool2_2_)
concatenate3_ = mx.symbol.concat(relu2_1_, relu2_2_,
dim=1,
name="concatenate3_")
# concatenate3_, output shape: {[256,13,13]}
conv3_ = brew.conv(model, concatenate3_, 'conv3_', dim_in=256, dim_out=384, kernel=3, stride=1)
conv3_ = brew.conv(model, concatenate3_, 'conv3_', dim_in=256, dim_out=384, kernel=3, stride=1, pad=1)
# conv3_, output shape: {[384,13,13]}
relu3_ = brew.relu(model, conv3_, conv3_)
split3_ = mx.symbol.split(data=relu3_,
num_outputs=2,
axis=1,
name="split3_")
# split3_, output shape: {[192,13,13][192,13,13]}
get4_1_ = split3_[0]
conv4_1_ = brew.conv(model, get4_1_, 'conv4_1_', dim_in=192, dim_out=192, kernel=3, stride=1)
conv4_1_ = brew.conv(model, get4_1_, 'conv4_1_', dim_in=192, dim_out=192, kernel=3, stride=1, pad=1)
# conv4_1_, output shape: {[192,13,13]}
relu4_1_ = brew.relu(model, conv4_1_, conv4_1_)
conv5_1_ = brew.conv(model, relu4_1_, 'conv5_1_', dim_in=192, dim_out=128, kernel=3, stride=1)
conv5_1_ = brew.conv(model, relu4_1_, 'conv5_1_', dim_in=192, dim_out=128, kernel=3, stride=1, pad=1)
# conv5_1_, output shape: {[128,13,13]}
pool5_1_ = brew.max_pool(model, conv5_1_, 'pool5_1_', kernel=3, stride=2)
# pool5_1_, output shape: {[128,6,6]}
relu5_1_ = brew.relu(model, pool5_1_, pool5_1_)
get4_2_ = split3_[1]
conv4_2_ = brew.conv(model, get4_2_, 'conv4_2_', dim_in=192, dim_out=192, kernel=3, stride=1)
conv4_2_ = brew.conv(model, get4_2_, 'conv4_2_', dim_in=192, dim_out=192, kernel=3, stride=1, pad=1)
# conv4_2_, output shape: {[192,13,13]}
relu4_2_ = brew.relu(model, conv4_2_, conv4_2_)
conv5_2_ = brew.conv(model, relu4_2_, 'conv5_2_', dim_in=192, dim_out=128, kernel=3, stride=1)
conv5_2_ = brew.conv(model, relu4_2_, 'conv5_2_', dim_in=192, dim_out=128, kernel=3, stride=1, pad=1)
# conv5_2_, output shape: {[128,13,13]}
pool5_2_ = brew.max_pool(model, conv5_2_, 'pool5_2_', kernel=3, stride=2)
# pool5_2_, output shape: {[128,6,6]}
relu5_2_ = brew.relu(model, pool5_2_, pool5_2_)
concatenate6_ = mx.symbol.concat(relu5_1_, relu5_2_,
dim=1,
name="concatenate6_")
# concatenate6_, output shape: {[256,6,6]}
fc6_ = brew.fc(model, concatenate6_, 'fc6_', dim_in=256 * 6 * 6, dim_out=4096)
# fc6_, output shape: {[4096,1,1]}
relu6_ = brew.relu(model, fc6_, fc6_)
dropout6_ = mx.symbol.Dropout(data=relu6_,
p=0.5,
name="dropout6_")
dropout6_ = brew.dropout(model, relu6_, 'dropout6_', ratio=0.5, is_test=False)
fc7_ = brew.fc(model, dropout6_, 'fc7_', dim_in=4096, dim_out=4096)
# fc7_, output shape: {[4096,1,1]}
relu7_ = brew.relu(model, fc7_, fc7_)
dropout7_ = mx.symbol.Dropout(data=relu7_,
p=0.5,
name="dropout7_")
dropout7_ = brew.dropout(model, relu7_, 'dropout7_', ratio=0.5, is_test=False)
fc8_ = brew.fc(model, dropout7_, 'fc8_', dim_in=4096, dim_out=10)
# fc8_, output shape: {[10,1,1]}
predictions = brew.softmax(model, fc8_, 'predictions')
@@ -210,7 +170,7 @@ class CNNCreator_Alexnet:
# == Training model ==
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
@@ -233,7 +193,7 @@ class CNNCreator_Alexnet:
# == Testing model. ==
test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(test_model, data, device_opts=device_opts)
predictions = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)
@@ -251,7 +211,7 @@ class CNNCreator_Alexnet:
# == Deployment model. ==
# We simply need the main AddModel part.
deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
self.create_model(deploy_model, "data", device_opts)
self.create_model(deploy_model, "data", device_opts, is_test=True)
print("Saving deploy model")
self.save_net(self._init_net_, self._predict_net_, deploy_model)
......
@@ -7,6 +7,7 @@ import logging
import os
import sys
import lmdb
class CNNCreator_LeNet:
module = None
@@ -58,7 +59,7 @@ class CNNCreator_LeNet:
return data, label, dataset_size
def create_model(self, model, data, device_opts):
def create_model(self, model, data, device_opts, is_test):
with core.DeviceScope(device_opts):
image = data
@@ -135,7 +136,7 @@ class CNNCreator_LeNet:
# == Training model ==
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
@@ -158,7 +159,7 @@ class CNNCreator_LeNet:
# == Testing model. ==
test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(test_model, data, device_opts=device_opts)
predictions = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)
@@ -176,7 +177,7 @@ class CNNCreator_LeNet:
# == Deployment model. ==
# We simply need the main AddModel part.
deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
self.create_model(deploy_model, "data", device_opts)
self.create_model(deploy_model, "data", device_opts, is_test=True)
print("Saving deploy model")
self.save_net(self._init_net_, self._predict_net_, deploy_model)
......
@@ -7,6 +7,7 @@ import logging
import os
import sys
import lmdb
class CNNCreator_VGG16:
module = None
@@ -58,56 +59,56 @@ class CNNCreator_VGG16:
return data, label, dataset_size
def create_model(self, model, data, device_opts):
def create_model(self, model, data, device_opts, is_test):
with core.DeviceScope(device_opts):
data = data
# data, output shape: {[3,224,224]}
conv1_ = brew.conv(model, data, 'conv1_', dim_in=3, dim_out=64, kernel=3, stride=1)
conv1_ = brew.conv(model, data, 'conv1_', dim_in=3, dim_out=64, kernel=3, stride=1, pad=1)
# conv1_, output shape: {[64,224,224]}
relu1_ = brew.relu(model, conv1_, conv1_)
conv2_ = brew.conv(model, relu1_, 'conv2_', dim_in=64, dim_out=64, kernel=3, stride=1)
conv2_ = brew.conv(model, relu1_, 'conv2_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
# conv2_, output shape: {[64,224,224]}
relu2_ = brew.relu(model, conv2_, conv2_)
pool2_ = brew.max_pool(model, relu2_, 'pool2_', kernel=2, stride=2)
# pool2_, output shape: {[64,112,112]}
conv3_ = brew.conv(model, pool2_, 'conv3_', dim_in=64, dim_out=128, kernel=3, stride=1)
conv3_ = brew.conv(model, pool2_, 'conv3_', dim_in=64, dim_out=128, kernel=3, stride=1, pad=1)
# conv3_, output shape: {[128,112,112]}
relu3_ = brew.relu(model, conv3_, conv3_)
conv4_ = brew.conv(model, relu3_, 'conv4_', dim_in=128, dim_out=128, kernel=3, stride=1)
conv4_ = brew.conv(model, relu3_, 'conv4_', dim_in=128, dim_out=128, kernel=3, stride=1, pad=1)
# conv4_, output shape: {[128,112,112]}
relu4_ = brew.relu(model, conv4_, conv4_)
pool4_ = brew.max_pool(model, relu4_, 'pool4_', kernel=2, stride=2)
# pool4_, output shape: {[128,56,56]}
conv5_ = brew.conv(model, pool4_, 'conv5_', dim_in=128, dim_out=256, kernel=3, stride=1)
conv5_ = brew.conv(model, pool4_, 'conv5_', dim_in=128, dim_out=256, kernel=3, stride=1, pad=1)
# conv5_, output shape: {[256,56,56]}
relu5_ = brew.relu(model, conv5_, conv5_)
conv6_ = brew.conv(model, relu5_, 'conv6_', dim_in=256, dim_out=256, kernel=3, stride=1)
conv6_ = brew.conv(model, relu5_, 'conv6_', dim_in=256, dim_out=256, kernel=3, stride=1, pad=1)
# conv6_, output shape: {[256,56,56]}
relu6_ = brew.relu(model, conv6_, conv6_)
conv7_ = brew.conv(model, relu6_, 'conv7_', dim_in=256, dim_out=256, kernel=3, stride=1)
conv7_ = brew.conv(model, relu6_, 'conv7_', dim_in=256, dim_out=256, kernel=3, stride=1, pad=1)
# conv7_, output shape: {[256,56,56]}
relu7_ = brew.relu(model, conv7_, conv7_)
pool7_ = brew.max_pool(model, relu7_, 'pool7_', kernel=2, stride=2)
# pool7_, output shape: {[256,28,28]}
conv8_ = brew.conv(model, pool7_, 'conv8_', dim_in=256, dim_out=512, kernel=3, stride=1)
conv8_ = brew.conv(model, pool7_, 'conv8_', dim_in=256, dim_out=512, kernel=3, stride=1, pad=1)
# conv8_, output shape: {[512,28,28]}
relu8_ = brew.relu(model, conv8_, conv8_)
conv9_ = brew.conv(model, relu8_, 'conv9_', dim_in=512, dim_out=512, kernel=3, stride=1)
conv9_ = brew.conv(model, relu8_, 'conv9_', dim_in=512, dim_out=512, kernel=3, stride=1, pad=1)
# conv9_, output shape: {[512,28,28]}
relu9_ = brew.relu(model, conv9_, conv9_)
conv10_ = brew.conv(model, relu9_, 'conv10_', dim_in=512, dim_out=512, kernel=3, stride=1)
conv10_ = brew.conv(model, relu9_, 'conv10_', dim_in=512, dim_out=512, kernel=3, stride=1, pad=1)
# conv10_, output shape: {[512,28,28]}
relu10_ = brew.relu(model, conv10_, conv10_)
pool10_ = brew.max_pool(model, relu10_, 'pool10_', kernel=2, stride=2)
# pool10_, output shape: {[512,14,14]}
conv11_ = brew.conv(model, pool10_, 'conv11_', dim_in=512, dim_out=512, kernel=3, stride=1)
conv11_ = brew.conv(model, pool10_, 'conv11_', dim_in=512, dim_out=512, kernel=3, stride=1, pad=1)
# conv11_, output shape: {[512,14,14]}
relu11_ = brew.relu(model, conv11_, conv11_)
conv12_ = brew.conv(model, relu11_, 'conv12_', dim_in=512, dim_out=512, kernel=3, stride=1)
conv12_ = brew.conv(model, relu11_, 'conv12_', dim_in=512, dim_out=512, kernel=3, stride=1, pad=1)
# conv12_, output shape: {[512,14,14]}
relu12_ = brew.relu(model, conv12_, conv12_)
conv13_ = brew.conv(model, relu12_, 'conv13_', dim_in=512, dim_out=512, kernel=3, stride=1)
conv13_ = brew.conv(model, relu12_, 'conv13_', dim_in=512, dim_out=512, kernel=3, stride=1, pad=1)
# conv13_, output shape: {[512,14,14]}
relu13_ = brew.relu(model, conv13_, conv13_)
pool13_ = brew.max_pool(model, relu13_, 'pool13_', kernel=2, stride=2)
@@ -115,15 +116,11 @@ class CNNCreator_VGG16:
fc13_ = brew.fc(model, pool13_, 'fc13_', dim_in=512 * 7 * 7, dim_out=4096)
# fc13_, output shape: {[4096,1,1]}
relu14_ = brew.relu(model, fc13_, fc13_)
dropout14_ = mx.symbol.Dropout(data=relu14_,
p=0.5,
name="dropout14_")
dropout14_ = brew.dropout(model, relu14_, 'dropout14_', ratio=0.5, is_test=False)
fc14_ = brew.fc(model, dropout14_, 'fc14_', dim_in=4096, dim_out=4096)
# fc14_, output shape: {[4096,1,1]}
relu15_ = brew.relu(model, fc14_, fc14_)
dropout15_ = mx.symbol.Dropout(data=relu15_,
p=0.5,
name="dropout15_")
dropout15_ = brew.dropout(model, relu15_, 'dropout15_', ratio=0.5, is_test=False)
fc15_ = brew.fc(model, dropout15_, 'fc15_', dim_in=4096, dim_out=1000)
# fc15_, output shape: {[1000,1,1]}
predictions = brew.softmax(model, fc15_, 'predictions')
@@ -185,7 +182,7 @@ class CNNCreator_VGG16:
# == Training model ==
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
@@ -208,7 +205,7 @@ class CNNCreator_VGG16:
# == Testing model. ==
test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)