Corrected dropout and output layer (including sigmoid). Added learning_rate_decay again, since it is indeed supported in Caffe2.
parent 054df59c
Pipeline #96636 failed in 3 minutes and 49 seconds
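Context for the learning_rate_decay change: the generator maps learning_rate_decay onto the gamma argument of Caffe2's LearningRate operator, which under the step policy decays the rate as lr = base_lr * gamma ^ floor(iter / stepsize). A minimal illustrative sketch, not part of this commit (model and blob names are made up):

# Illustrative sketch of Caffe2's step LR policy:
# lr = base_lr * gamma ^ floor(iter / stepsize)
from caffe2.python import brew, model_helper, workspace

model = model_helper.ModelHelper(name="lr_demo")
iteration = brew.iter(model, "iter")          # iteration counter blob
lr = model.net.LearningRate(iteration, "lr",
                            base_lr=-0.001,   # Caffe2 convention: negative base_lr
                            policy="step", stepsize=1000, gamma=0.9)
workspace.RunNetOnce(model.param_init_net)
workspace.CreateNet(model.net)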
@@ -76,10 +76,7 @@ public class TrainParamSupportChecker implements CNNTrainVisitor {
     public void visit(ASTWeightDecayEntry node){}
-    public void visit(ASTLRDecayEntry node){
-        printUnsupportedOptimizerParam(node.getName());
-        this.unsupportedElemList.add(node.getName());
-    }
+    public void visit(ASTLRDecayEntry node){}
     public void visit(ASTLRPolicyEntry node){}
@@ -7,6 +7,7 @@ import logging
 import os
 import sys
+import lmdb
 class ${tc.fileNameWithoutEnding}:
     module = None
@@ -58,7 +59,7 @@ class ${tc.fileNameWithoutEnding}:
         return data, label, dataset_size
-    def create_model(self, model, data, device_opts):
+    def create_model(self, model, data, device_opts, is_test):
         with core.DeviceScope(device_opts):
 ${tc.include(tc.architecture.body)}
@@ -118,7 +119,7 @@ ${tc.include(tc.architecture.body)}
         # == Training model ==
         train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
         data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
-        ${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(train_model, data, device_opts=device_opts)
+        ${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
         self.add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
         self.add_accuracy(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, eval_metric)
         with core.DeviceScope(device_opts):
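The add_training_operators helper itself is unchanged by this commit and not shown here; a rough sketch of what such a helper typically does in Caffe2 (the body, the cross-entropy loss, and the SGD choice are assumptions, not the project's actual code):

# Hypothetical sketch of a Caffe2 training-operator helper; `output` is the
# network's final blob and `label` the ground-truth blob.
from caffe2.python import core, optimizer

def add_training_operators(model, output, label, device_opts,
                           base_learning_rate, policy, stepsize, gamma):
    with core.DeviceScope(device_opts):
        # loss on top of the network output
        xent = model.LabelCrossEntropy([output, label], 'xent')
        loss = model.AveragedLoss(xent, 'loss')
        # backward pass and parameter updates
        model.AddGradientOperators([loss])
        optimizer.build_sgd(model, base_learning_rate=base_learning_rate,
                            policy=policy, stepsize=stepsize, gamma=gamma)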
@@ -141,7 +142,7 @@ ${tc.include(tc.architecture.body)}
         # == Testing model. ==
         test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
         data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
-        ${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(test_model, data, device_opts=device_opts)
+        ${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
         self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
         workspace.RunNetOnce(test_model.param_init_net)
         workspace.CreateNet(test_model.net, overwrite=True)
@@ -159,7 +160,7 @@ ${tc.include(tc.architecture.body)}
         # == Deployment model. ==
         # We simply need the main AddModel part.
         deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
-        self.create_model(deploy_model, "data", device_opts)
+        self.create_model(deploy_model, "data", device_opts, is_test=True)
         print("Saving deploy model")
         self.save_net(self._init_net_, self._predict_net_, deploy_model)
@@ -41,6 +41,8 @@ if __name__ == "__main__":
     <#elseif param == "step_size">
         <#assign paramName = "stepsize">
     <#elseif param == "gamma1">
         <#assign paramName = "gamma1">
+    <#elseif param == "learning_rate_decay">
+        <#assign paramName = "gamma">
     </#if>
     ${paramName}=${config.optimizerParams[param]}<#sep>,
-${element.name} = mx.symbol.Dropout(data=${element.inputs[0]},
-    p=${element.p?c},
-    name="${element.name}")
+<#assign input = element.inputs[0]>
+<#assign ratio = element.p?c?string>
+${element.name} = brew.dropout(model, ${input}, '${element.name}', ratio=${ratio}, is_test=False)
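For reference, brew.dropout emits a Caffe2 Dropout op: with is_test=False it zeroes activations with probability ratio during training, and with is_test=True it is a pass-through at inference. A minimal sketch, not part of this commit (blob names are illustrative):

# Illustrative use of brew.dropout on top of a fully connected layer.
from caffe2.python import brew, model_helper

model = model_helper.ModelHelper(name="dropout_demo")
fc1 = brew.fc(model, 'data', 'fc1', dim_in=64, dim_out=64)
drop1 = brew.dropout(model, fc1, 'drop1', ratio=0.5, is_test=False)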
@@ -2,8 +2,7 @@
 <#if element.softmaxOutput>
 ${element.name} = brew.softmax(model, ${input}, '${element.name}')
 <#elseif element.logisticRegressionOutput>
-${element.name} = mx.symbol.LogisticRegressionOutput(data=${element.inputs[0]}, <#-- TODO: check how to adapt LogisticRegressionOutput -->
-    name="${element.name}")
+${element.name} = model.net.Sigmoid(${input}, '${element.name}')
 <#elseif element.linearRegressionOutput>
 ${element.name} = mx.symbol.LinearRegressionOutput(data=${element.inputs[0]}, <#-- TODO: check how to adapt linearRegressionOutput -->
     name="${element.name}")
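The template now emits a plain Caffe2 Sigmoid op via model.net in place of MxNet's LogisticRegressionOutput. A minimal sketch of the generated pattern, not part of this commit (blob names are illustrative):

# Illustrative sketch of a sigmoid output for a logistic-regression head.
from caffe2.python import brew, model_helper

model = model_helper.ModelHelper(name="sigmoid_demo")
fc_out = brew.fc(model, 'data', 'fc_out', dim_in=64, dim_out=1)
predictions = model.net.Sigmoid(fc_out, 'predictions')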
@@ -121,7 +121,7 @@ public class GenerationTest extends AbstractSymtabTest{
         CNNTrain2Caffe2 trainGenerator = new CNNTrain2Caffe2();
         trainGenerator.generate(Paths.get(sourcePath), "FullConfig");
-        assertTrue(Log.getFindings().size() == 9);
+        assertTrue(Log.getFindings().size() == 8);
         checkFilesAreEqual(
                 Paths.get("./target/generated-sources-cnnarch"),
                 Paths.get("./src/test/resources/target_code"),
......
......@@ -7,6 +7,7 @@ import logging
import os
import sys
import lmdb
class CNNCreator_Alexnet:
module = None
@@ -58,7 +59,7 @@ class CNNCreator_Alexnet:
         return data, label, dataset_size
-    def create_model(self, model, data, device_opts):
+    def create_model(self, model, data, device_opts, is_test):
         with core.DeviceScope(device_opts):
             data = data
@@ -140,15 +141,11 @@ class CNNCreator_Alexnet:
         fc6_ = brew.fc(model, concatenate6_, 'fc6_', dim_in=256 * 6 * 6, dim_out=4096)
         # fc6_, output shape: {[4096,1,1]}
         relu6_ = brew.relu(model, fc6_, fc6_)
-        dropout6_ = mx.symbol.Dropout(data=relu6_,
-            p=0.5,
-            name="dropout6_")
+        dropout6_ = brew.dropout(model, relu6_, 'dropout6_', ratio=0.5, is_test=False)
         fc7_ = brew.fc(model, dropout6_, 'fc7_', dim_in=4096, dim_out=4096)
         # fc7_, output shape: {[4096,1,1]}
         relu7_ = brew.relu(model, fc7_, fc7_)
-        dropout7_ = mx.symbol.Dropout(data=relu7_,
-            p=0.5,
-            name="dropout7_")
+        dropout7_ = brew.dropout(model, relu7_, 'dropout7_', ratio=0.5, is_test=False)
         fc8_ = brew.fc(model, dropout7_, 'fc8_', dim_in=4096, dim_out=10)
         # fc8_, output shape: {[10,1,1]}
         predictions = brew.softmax(model, fc8_, 'predictions')
@@ -210,7 +207,7 @@ class CNNCreator_Alexnet:
         # == Training model ==
         train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
         data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
-        predictions = self.create_model(train_model, data, device_opts=device_opts)
+        predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
         self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
         self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
         with core.DeviceScope(device_opts):
@@ -233,7 +230,7 @@ class CNNCreator_Alexnet:
         # == Testing model. ==
         test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
         data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
-        predictions = self.create_model(test_model, data, device_opts=device_opts)
+        predictions = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
         self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
         workspace.RunNetOnce(test_model.param_init_net)
         workspace.CreateNet(test_model.net, overwrite=True)
@@ -251,7 +248,7 @@ class CNNCreator_Alexnet:
         # == Deployment model. ==
         # We simply need the main AddModel part.
         deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
-        self.create_model(deploy_model, "data", device_opts)
+        self.create_model(deploy_model, "data", device_opts, is_test=True)
         print("Saving deploy model")
         self.save_net(self._init_net_, self._predict_net_, deploy_model)
@@ -7,6 +7,7 @@ import logging
 import os
 import sys
+import lmdb
 class CNNCreator_CifarClassifierNetwork:
     module = None
@@ -58,7 +59,7 @@ class CNNCreator_CifarClassifierNetwork:
         return data, label, dataset_size
-    def create_model(self, model, data, device_opts):
+    def create_model(self, model, data, device_opts, is_test):
         with core.DeviceScope(device_opts):
             data = data
@@ -231,9 +232,7 @@ class CNNCreator_CifarClassifierNetwork:
         # globalpooling31_, output shape: {[64,1,1]}
         fc31_ = brew.fc(model, globalpooling31_, 'fc31_', dim_in=64, dim_out=128)
         # fc31_, output shape: {[128,1,1]}
-        dropout31_ = mx.symbol.Dropout(data=fc31_,
-            p=0.5,
-            name="dropout31_")
+        dropout31_ = brew.dropout(model, fc31_, 'dropout31_', ratio=0.5, is_test=False)
         fc32_ = brew.fc(model, dropout31_, 'fc32_', dim_in=128, dim_out=10)
         # fc32_, output shape: {[10,1,1]}
         softmax = brew.softmax(model, fc32_, 'softmax')
@@ -295,7 +294,7 @@ class CNNCreator_CifarClassifierNetwork:
         # == Training model ==
         train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
         data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
-        softmax = self.create_model(train_model, data, device_opts=device_opts)
+        softmax = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
         self.add_training_operators(train_model, softmax, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
         self.add_accuracy(train_model, softmax, label, device_opts, eval_metric)
         with core.DeviceScope(device_opts):
@@ -318,7 +317,7 @@ class CNNCreator_CifarClassifierNetwork:
         # == Testing model. ==
         test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
         data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
-        softmax = self.create_model(test_model, data, device_opts=device_opts)
+        softmax = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
         self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
         workspace.RunNetOnce(test_model.param_init_net)
         workspace.CreateNet(test_model.net, overwrite=True)
@@ -336,7 +335,7 @@ class CNNCreator_CifarClassifierNetwork:
         # == Deployment model. ==
         # We simply need the main AddModel part.
         deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
-        self.create_model(deploy_model, "data", device_opts)
+        self.create_model(deploy_model, "data", device_opts, is_test=True)
         print("Saving deploy model")
         self.save_net(self._init_net_, self._predict_net_, deploy_model)
@@ -7,6 +7,7 @@ import logging
 import os
 import sys
+import lmdb
 class CNNCreator_VGG16:
     module = None
@@ -58,7 +59,7 @@ class CNNCreator_VGG16:
         return data, label, dataset_size
-    def create_model(self, model, data, device_opts):
+    def create_model(self, model, data, device_opts, is_test):
         with core.DeviceScope(device_opts):
             data = data
@@ -115,15 +116,11 @@ class CNNCreator_VGG16:
         fc13_ = brew.fc(model, pool13_, 'fc13_', dim_in=512 * 7 * 7, dim_out=4096)
         # fc13_, output shape: {[4096,1,1]}
         relu14_ = brew.relu(model, fc13_, fc13_)
-        dropout14_ = mx.symbol.Dropout(data=relu14_,
-            p=0.5,
-            name="dropout14_")
+        dropout14_ = brew.dropout(model, relu14_, 'dropout14_', ratio=0.5, is_test=False)
         fc14_ = brew.fc(model, dropout14_, 'fc14_', dim_in=4096, dim_out=4096)
         # fc14_, output shape: {[4096,1,1]}
         relu15_ = brew.relu(model, fc14_, fc14_)
-        dropout15_ = mx.symbol.Dropout(data=relu15_,
-            p=0.5,
-            name="dropout15_")
+        dropout15_ = brew.dropout(model, relu15_, 'dropout15_', ratio=0.5, is_test=False)
         fc15_ = brew.fc(model, dropout15_, 'fc15_', dim_in=4096, dim_out=1000)
         # fc15_, output shape: {[1000,1,1]}
         predictions = brew.softmax(model, fc15_, 'predictions')
@@ -185,7 +182,7 @@ class CNNCreator_VGG16:
         # == Training model ==
         train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
         data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
-        predictions = self.create_model(train_model, data, device_opts=device_opts)
+        predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
         self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
         self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
         with core.DeviceScope(device_opts):
@@ -208,7 +205,7 @@ class CNNCreator_VGG16:
         # == Testing model. ==
         test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
         data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
-        predictions = self.create_model(test_model, data, device_opts=device_opts)
+        predictions = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
         self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
         workspace.RunNetOnce(test_model.param_init_net)
         workspace.CreateNet(test_model.net, overwrite=True)
@@ -226,7 +223,7 @@ class CNNCreator_VGG16:
         # == Deployment model. ==
         # We simply need the main AddModel part.
         deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
-        self.create_model(deploy_model, "data", device_opts)
+        self.create_model(deploy_model, "data", device_opts, is_test=True)
         print("Saving deploy model")
         self.save_net(self._init_net_, self._predict_net_, deploy_model)
@@ -21,8 +21,9 @@ if __name__ == "__main__":
         opt_type='rmsprop',
         epsilon=1.0E-6,
         weight_decay=0.01,
-        gamma=0.9,
+        gamma1=0.9,
         policy='step',
         base_learning_rate=0.001,
+        gamma=0.9,
         stepsize=1000
     )
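The renaming keeps the two decay rates apart: gamma1 (MxNet's name for the RMSProp running-average decay) versus gamma (the learning-rate decay of the step policy). A hedged sketch of how this config might map onto caffe2.python.optimizer; the gamma1 -> decay routing and the helper network are assumptions, not the generator's actual code:

# Hypothetical mapping of the FullConfig values above (assumption:
# gamma1 -> RMSProp `decay`, gamma -> the LR policy's `gamma`).
from caffe2.python import brew, model_helper, optimizer

model = model_helper.ModelHelper(name="opt_demo")
pred = brew.fc(model, 'data', 'pred', dim_in=64, dim_out=1)
dist = model.net.SquaredL2Distance([pred, 'label'], 'dist')
loss = model.net.AveragedLoss(dist, 'loss')
model.AddGradientOperators([loss])
optimizer.build_rms_prop(model,
                         base_learning_rate=0.001,
                         decay=0.9,        # gamma1: running-average decay
                         epsilon=1.0e-6,
                         policy='step',
                         stepsize=1000,
                         gamma=0.9)        # learning_rate_decay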