Commit fdfbe9d5 authored by Carlos Alfredo Yeverino Rodriguez's avatar Carlos Alfredo Yeverino Rodriguez
Browse files

Corrected dropout and output layer (include sigmoid). Added...

Corrected dropout and output layer (include sigmoid). Added learning_rate_decay again since it is indeed supported in Caffe2
parent 054df59c
Pipeline #96636 failed with stages
in 3 minutes and 49 seconds
......@@ -76,10 +76,7 @@ public class TrainParamSupportChecker implements CNNTrainVisitor {
public void visit(ASTWeightDecayEntry node){}
public void visit(ASTLRDecayEntry node){
printUnsupportedOptimizerParam(node.getName());
this.unsupportedElemList.add(node.getName());
}
public void visit(ASTLRDecayEntry node){}
public void visit(ASTLRPolicyEntry node){}
......
......@@ -7,6 +7,7 @@ import logging
import os
import sys
import lmdb
class ${tc.fileNameWithoutEnding}:
module = None
......@@ -58,7 +59,7 @@ class ${tc.fileNameWithoutEnding}:
return data, label, dataset_size
def create_model(self, model, data, device_opts):
def create_model(self, model, data, device_opts, is_test):
with core.DeviceScope(device_opts):
${tc.include(tc.architecture.body)}
......@@ -118,7 +119,7 @@ ${tc.include(tc.architecture.body)}
# == Training model ==
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(train_model, data, device_opts=device_opts)
${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
self.add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
......@@ -141,7 +142,7 @@ ${tc.include(tc.architecture.body)}
# == Testing model. ==
test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(test_model, data, device_opts=device_opts)
${tc.join(tc.architectureOutputs, ",", "","")} = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)
......@@ -159,7 +160,7 @@ ${tc.include(tc.architecture.body)}
# == Deployment model. ==
# We simply need the main AddModel part.
deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
self.create_model(deploy_model, "data", device_opts)
self.create_model(deploy_model, "data", device_opts, is_test=True)
print("Saving deploy model")
self.save_net(self._init_net_, self._predict_net_, deploy_model)
......
......@@ -41,6 +41,8 @@ if __name__ == "__main__":
<#elseif param == "step_size">
<#assign paramName = "stepsize">
<#elseif param == "gamma1">
<#assign paramName = "gamma1">
<#elseif param == "learning_rate_decay">
<#assign paramName = "gamma">
</#if>
${paramName}=${config.optimizerParams[param]}<#sep>,
......
${element.name} = mx.symbol.Dropout(data=${element.inputs[0]},
p=${element.p?c},
name="${element.name}")
<#assign input = element.inputs[0]>
<#assign ratio = element.p?c?string>
${element.name} = brew.dropout(model, ${input}, '${element.name}', ratio=${ratio}, is_test=False)
......@@ -2,8 +2,7 @@
<#if element.softmaxOutput>
${element.name} = brew.softmax(model, ${input}, '${element.name}')
<#elseif element.logisticRegressionOutput>
${element.name} = mx.symbol.LogisticRegressionOutput(data=${element.inputs[0]}, <#-- TODO: check how to adapt LogisticRegressionOutput -->
name="${element.name}")
${element.name} = model.net.Sigmoid(${input}, '${element.name}')
<#elseif element.linearRegressionOutput>
${element.name} = mx.symbol.LinearRegressionOutput(data=${element.inputs[0]}, <#-- TODO: check how to adapt linearRegressionOutput -->
name="${element.name}")
......
......@@ -121,7 +121,7 @@ public class GenerationTest extends AbstractSymtabTest{
CNNTrain2Caffe2 trainGenerator = new CNNTrain2Caffe2();
trainGenerator.generate(Paths.get(sourcePath), "FullConfig");
assertTrue(Log.getFindings().size() == 9);
assertTrue(Log.getFindings().size() == 8);
checkFilesAreEqual(
Paths.get("./target/generated-sources-cnnarch"),
Paths.get("./src/test/resources/target_code"),
......
......@@ -7,6 +7,7 @@ import logging
import os
import sys
import lmdb
class CNNCreator_Alexnet:
module = None
......@@ -58,7 +59,7 @@ class CNNCreator_Alexnet:
return data, label, dataset_size
def create_model(self, model, data, device_opts):
def create_model(self, model, data, device_opts, is_test):
with core.DeviceScope(device_opts):
data = data
......@@ -140,15 +141,11 @@ class CNNCreator_Alexnet:
fc6_ = brew.fc(model, concatenate6_, 'fc6_', dim_in=256 * 6 * 6, dim_out=4096)
# fc6_, output shape: {[4096,1,1]}
relu6_ = brew.relu(model, fc6_, fc6_)
dropout6_ = mx.symbol.Dropout(data=relu6_,
p=0.5,
name="dropout6_")
dropout6_ = brew.dropout(model, relu6_, 'dropout6_', ratio=0.5, is_test=False)
fc7_ = brew.fc(model, dropout6_, 'fc7_', dim_in=4096, dim_out=4096)
# fc7_, output shape: {[4096,1,1]}
relu7_ = brew.relu(model, fc7_, fc7_)
dropout7_ = mx.symbol.Dropout(data=relu7_,
p=0.5,
name="dropout7_")
dropout7_ = brew.dropout(model, relu7_, 'dropout7_', ratio=0.5, is_test=False)
fc8_ = brew.fc(model, dropout7_, 'fc8_', dim_in=4096, dim_out=10)
# fc8_, output shape: {[10,1,1]}
predictions = brew.softmax(model, fc8_, 'predictions')
......@@ -210,7 +207,7 @@ class CNNCreator_Alexnet:
# == Training model ==
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
......@@ -233,7 +230,7 @@ class CNNCreator_Alexnet:
# == Testing model. ==
test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(test_model, data, device_opts=device_opts)
predictions = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)
......@@ -251,7 +248,7 @@ class CNNCreator_Alexnet:
# == Deployment model. ==
# We simply need the main AddModel part.
deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
self.create_model(deploy_model, "data", device_opts)
self.create_model(deploy_model, "data", device_opts, is_test=True)
print("Saving deploy model")
self.save_net(self._init_net_, self._predict_net_, deploy_model)
......
......@@ -7,6 +7,7 @@ import logging
import os
import sys
import lmdb
class CNNCreator_CifarClassifierNetwork:
module = None
......@@ -58,7 +59,7 @@ class CNNCreator_CifarClassifierNetwork:
return data, label, dataset_size
def create_model(self, model, data, device_opts):
def create_model(self, model, data, device_opts, is_test):
with core.DeviceScope(device_opts):
data = data
......@@ -231,9 +232,7 @@ class CNNCreator_CifarClassifierNetwork:
# globalpooling31_, output shape: {[64,1,1]}
fc31_ = brew.fc(model, globalpooling31_, 'fc31_', dim_in=64, dim_out=128)
# fc31_, output shape: {[128,1,1]}
dropout31_ = mx.symbol.Dropout(data=fc31_,
p=0.5,
name="dropout31_")
dropout31_ = brew.dropout(model, fc31_, 'dropout31_', ratio=0.5, is_test=False)
fc32_ = brew.fc(model, dropout31_, 'fc32_', dim_in=128, dim_out=10)
# fc32_, output shape: {[10,1,1]}
softmax = brew.softmax(model, fc32_, 'softmax')
......@@ -295,7 +294,7 @@ class CNNCreator_CifarClassifierNetwork:
# == Training model ==
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
softmax = self.create_model(train_model, data, device_opts=device_opts)
softmax = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
self.add_training_operators(train_model, softmax, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, softmax, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
......@@ -318,7 +317,7 @@ class CNNCreator_CifarClassifierNetwork:
# == Testing model. ==
test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
softmax = self.create_model(test_model, data, device_opts=device_opts)
softmax = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)
......@@ -336,7 +335,7 @@ class CNNCreator_CifarClassifierNetwork:
# == Deployment model. ==
# We simply need the main AddModel part.
deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
self.create_model(deploy_model, "data", device_opts)
self.create_model(deploy_model, "data", device_opts, is_test=True)
print("Saving deploy model")
self.save_net(self._init_net_, self._predict_net_, deploy_model)
......
......@@ -7,6 +7,7 @@ import logging
import os
import sys
import lmdb
class CNNCreator_VGG16:
module = None
......@@ -58,7 +59,7 @@ class CNNCreator_VGG16:
return data, label, dataset_size
def create_model(self, model, data, device_opts):
def create_model(self, model, data, device_opts, is_test):
with core.DeviceScope(device_opts):
data = data
......@@ -115,15 +116,11 @@ class CNNCreator_VGG16:
fc13_ = brew.fc(model, pool13_, 'fc13_', dim_in=512 * 7 * 7, dim_out=4096)
# fc13_, output shape: {[4096,1,1]}
relu14_ = brew.relu(model, fc13_, fc13_)
dropout14_ = mx.symbol.Dropout(data=relu14_,
p=0.5,
name="dropout14_")
dropout14_ = brew.dropout(model, relu14_, 'dropout14_', ratio=0.5, is_test=False)
fc14_ = brew.fc(model, dropout14_, 'fc14_', dim_in=4096, dim_out=4096)
# fc14_, output shape: {[4096,1,1]}
relu15_ = brew.relu(model, fc14_, fc14_)
dropout15_ = mx.symbol.Dropout(data=relu15_,
p=0.5,
name="dropout15_")
dropout15_ = brew.dropout(model, relu15_, 'dropout15_', ratio=0.5, is_test=False)
fc15_ = brew.fc(model, dropout15_, 'fc15_', dim_in=4096, dim_out=1000)
# fc15_, output shape: {[1000,1,1]}
predictions = brew.softmax(model, fc15_, 'predictions')
......@@ -185,7 +182,7 @@ class CNNCreator_VGG16:
# == Training model ==
train_model= model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts)
predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
self.add_training_operators(train_model, predictions, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
with core.DeviceScope(device_opts):
......@@ -208,7 +205,7 @@ class CNNCreator_VGG16:
# == Testing model. ==
test_model= model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
predictions = self.create_model(test_model, data, device_opts=device_opts)
predictions = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)
......@@ -226,7 +223,7 @@ class CNNCreator_VGG16:
# == Deployment model. ==
# We simply need the main AddModel part.
deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
self.create_model(deploy_model, "data", device_opts)
self.create_model(deploy_model, "data", device_opts, is_test=True)
print("Saving deploy model")
self.save_net(self._init_net_, self._predict_net_, deploy_model)
......
......@@ -21,8 +21,9 @@ if __name__ == "__main__":
opt_type='rmsprop',
epsilon=1.0E-6,
weight_decay=0.01,
gamma=0.9,
gamma1=0.9,
policy='step',
base_learning_rate=0.001,
gamma=0.9,
stepsize=1000
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment