Commit 4333875e authored by Evgeny Kusmenko's avatar Evgeny Kusmenko
Browse files

Merge branch 'layer-templates-caffe2' into 'master'

Layer templates caffe2

See merge request !4
parents 058f8ab3 56853096
Pipeline #73077 passed with stages
import mxnet as mx
from caffe2.python import workspace, core, model_helper, brew, optimizer
from caffe2.python.predictor import mobile_exporter
from caffe2.proto import caffe2_pb2
import numpy as np
import logging
import os
import errno
import shutil
import h5py
import sys
import numpy as np
@mx.init.register
class MyConstant(mx.init.Initializer):
def __init__(self, value):
super(MyConstant, self).__init__(value=value)
self.value = value
def _init_weight(self, _, arr):
arr[:] = mx.nd.array(self.value)
class ${tc.fileNameWithoutEnding}:
module = None
_data_dir_ = "data/${tc.fullArchitectureName}/"
_model_dir_ = "model/${tc.fullArchitectureName}/"
_model_prefix_ = "${tc.architectureName}"
_input_names_ = [${tc.join(tc.architectureInputs, ",", "'", "'")}]
_input_shapes_ = [<#list tc.architecture.inputs as input>(${tc.join(input.definition.type.dimensions, ",")})</#list>]
_output_names_ = [${tc.join(tc.architectureOutputs, ",", "'", "_label'")}]
def load(self, context):
lastEpoch = 0
param_file = None
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_newest-0000.params")
except OSError:
pass
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_newest-symbol.json")
except OSError:
pass
if os.path.isdir(self._model_dir_):
for file in os.listdir(self._model_dir_):
if ".params" in file and self._model_prefix_ in file:
epochStr = file.replace(".params","").replace(self._model_prefix_ + "-","")
epoch = int(epochStr)
if epoch > lastEpoch:
lastEpoch = epoch
param_file = file
if param_file is None:
return 0
else:
logging.info("Loading checkpoint: " + param_file)
self.module.load(prefix=self._model_dir_ + self._model_prefix_,
epoch=lastEpoch,
data_names=self._input_names_,
label_names=self._output_names_,
context=context)
return lastEpoch
def load_data(self, batch_size):
train_h5, test_h5 = self.load_h5_files()
data_mean = train_h5[self._input_names_[0]][:].mean(axis=0)
data_std = train_h5[self._input_names_[0]][:].std(axis=0) + 1e-5
train_iter = mx.io.NDArrayIter(train_h5[self._input_names_[0]],
train_h5[self._output_names_[0]],
batch_size=batch_size,
data_name=self._input_names_[0],
label_name=self._output_names_[0])
test_iter = None
if test_h5 != None:
test_iter = mx.io.NDArrayIter(test_h5[self._input_names_[0]],
test_h5[self._output_names_[0]],
batch_size=batch_size,
data_name=self._input_names_[0],
label_name=self._output_names_[0])
return train_iter, test_iter, data_mean, data_std
def load_h5_files(self):
train_h5 = None
test_h5 = None
train_path = self._data_dir_ + "train.h5"
test_path = self._data_dir_ + "test.h5"
if os.path.isfile(train_path):
train_h5 = h5py.File(train_path, 'r')
if not (self._input_names_[0] in train_h5 and self._output_names_[0] in train_h5):
logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the datasets: "
+ "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
sys.exit(1)
test_iter = None
if os.path.isfile(test_path):
test_h5 = h5py.File(test_path, 'r')
if not (self._input_names_[0] in test_h5 and self._output_names_[0] in test_h5):
logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the datasets: "
+ "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
sys.exit(1)
else:
logging.warning("Couldn't load test set. File '" + os.path.abspath(test_path) + "' does not exist.")
return train_h5, test_h5
else:
logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.")
sys.exit(1)
def train(self, batch_size=64,
num_epoch=10,
eval_metric='acc',
optimizer='adam',
optimizer_params=(('learning_rate', 0.001),),
load_checkpoint=True,
context='gpu',
checkpoint_period=5,
normalize=True):
if context == 'gpu':
mx_context = mx.gpu()
elif context == 'cpu':
mx_context = mx.cpu()
else:
logging.error("Context argument is '" + context + "'. Only 'cpu' and 'gpu are valid arguments'.")
if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay']
del optimizer_params['weight_decay']
if 'learning_rate_decay' in optimizer_params:
min_learning_rate = 1e-08
if 'learning_rate_minimum' in optimizer_params:
min_learning_rate = optimizer_params['learning_rate_minimum']
del optimizer_params['learning_rate_minimum']
optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
optimizer_params['step_size'],
factor=optimizer_params['learning_rate_decay'],
stop_factor_lr=min_learning_rate)
del optimizer_params['step_size']
del optimizer_params['learning_rate_decay']
train_iter, test_iter, data_mean, data_std = self.load_data(batch_size)
if self.module == None:
if normalize:
self.construct(mx_context, data_mean, data_std)
else:
self.construct(mx_context)
begin_epoch = 0
if load_checkpoint:
begin_epoch = self.load(mx_context)
else:
if os.path.isdir(self._model_dir_):
shutil.rmtree(self._model_dir_)
try:
os.makedirs(self._model_dir_)
except OSError:
if not os.path.isdir(self._model_dir_):
raise
self.module.fit(
train_data=train_iter,
eval_metric=eval_metric,
eval_data=test_iter,
optimizer=optimizer,
optimizer_params=optimizer_params,
batch_end_callback=mx.callback.Speedometer(batch_size),
epoch_end_callback=mx.callback.do_checkpoint(prefix=self._model_dir_ + self._model_prefix_, period=checkpoint_period),
begin_epoch=begin_epoch,
num_epoch=num_epoch + begin_epoch)
self.module.save_checkpoint(self._model_dir_ + self._model_prefix_, num_epoch + begin_epoch)
self.module.save_checkpoint(self._model_dir_ + self._model_prefix_ + '_newest', 0)
def construct(self, context, data_mean=None, data_std=None):
#TODO: Check whether class is needed
#class ${tc.fileNameWithoutEnding}:
module = None
_data_dir_ = "data/${tc.fullArchitectureName}/"
_model_dir_ = "model/${tc.fullArchitectureName}/"
_model_prefix_ = "${tc.architectureName}"
_input_names_ = [${tc.join(tc.architectureInputs, ",", "'", "'")}]
_input_shapes_ = [<#list tc.architecture.inputs as input>(${tc.join(input.definition.type.dimensions, ",")})</#list>]
_output_names_ = [${tc.join(tc.architectureOutputs, ",", "'", "_label'")}]
#TODO: Modify paths to make them dynamic
#For Windows
#INIT_NET = 'D:/Yeverino/git_projects/Caffe2_scripts/caffe2_ema_cnncreator/init_net'
#PREDICT_NET = 'D:/Yeverino/git_projects/Caffe2_scripts/caffe2_ema_cnncreator/predict_net'
#For Ubuntu
INIT_NET = '/home/carlos/Documents/git/Caffe2_scripts/caffe2_ema_cnncreator/init_net'
PREDICT_NET = '/home/carlos/Documents/git/Caffe2_scripts/caffe2_ema_cnncreator/predict_net'
#device_opts = core.DeviceOption(caffe2_pb2.CPU, 0) #for CPU processing
device_opts = core.DeviceOption(caffe2_pb2.CUDA, 0) #for GPU processing
#data and label are dummy at the moment
# randomly creates 30x30 patches of ones or zeros with label 1 and 0 respectively
def get_dummy_data(batchsize) :
data = []
label = []
for i in range(batchsize) :
r = np.random.randint(0, 2)
if r==0 :
d = np.zeros((1,30,30))
l = 0
else :
d = np.ones((1,30,30))
l = 1
data.append(d)
label.append(l)
return np.array(data).astype('float32'), np.array(label).astype('int32')
def AddInput(model, batch_size):
data, label = get_dummy_data(batch_size)
print '\ndata:', data
print '\nlabel:', label
return data, label
def create_model(model, device_opts):
with core.DeviceScope(device_opts):
${tc.include(tc.architecture.body)}
self.module = mx.mod.Module(symbol=mx.symbol.Group([${tc.join(tc.architectureOutputs, ",")}]),
data_names=self._input_names_,
label_names=self._output_names_,
context=context)
# this adds the loss and optimizer
def add_training_operators(model, output, device_opts) :
with core.DeviceScope(device_opts):
xent = model.LabelCrossEntropy([output, "label"], 'xent')
loss = model.AveragedLoss(xent, "loss")
brew.accuracy(model, [output, "label"], "accuracy")
model.AddGradientOperators([loss])
opt = optimizer.build_sgd(model, base_learning_rate=0.01, policy="step", stepsize=1, gamma=0.999) # , momentum=0.9
def train(INIT_NET, PREDICT_NET, epochs, batch_size, device_opts) :
train_model= model_helper.ModelHelper(name="train_net")
${tc.join(tc.architectureOutputs, ",", "","")} = create_model(train_model, device_opts=device_opts)
add_training_operators(train_model, ${tc.join(tc.architectureOutputs, ",", "","")}, device_opts=device_opts)
with core.DeviceScope(device_opts):
brew.add_weight_decay(train_model, 0.001) # any effect???
workspace.RunNetOnce(train_model.param_init_net)
workspace.CreateNet(train_model.net)
print '\ntraining for', epochs, 'epochs'
for j in range(0, epochs):
workspace.RunNet(train_model.net, 10) # run for 10 times
print str(j) + ': ' + 'loss ' + str(workspace.FetchBlob("loss")) + ' - ' + 'accuracy ' + str(workspace.FetchBlob("accuracy"))
print 'training done'
print '\nrunning test model'
test_model= model_helper.ModelHelper(name="test_net", init_params=False)
create_model(test_model, device_opts=device_opts)
workspace.RunNetOnce(test_model.param_init_net)
workspace.CreateNet(test_model.net, overwrite=True)
${tc.architectureInputs[0]} = np.zeros((1,1,30,30)).astype('float32')
workspace.FeedBlob("${tc.architectureInputs[0]}", ${tc.architectureInputs[0]}, device_option=device_opts)
workspace.RunNet(test_model.net, 1)
print "\nInput: zeros"
print "Output:", workspace.FetchBlob("${tc.architectureOutputs[0]}") #TODO: Consider multiple output names
print "Output class:", np.argmax(workspace.FetchBlob("${tc.architectureOutputs[0]}")) #TODO: Consider multiple output names
${tc.architectureInputs[0]} = np.ones((1,1,30,30)).astype('float32')
workspace.FeedBlob("${tc.architectureInputs[0]}", ${tc.architectureInputs[0]}, device_option=device_opts)
workspace.RunNet(test_model.net, 1)
print "\nInput: ones"
print "Output:", workspace.FetchBlob("${tc.architectureOutputs[0]}") #TODO: Consider multiple output names
print "Output class:", np.argmax(workspace.FetchBlob("${tc.architectureOutputs[0]}")) #TODO: Consider multiple output names
print '\nsaving test model'
save_net(INIT_NET, PREDICT_NET, test_model)
def save_net(init_net_path, predict_net_path, model):
extra_params = []
extra_blobs = []
for blob in workspace.Blobs():
name = str(blob)
if name.endswith("_rm") or name.endswith("_riv"):
extra_params.append(name)
extra_blobs.append(workspace.FetchBlob(name))
for name, blob in zip(extra_params, extra_blobs):
model.params.append(name)
init_net, predict_net = mobile_exporter.Export(
workspace,
model.net,
model.params
)
print("Save the model to init_net.pb and predict_net.pb")
with open(predict_net_path + '.pb', 'wb') as f:
f.write(model.net._net.SerializeToString())
with open(init_net_path + '.pb', 'wb') as f:
f.write(init_net.SerializeToString())
print("Save the mode to init_net.pbtxt and predict_net.pbtxt")
with open(init_net_path + '.pbtxt', 'w') as f:
f.write(str(init_net))
with open(predict_net_path + '.pbtxt', 'w') as f:
f.write(str(predict_net))
def load_net(init_net_path, predict_net_path, device_opts):
init_def = caffe2_pb2.NetDef()
with open(init_net_path + '.pb', 'rb') as f:
init_def.ParseFromString(f.read())
init_def.device_option.CopyFrom(device_opts)
workspace.RunNetOnce(init_def.SerializeToString())
net_def = caffe2_pb2.NetDef()
with open(predict_net_path + '.pb', 'rb') as f:
net_def.ParseFromString(f.read())
net_def.device_option.CopyFrom(device_opts)
workspace.CreateNet(net_def.SerializeToString(), overwrite=True)
train(INIT_NET, PREDICT_NET, epochs=20, batch_size=100, device_opts=device_opts)
print '\n********************************************'
print 'loading test model'
load_net(INIT_NET, PREDICT_NET, device_opts=device_opts)
${tc.architectureInputs[0]} = np.ones((1,1,30,30)).astype('float32')
workspace.FeedBlob("${tc.architectureInputs[0]}", ${tc.architectureInputs[0]}, device_option=device_opts)
workspace.RunNet('test_net', 1)
print "\nInput: ones"
print "Output:", workspace.FetchBlob("${tc.architectureOutputs[0]}") #TODO: Consider multiple output names
print "Output class:", np.argmax(workspace.FetchBlob("${tc.architectureOutputs[0]}")) #TODO: Consider multiple output names
<#assign input = element.inputs[0]>
<#if element.padding??>
<#assign input = element.name>
${element.name} = mx.symbol.pad(data=${element.inputs[0]},
mode='constant',
pad_width=(${tc.join(element.padding, ",")}),
constant_value=0)
</#if>
${element.name} = mx.symbol.Convolution(data=${input},
kernel=(${tc.join(element.kernel, ",")}),
stride=(${tc.join(element.stride, ",")}),
num_filter=${element.channels?c},
no_bias=${element.noBias?string("True","False")},
name="${element.name}")
<#assign strideHeight = element.stride[0]>
<#assign strideWidth = element.stride[1]>
<#assign kernelHeight = element.kernel[0]>
<#assign kernelWidth = element.kernel[1]>
<#if element.padding??> <#-- Check wheather padding null is. -->
<#-- TODO: check how to adapt CNNArchLang argument pad_width=${element.padding[0]} -->
</#if>
<#if strideHeight == strideWidth>
<#assign strideParameter = "stride=${strideHeight}">
<#else>
<#assign strideParameter = "stride_h=${strideHeight}, stride_w=${strideWidth}">
</#if>
<#if kernelHeight == kernelWidth>
<#assign kernelParameter = "kernel=${kernelHeight}">
<#else>
<#assign kernelParameter = "kernel=[${kernelHeight},${kernelWidth}]">
</#if>
<#if input = tc.architectureInputs[0]> <#-- TODO: CHECK COMPARISON -->
${element.name} = brew.conv(model, '${input}', '${element.name}', dim_in=1, dim_out=${element.channels?c}, ${kernelParameter}, ${strideParameter})
<#else>
${element.name} = brew.conv(model, ${input}, '${element.name}', dim_in=${element.element.inputTypes[0].channels?c}, dim_out=${element.channels?c}, ${kernelParameter}, ${strideParameter})
</#if>
<#-- TODO: check how to adapt CNNArchLang argument no_bias=${element.noBias?string("True","False")} -->
<#include "OutputShape.ftl">
\ No newline at end of file
<#assign flatten = element.element.inputTypes[0].height != 1 || element.element.inputTypes[0].width != 1>
<#assign input = element.inputs[0]>
<#assign inputLayerType = element.element.getInputElement().get()?string>
<#assign inputChannels = element.element.inputTypes[0].channels?c>
<#assign inputHeight = element.element.inputTypes[0].height>
<#assign inputWidth = element.element.inputTypes[0].width>
<#if flatten>
${element.name} = mx.symbol.flatten(data=${input})
<#assign input = element.name>
<#-- TODO: check how to adapt CNNArchLang flatten #${element.name} = mx.symbol.flatten(data=${input}) -->
</#if>
${element.name} = mx.symbol.FullyConnected(data=${input},
num_hidden=${element.units?c},
no_bias=${element.noBias?string("True","False")},
name="${element.name}")
<#if inputLayerType?matches("FullyConnected") || (inputHeight == 1 && inputWidth == 1)>
${element.name} = brew.fc(model, ${input}, '${element.name}', dim_in=${inputChannels}, dim_out=${element.units?c})
<#else>
${element.name} = brew.fc(model, ${input}, '${element.name}', dim_in=${inputChannels} * ${inputHeight} * ${inputWidth}, dim_out=${element.units?c})
</#if>
<#-- TODO: check how to adapt CNNArchLang argument no_bias=${element.noBias?string("True","False")} -->
<#include "OutputShape.ftl">
\ No newline at end of file
......@@ -6,23 +6,16 @@
<#if heightIndex != 0><#assign indexList = indexList + [heightIndex]></#if>
<#if widthIndex != 0><#assign indexList = indexList + [widthIndex]></#if>
<#assign dimensions = element.element.outputTypes[0].dimensions>
${element.name} = mx.sym.var("${element.name}",
shape=(0,${tc.join(dimensions, ",")}))
${element.name}, label = AddInput(model, batch_size=100)
<#include "OutputShape.ftl">
<#if heightIndex != channelIndex + 1 || widthIndex != heightIndex + 1>
${element.name} = mx.symbol.transpose(data=${element.name},
${element.name} = mx.symbol.transpose(data=${element.name},mx.sym.var <#-- TODO: check how to adapt CNNArchLang transpose case -->
axes=(0,${tc.join(indexList, ",")}))
</#if>
<#if indexList?size != 3>
${element.name} = mx.symbol.reshape(data=${element.name},
${element.name} = mx.symbol.reshape(data=${element.name}, <#-- TODO: check how to adapt CNNArchLang transpose case -->
shape=(0,${element.element.outputTypes[0].channels?c},${element.element.outputTypes[0].height?c},${element.element.outputTypes[0].width?c}))
</#if>
if not data_mean is None:
assert(not data_std is None)
_data_mean_ = mx.sym.Variable("_data_mean_", shape=(${tc.join(dimensions, ",")}), init=MyConstant(value=data_mean.tolist()))
_data_mean_ = mx.sym.BlockGrad(_data_mean_)
_data_std_ = mx.sym.Variable("_data_std_", shape=(${tc.join(dimensions, ",")}), init=MyConstant(value=data_mean.tolist()))
_data_std_ = mx.sym.BlockGrad(_data_std_)
${element.name} = mx.symbol.broadcast_sub(${element.name}, _data_mean_)
${element.name} = mx.symbol.broadcast_div(${element.name}, _data_std_)
workspace.FeedBlob("${element.name}", ${element.name}, device_option=device_opts)
workspace.FeedBlob("label", label, device_option=device_opts)
<#assign input = element.inputs[0]>
<#if element.softmaxOutput>
${element.name} = mx.symbol.SoftmaxOutput(data=${element.inputs[0]},
name="${element.name}")
${element.name} = brew.softmax(model, ${input}, '${element.name}')
<#elseif element.logisticRegressionOutput>
${element.name} = mx.symbol.LogisticRegressionOutput(data=${element.inputs[0]},
${element.name} = mx.symbol.LogisticRegressionOutput(data=${element.inputs[0]},
name="${element.name}")
<#elseif element.linearRegressionOutput>
${element.name} = mx.symbol.LinearRegressionOutput(data=${element.inputs[0]},
${element.name} = mx.symbol.LinearRegressionOutput(data=${element.inputs[0]},
name="${element.name}")
</#if>
\ No newline at end of file
</#if>
model.net.AddExternalOutput(${element.name})
return ${element.name}
\ No newline at end of file
# ${element.name}, output shape: {<#list element.element.outputTypes as type>[${tc.join(type.dimensions, ",")}]</#list>}
# ${element.name}, output shape: {<#list element.element.outputTypes as type>[${tc.join(type.dimensions, ",")}]</#list>}
<#assign input = element.inputs[0]>
<#assign strideHeight = element.stride[0]>
<#assign strideWidth = element.stride[1]>
<#assign kernelHeight = element.kernel[0]>
<#assign kernelWidth = element.kernel[1]>
<#if element.padding??>
<#assign input = element.name>
${element.name} = mx.symbol.pad(data=${element.inputs[0]},
mode='constant',
pad_width=(${tc.join(element.padding, ",")}),
constant_value=0)
</#if>
${element.name} = mx.symbol.Pooling(data=${input},
kernel=(${tc.join(element.kernel, ",")}),
pool_type="${element.poolType}",
stride=(${tc.join(element.stride, ",")}),
name="${element.name}")
<#-- TODO: check how to adapt CNNArchLang argument pad_width=${element.padding[0]} -->
</#if>
<#if strideHeight == strideWidth>
<#assign strideParameter = "stride=${strideHeight}">
<#else>
<#assign strideParameter = "stride_h=${strideHeight}, stride_w=${strideWidth}">
</#if>
<#if kernelHeight == kernelWidth>
<#assign kernelParameter = "kernel=${kernelHeight}">
<#else>
<#assign kernelParameter = "kernel_h=${kernelHeight}, kernel_w=${kernelWidth}">
</#if>
<#if element.poolType == "max">
${element.name} = brew.max_pool(model, ${input}, '${element.name}', ${kernelParameter}, ${strideParameter})
<#elseif element.poolType == "avg">
${element.name} = brew.average_pool(model, ${input}, '${element.name}', ${kernelParameter}, ${strideParameter})
</#if>
<#include "OutputShape.ftl">
\ No newline at end of file
${element.name} = mx.symbol.Activation(data=${element.inputs[0]},
act_type='relu',
name="${element.name}")
<#assign input = element.inputs[0]>
${element.name} = brew.relu(model, ${input}, ${input})
${element.name} = mx.symbol.Activation(data=${element.inputs[0]},
act_type='sigmoid',
name="${element.name}")
<#assign input = element.inputs[0]>
${element.name} = model.net.Sigmoid(${input}, '${element.name}')
${element.name} = mx.symbol.Activation(data=${element.inputs[0]},
act_type='tanh',
name="${element.name}")
<#assign input = element.inputs[0]>
${element.name} = brew.tanh(model, ${input}, ${input})
architecture LeNet(img_height=28, img_width=28, img_channels=3, classes=10){
def input Z(0:255)^{img_channels, img_height, img_width} image
def output Q(0:1)^{classes} predictions
image ->
Convolution(kernel=(5,5), channels=20, padding="valid") ->
Pooling(pool_type="max", kernel=(2,2), stride=(2,2), padding="valid") ->
Convolution(kernel=(5,5), channels=50, padding="valid") ->
Pooling(pool_type="max", kernel=(2,2), stride=(2,2), padding="valid") ->
FullyConnected(units=500) ->
Relu() ->
FullyConnected(units=classes) ->
Softmax() ->
predictions
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment