Skip to content
Snippets Groups Projects
Commit aaa745b1 authored by Evgeny Kusmenko's avatar Evgeny Kusmenko
Browse files

Merge branch 'timmermanns' into 'master'

Removed github deploy plugin, changed repository to se nexus

See merge request CNNArch2MXNet!2
parents 0e3a557a 07d96962
No related branches found
No related tags found
No related merge requests found
Showing
with 1879 additions and 0 deletions
architecture InvalidIOShape2(){
def input Q(-oo:+oo)^{10.5} in1
def output Q(0:1)^{-10} out1
in1 ->
FullyConnected(units=64) ->
Tanh() ->
FullyConnected(units=10) ->
Softmax() ->
out1
}
\ No newline at end of file
architecture InvalidInputShape(inputs=10, classes=2){
def input Q(-oo:+oo)^{inputs} in1[2]
def output Q(0:1)^{classes} out1[2]
in1 ->
FullyConnected(units=64) ->
Tanh() ->
FullyConnected(units=classes) ->
Softmax() ->
out1
}
\ No newline at end of file
architecture InvalidRecursion(img_height=224, img_width=224, img_channels=3, classes=1000){
def input Z(0:255)^{img_channels, img_height, img_width} image
def output Q(0:1)^{classes} predictions
def conv(kernel, channels, stride=1, act=true){
Convolution(kernel=(kernel,kernel), channels=channels, stride=(stride,stride)) ->
BatchNorm() ->
resLayer(channels = 8) ->
Relu(?=act)
}
def skip(channels, stride){
Convolution(kernel=(1,1), channels=channels, stride=(stride,stride)) ->
BatchNorm()
}
def resLayer(channels, stride=1){
(
conv(kernel=3, channels=channels, stride=stride) ->
conv(kernel=3, channels=channels, stride=stride, act=false)
|
skip(channels=channels, stride=stride, ?=(stride!=1))
) ->
Add() ->
Relu()
}
image ->
conv(kernel=7, channels=64, stride=2) ->
Pooling(pool_type="max", kernel=(3,3), stride=(2,2)) ->
resLayer(channels=64, ->=3) ->
resLayer(channels=128, stride=2) ->
resLayer(channels=128, ->=3) ->
resLayer(channels=256, stride=2) ->
resLayer(channels=256, ->=5) ->
resLayer(channels=512, stride=2) ->
resLayer(channels=512, ->=2) ->
GlobalPooling(pool_type="avg") ->
FullyConnected(units=classes) ->
Softmax() ->
predictions
}
architecture MissingArgument(img_height=224, img_width=224, img_channels=3, classes=1000){
def input Z(0:255)^{img_channels, img_height, img_width} image
def output Q(0:1)^{classes} predictions
def conv(kernel, channels, stride=1, act=true){
Convolution(kernel=(kernel,kernel), channels=channels, stride=(stride,stride)) ->
BatchNorm() ->
Relu(?=act)
}
def skip(channels, stride){
Convolution(channels=96, stride=(stride,stride)) ->
BatchNorm()
}
def resLayer(channels, stride=1){
(
conv(kernel=3, channels=channels, stride=stride) ->
conv(kernel=3, channels=channels, stride=stride, act=false)
|
skip(channels=channels, stride=stride, ?=(stride!=1))
) ->
Add() ->
Relu()
}
image ->
conv(kernel=7, stride=2) ->
Pooling(pool_type="max") ->
resLayer(channels=64, ->=3) ->
resLayer(channels=128, stride=2) ->
GlobalPooling(pool_type="avg") ->
FullyConnected(units=classes) ->
Softmax() ->
predictions
}
\ No newline at end of file
architecture MissingIO2(inputs=10, classes=2){
def input Q(-oo:+oo)^{inputs} in1[2]
def output Q(0:1)^{classes} out1[2]
in1[0] ->
FullyConnected(units=64, no_bias=true) ->
Tanh() ->
FullyConnected(units=classes, no_bias=true) ->
Softmax() ->
out1[0]
}
\ No newline at end of file
architecture MissingLayerOperator(){
def input Q(-oo:+oo)^{10} in1
def output Q(0:1)^{2} out1
in1 ->
FullyConnected(units=64, no_bias=true) ->
Tanh()
FullyConnected(units=2, no_bias=true)
Softmax() ->
out1
}
\ No newline at end of file
architecture MissingMerge(inputs=10, classes=2){
def input Q(-oo:+oo)^{inputs} in1
def output Q(0:1)^{classes} out1
in1 ->
(
(
FullyConnected(units=16)
|
FullyConnected(units=16)
)
|
(
FullyConnected(units=16)
|
FullyConnected(units=16)
)
) ->
Add() ->
Tanh() ->
FullyConnected(units=classes) ->
Softmax() ->
out1
}
\ No newline at end of file
architecture MissingParallelBrackets(img_height=224, img_width=224, img_channels=3, classes=10){
def input Z(0:255)^{img_channels, img_height, img_width} image
def output Q(0:1)^{classes} predictions
def conv(kernel, channels, hasPool=true, convStride=(1,1)){
Convolution(kernel=kernel, channels=channels, stride=convStride) ->
Relu() ->
Pooling(pool_type="max", kernel=(3,3), stride=(2,2), ?=hasPool)
}
def fc(){
FullyConnected(units=4096) ->
Relu() ->
Dropout()
}
image ->
conv(kernel=(11,11), channels=96, convStride=(4,4)) ->
Lrn(nsize=5, alpha=0.0001, beta=0.75) ->
SplitData(index=0, n=2) ->
conv(kernel=(5,5), channels=128) ->
Lrn(nsize=5, alpha=0.0001, beta=0.75)
|
SplitData(index=1, n=2) ->
conv(kernel=(5,5), channels=128) ->
Lrn(nsize=5, alpha=0.0001, beta=0.75)
->
conv(kernel=(3,3), channels=384 ,hasPool=false) ->
SplitData(index=0, n=2) ->
conv(kernel=(3,3), channels=192, hasPool=false) ->
conv(kernel=(3,3), channels=128)
|
SplitData(index=1, n=2) ->
conv(kernel=(3,3), channels=192, hasPool=false) ->
conv(kernel=(3,3), channels=128)
->
fc() ->
fc() ->
FullyConnected(units=classes) ->
Softmax() ->
predictions
}
\ No newline at end of file
architecture NotIOArray(inputs=10, classes=2){
def input Q(-oo:+oo)^{inputs} in1
def output Q(0:1)^{classes} out1
in1[1] ->
FullyConnected(units=64, no_bias=true) ->
Tanh() ->
FullyConnected(units=classes, no_bias=true) ->
Softmax() ->
out1[0]
}
\ No newline at end of file
architecture UnfinishedArchitecture(inputs=10, classes=2){
def input Q(-oo:+oo)^{inputs} in1
def output Q(0:1)^{classes} out1
in1 ->
FullyConnected(units=64, no_bias=true) ->
Tanh() ->
(
FullyConnected(units=classes, no_bias=true) ->
Softmax() ->
out1
|
)
}
\ No newline at end of file
architecture UnknownIO(){
in1 ->
FullyConnected(units=64) ->
Tanh() ->
FullyConnected(units=10) ->
Softmax() ->
out1
}
\ No newline at end of file
architecture UnknownMethod(inputs=10, classes=2){
def input Q(-oo:+oo)^{inputs} in1
def output Q(0:1)^{classes} out1
in1 ->
FllyConnected(units=64, no_bias=true) ->
Tanh() ->
FullyConnected(units=classes, no_bias=true) ->
Softmax() ->
out1
}
\ No newline at end of file
architecture UnknownVariableName(inputs=10){
def input Q(-oo:+oo)^{inputs} in1
def output Q(0:1)^{2} out1
in1 ->
FullyConnected(units=64) ->
Tanh() ->
FullyConnected(units=classes) ->
Softmax() ->
out1
}
\ No newline at end of file
architecture WrongArgument(inputs=10, classes=2){
def input Q(-oo:+oo)^{inputs} in1
def output Q(0:1)^{classes} out1
in1 ->
FullyConnected(units=64, bias=true) ->
Tanh(asd=1) ->
FullyConnected(unit=classes) ->
Softmax() ->
out1
}
\ No newline at end of file
architecture WrongIOType(inputs=10, classes=2){
def input Q(-oo:+oo)^{inputs} in1
def output Q(0:1)^{classes} out1
in1 ->
FullyConnected(units=64, no_bias=true) ->
Tanh() ->
FullyConnected(units=classes, no_bias=true) ->
Relu() ->
out1
}
\ No newline at end of file
architecture WrongRangeOperator(inputs=10, classes=2){
def input Q(-oo:+oo)^{inputs} in1
def output Q(0:1)^{classes} out1
in1 ->
FullyConnected(units=[64->..|65]) ->
Tanh() ->
FullyConnected(units=[classes |..-> classes + 1] ) ->
Softmax() ->
out1
}
\ No newline at end of file
#ifndef CNNBUFFERFILE_H
#define CNNBUFFERFILE_H
#include <stdio.h>
#include <iostream>
#include <fstream>
// Read file to buffer
class BufferFile {
public :
std::string file_path_;
int length_;
char* buffer_;
explicit BufferFile(std::string file_path)
:file_path_(file_path) {
std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary);
if (!ifs) {
std::cerr << "Can't open the file. Please check " << file_path << ". \n";
length_ = 0;
buffer_ = NULL;
return;
}
ifs.seekg(0, std::ios::end);
length_ = ifs.tellg();
ifs.seekg(0, std::ios::beg);
std::cout << file_path.c_str() << " ... "<< length_ << " bytes\n";
buffer_ = new char[sizeof(char) * length_];
ifs.read(buffer_, length_);
ifs.close();
}
int GetLength() {
return length_;
}
char* GetBuffer() {
return buffer_;
}
~BufferFile() {
if (buffer_) {
delete[] buffer_;
buffer_ = NULL;
}
}
};
#endif // CNNBUFFERFILE_H
\ No newline at end of file
import mxnet as mx
import logging
import os
import errno
import shutil
import h5py
import sys
import numpy as np
@mx.init.register
class MyConstant(mx.init.Initializer):
def __init__(self, value):
super(MyConstant, self).__init__(value=value)
self.value = value
def _init_weight(self, _, arr):
arr[:] = mx.nd.array(self.value)
class CNNCreator_Alexnet:
module = None
_data_dir_ = "data/Alexnet/"
_model_dir_ = "model/Alexnet/"
_model_prefix_ = "Alexnet"
_input_names_ = ['data']
_input_shapes_ = [(3,224,224)]
_output_names_ = ['predictions_label']
def load(self, context):
lastEpoch = 0
param_file = None
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_newest-0000.params")
except OSError:
pass
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_newest-symbol.json")
except OSError:
pass
if os.path.isdir(self._model_dir_):
for file in os.listdir(self._model_dir_):
if ".params" in file and self._model_prefix_ in file:
epochStr = file.replace(".params","").replace(self._model_prefix_ + "-","")
epoch = int(epochStr)
if epoch > lastEpoch:
lastEpoch = epoch
param_file = file
if param_file is None:
return 0
else:
logging.info("Loading checkpoint: " + param_file)
self.module.load(prefix=self._model_dir_ + self._model_prefix_,
epoch=lastEpoch,
data_names=self._input_names_,
label_names=self._output_names_,
context=context)
return lastEpoch
def load_data(self, batch_size):
train_h5, test_h5 = self.load_h5_files()
data_mean = train_h5[self._input_names_[0]][:].mean(axis=0)
data_std = train_h5[self._input_names_[0]][:].std(axis=0) + 1e-5
train_iter = mx.io.NDArrayIter(train_h5[self._input_names_[0]],
train_h5[self._output_names_[0]],
batch_size=batch_size,
data_name=self._input_names_[0],
label_name=self._output_names_[0])
test_iter = None
if test_h5 != None:
test_iter = mx.io.NDArrayIter(test_h5[self._input_names_[0]],
test_h5[self._output_names_[0]],
batch_size=batch_size,
data_name=self._input_names_[0],
label_name=self._output_names_[0])
return train_iter, test_iter, data_mean, data_std
def load_h5_files(self):
train_h5 = None
test_h5 = None
train_path = self._data_dir_ + "train.h5"
test_path = self._data_dir_ + "test.h5"
if os.path.isfile(train_path):
train_h5 = h5py.File(train_path, 'r')
if not (self._input_names_[0] in train_h5 and self._output_names_[0] in train_h5):
logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the datasets: "
+ "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
sys.exit(1)
test_iter = None
if os.path.isfile(test_path):
test_h5 = h5py.File(test_path, 'r')
if not (self._input_names_[0] in test_h5 and self._output_names_[0] in test_h5):
logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the datasets: "
+ "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
sys.exit(1)
else:
logging.warning("Couldn't load test set. File '" + os.path.abspath(test_path) + "' does not exist.")
return train_h5, test_h5
else:
logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.")
sys.exit(1)
def train(self, batch_size,
num_epoch=10,
optimizer='adam',
optimizer_params=(('learning_rate', 0.001),),
load_checkpoint=True,
context='gpu',
checkpoint_period=5,
normalize=True):
if context == 'gpu':
mx_context = mx.gpu()
elif context == 'cpu':
mx_context = mx.cpu()
else:
logging.error("Context argument is '" + context + "'. Only 'cpu' and 'gpu are valid arguments'.")
if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay']
del optimizer_params['weight_decay']
if 'learning_rate_decay' in optimizer_params:
min_learning_rate = 1e-08
if 'learning_rate_minimum' in optimizer_params:
min_learning_rate = optimizer_params['learning_rate_minimum']
del optimizer_params['learning_rate_minimum']
optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
optimizer_params['step_size'],
factor=optimizer_params['learning_rate_decay'],
stop_factor_lr=min_learning_rate)
del optimizer_params['step_size']
del optimizer_params['learning_rate_decay']
train_iter, test_iter, data_mean, data_std = self.load_data(batch_size)
if self.module == None:
if normalize:
self.construct(mx_context, data_mean, data_std)
else:
self.construct(mx_context)
begin_epoch = 0
if load_checkpoint:
begin_epoch = self.load(mx_context)
else:
if os.path.isdir(self._model_dir_):
shutil.rmtree(self._model_dir_)
try:
os.makedirs(self._model_dir_)
except OSError:
if not os.path.isdir(self._model_dir_):
raise
self.module.fit(
train_data=train_iter,
eval_data=test_iter,
optimizer=optimizer,
optimizer_params=optimizer_params,
batch_end_callback=mx.callback.Speedometer(batch_size),
epoch_end_callback=mx.callback.do_checkpoint(prefix=self._model_dir_ + self._model_prefix_, period=checkpoint_period),
begin_epoch=begin_epoch,
num_epoch=num_epoch + begin_epoch)
self.module.save_checkpoint(self._model_dir_ + self._model_prefix_, num_epoch + begin_epoch)
self.module.save_checkpoint(self._model_dir_ + self._model_prefix_ + '_newest', 0)
def construct(self, context, data_mean=None, data_std=None):
data = mx.sym.var("data",
shape=(0,3,224,224))
# data, output shape: {[3,224,224]}
if not data_mean is None:
assert(not data_std is None)
_data_mean_ = mx.sym.Variable("_data_mean_", shape=(3,224,224), init=MyConstant(value=data_mean.tolist()))
_data_mean_ = mx.sym.BlockGrad(_data_mean_)
_data_std_ = mx.sym.Variable("_data_std_", shape=(3,224,224), init=MyConstant(value=data_mean.tolist()))
_data_std_ = mx.sym.BlockGrad(_data_std_)
data = mx.symbol.broadcast_sub(data, _data_mean_)
data = mx.symbol.broadcast_div(data, _data_std_)
conv1_ = mx.symbol.pad(data=data,
mode='constant',
pad_width=(0,0,0,0,2,1,2,1),
constant_value=0)
conv1_ = mx.symbol.Convolution(data=conv1_,
kernel=(11,11),
stride=(4,4),
num_filter=96,
no_bias=False,
name="conv1_")
# conv1_, output shape: {[96,55,55]}
lrn1_ = mx.symbol.LRN(data=conv1_,
alpha=0.0001,
beta=0.75,
knorm=2,
nsize=5,
name="lrn1_")
pool1_ = mx.symbol.Pooling(data=lrn1_,
kernel=(3,3),
pool_type="max",
stride=(2,2),
name="pool1_")
# pool1_, output shape: {[96,27,27]}
relu1_ = mx.symbol.Activation(data=pool1_,
act_type='relu',
name="relu1_")
split1_ = mx.symbol.split(data=relu1_,
num_outputs=2,
axis=1,
name="split1_")
# split1_, output shape: {[48,27,27][48,27,27]}
get2_1_ = split1_[0]
conv2_1_ = mx.symbol.pad(data=get2_1_,
mode='constant',
pad_width=(0,0,0,0,2,2,2,2),
constant_value=0)
conv2_1_ = mx.symbol.Convolution(data=conv2_1_,
kernel=(5,5),
stride=(1,1),
num_filter=128,
no_bias=False,
name="conv2_1_")
# conv2_1_, output shape: {[128,27,27]}
lrn2_1_ = mx.symbol.LRN(data=conv2_1_,
alpha=0.0001,
beta=0.75,
knorm=2,
nsize=5,
name="lrn2_1_")
pool2_1_ = mx.symbol.Pooling(data=lrn2_1_,
kernel=(3,3),
pool_type="max",
stride=(2,2),
name="pool2_1_")
# pool2_1_, output shape: {[128,13,13]}
relu2_1_ = mx.symbol.Activation(data=pool2_1_,
act_type='relu',
name="relu2_1_")
get2_2_ = split1_[1]
conv2_2_ = mx.symbol.pad(data=get2_2_,
mode='constant',
pad_width=(0,0,0,0,2,2,2,2),
constant_value=0)
conv2_2_ = mx.symbol.Convolution(data=conv2_2_,
kernel=(5,5),
stride=(1,1),
num_filter=128,
no_bias=False,
name="conv2_2_")
# conv2_2_, output shape: {[128,27,27]}
lrn2_2_ = mx.symbol.LRN(data=conv2_2_,
alpha=0.0001,
beta=0.75,
knorm=2,
nsize=5,
name="lrn2_2_")
pool2_2_ = mx.symbol.Pooling(data=lrn2_2_,
kernel=(3,3),
pool_type="max",
stride=(2,2),
name="pool2_2_")
# pool2_2_, output shape: {[128,13,13]}
relu2_2_ = mx.symbol.Activation(data=pool2_2_,
act_type='relu',
name="relu2_2_")
concatenate3_ = mx.symbol.concat(relu2_1_, relu2_2_,
dim=1,
name="concatenate3_")
# concatenate3_, output shape: {[256,13,13]}
conv3_ = mx.symbol.pad(data=concatenate3_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv3_ = mx.symbol.Convolution(data=conv3_,
kernel=(3,3),
stride=(1,1),
num_filter=384,
no_bias=False,
name="conv3_")
# conv3_, output shape: {[384,13,13]}
relu3_ = mx.symbol.Activation(data=conv3_,
act_type='relu',
name="relu3_")
split3_ = mx.symbol.split(data=relu3_,
num_outputs=2,
axis=1,
name="split3_")
# split3_, output shape: {[192,13,13][192,13,13]}
get4_1_ = split3_[0]
conv4_1_ = mx.symbol.pad(data=get4_1_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv4_1_ = mx.symbol.Convolution(data=conv4_1_,
kernel=(3,3),
stride=(1,1),
num_filter=192,
no_bias=False,
name="conv4_1_")
# conv4_1_, output shape: {[192,13,13]}
relu4_1_ = mx.symbol.Activation(data=conv4_1_,
act_type='relu',
name="relu4_1_")
conv5_1_ = mx.symbol.pad(data=relu4_1_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv5_1_ = mx.symbol.Convolution(data=conv5_1_,
kernel=(3,3),
stride=(1,1),
num_filter=128,
no_bias=False,
name="conv5_1_")
# conv5_1_, output shape: {[128,13,13]}
pool5_1_ = mx.symbol.Pooling(data=conv5_1_,
kernel=(3,3),
pool_type="max",
stride=(2,2),
name="pool5_1_")
# pool5_1_, output shape: {[128,6,6]}
relu5_1_ = mx.symbol.Activation(data=pool5_1_,
act_type='relu',
name="relu5_1_")
get4_2_ = split3_[1]
conv4_2_ = mx.symbol.pad(data=get4_2_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv4_2_ = mx.symbol.Convolution(data=conv4_2_,
kernel=(3,3),
stride=(1,1),
num_filter=192,
no_bias=False,
name="conv4_2_")
# conv4_2_, output shape: {[192,13,13]}
relu4_2_ = mx.symbol.Activation(data=conv4_2_,
act_type='relu',
name="relu4_2_")
conv5_2_ = mx.symbol.pad(data=relu4_2_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv5_2_ = mx.symbol.Convolution(data=conv5_2_,
kernel=(3,3),
stride=(1,1),
num_filter=128,
no_bias=False,
name="conv5_2_")
# conv5_2_, output shape: {[128,13,13]}
pool5_2_ = mx.symbol.Pooling(data=conv5_2_,
kernel=(3,3),
pool_type="max",
stride=(2,2),
name="pool5_2_")
# pool5_2_, output shape: {[128,6,6]}
relu5_2_ = mx.symbol.Activation(data=pool5_2_,
act_type='relu',
name="relu5_2_")
concatenate6_ = mx.symbol.concat(relu5_1_, relu5_2_,
dim=1,
name="concatenate6_")
# concatenate6_, output shape: {[256,6,6]}
fc6_ = mx.symbol.flatten(data=concatenate6_)
fc6_ = mx.symbol.FullyConnected(data=fc6_,
num_hidden=4096,
no_bias=False,
name="fc6_")
relu6_ = mx.symbol.Activation(data=fc6_,
act_type='relu',
name="relu6_")
dropout6_ = mx.symbol.Dropout(data=relu6_,
p=0.5,
name="dropout6_")
fc7_ = mx.symbol.FullyConnected(data=dropout6_,
num_hidden=4096,
no_bias=False,
name="fc7_")
relu7_ = mx.symbol.Activation(data=fc7_,
act_type='relu',
name="relu7_")
dropout7_ = mx.symbol.Dropout(data=relu7_,
p=0.5,
name="dropout7_")
fc8_ = mx.symbol.FullyConnected(data=dropout7_,
num_hidden=10,
no_bias=False,
name="fc8_")
predictions = mx.symbol.SoftmaxOutput(data=fc8_,
name="predictions")
self.module = mx.mod.Module(symbol=mx.symbol.Group([predictions]),
data_names=self._input_names_,
label_names=self._output_names_,
context=context)
This diff is collapsed.
import mxnet as mx
import logging
import os
import errno
import shutil
import h5py
import sys
import numpy as np
@mx.init.register
class MyConstant(mx.init.Initializer):
def __init__(self, value):
super(MyConstant, self).__init__(value=value)
self.value = value
def _init_weight(self, _, arr):
arr[:] = mx.nd.array(self.value)
class CNNCreator_VGG16:
module = None
_data_dir_ = "data/VGG16/"
_model_dir_ = "model/VGG16/"
_model_prefix_ = "VGG16"
_input_names_ = ['data']
_input_shapes_ = [(3,224,224)]
_output_names_ = ['predictions_label']
def load(self, context):
lastEpoch = 0
param_file = None
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_newest-0000.params")
except OSError:
pass
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_newest-symbol.json")
except OSError:
pass
if os.path.isdir(self._model_dir_):
for file in os.listdir(self._model_dir_):
if ".params" in file and self._model_prefix_ in file:
epochStr = file.replace(".params","").replace(self._model_prefix_ + "-","")
epoch = int(epochStr)
if epoch > lastEpoch:
lastEpoch = epoch
param_file = file
if param_file is None:
return 0
else:
logging.info("Loading checkpoint: " + param_file)
self.module.load(prefix=self._model_dir_ + self._model_prefix_,
epoch=lastEpoch,
data_names=self._input_names_,
label_names=self._output_names_,
context=context)
return lastEpoch
def load_data(self, batch_size):
train_h5, test_h5 = self.load_h5_files()
data_mean = train_h5[self._input_names_[0]][:].mean(axis=0)
data_std = train_h5[self._input_names_[0]][:].std(axis=0) + 1e-5
train_iter = mx.io.NDArrayIter(train_h5[self._input_names_[0]],
train_h5[self._output_names_[0]],
batch_size=batch_size,
data_name=self._input_names_[0],
label_name=self._output_names_[0])
test_iter = None
if test_h5 != None:
test_iter = mx.io.NDArrayIter(test_h5[self._input_names_[0]],
test_h5[self._output_names_[0]],
batch_size=batch_size,
data_name=self._input_names_[0],
label_name=self._output_names_[0])
return train_iter, test_iter, data_mean, data_std
def load_h5_files(self):
train_h5 = None
test_h5 = None
train_path = self._data_dir_ + "train.h5"
test_path = self._data_dir_ + "test.h5"
if os.path.isfile(train_path):
train_h5 = h5py.File(train_path, 'r')
if not (self._input_names_[0] in train_h5 and self._output_names_[0] in train_h5):
logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the datasets: "
+ "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
sys.exit(1)
test_iter = None
if os.path.isfile(test_path):
test_h5 = h5py.File(test_path, 'r')
if not (self._input_names_[0] in test_h5 and self._output_names_[0] in test_h5):
logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the datasets: "
+ "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
sys.exit(1)
else:
logging.warning("Couldn't load test set. File '" + os.path.abspath(test_path) + "' does not exist.")
return train_h5, test_h5
else:
logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.")
sys.exit(1)
def train(self, batch_size,
num_epoch=10,
optimizer='adam',
optimizer_params=(('learning_rate', 0.001),),
load_checkpoint=True,
context='gpu',
checkpoint_period=5,
normalize=True):
if context == 'gpu':
mx_context = mx.gpu()
elif context == 'cpu':
mx_context = mx.cpu()
else:
logging.error("Context argument is '" + context + "'. Only 'cpu' and 'gpu are valid arguments'.")
if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay']
del optimizer_params['weight_decay']
if 'learning_rate_decay' in optimizer_params:
min_learning_rate = 1e-08
if 'learning_rate_minimum' in optimizer_params:
min_learning_rate = optimizer_params['learning_rate_minimum']
del optimizer_params['learning_rate_minimum']
optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
optimizer_params['step_size'],
factor=optimizer_params['learning_rate_decay'],
stop_factor_lr=min_learning_rate)
del optimizer_params['step_size']
del optimizer_params['learning_rate_decay']
train_iter, test_iter, data_mean, data_std = self.load_data(batch_size)
if self.module == None:
if normalize:
self.construct(mx_context, data_mean, data_std)
else:
self.construct(mx_context)
begin_epoch = 0
if load_checkpoint:
begin_epoch = self.load(mx_context)
else:
if os.path.isdir(self._model_dir_):
shutil.rmtree(self._model_dir_)
try:
os.makedirs(self._model_dir_)
except OSError:
if not os.path.isdir(self._model_dir_):
raise
self.module.fit(
train_data=train_iter,
eval_data=test_iter,
optimizer=optimizer,
optimizer_params=optimizer_params,
batch_end_callback=mx.callback.Speedometer(batch_size),
epoch_end_callback=mx.callback.do_checkpoint(prefix=self._model_dir_ + self._model_prefix_, period=checkpoint_period),
begin_epoch=begin_epoch,
num_epoch=num_epoch + begin_epoch)
self.module.save_checkpoint(self._model_dir_ + self._model_prefix_, num_epoch + begin_epoch)
self.module.save_checkpoint(self._model_dir_ + self._model_prefix_ + '_newest', 0)
def construct(self, context, data_mean=None, data_std=None):
data = mx.sym.var("data",
shape=(0,3,224,224))
# data, output shape: {[3,224,224]}
if not data_mean is None:
assert(not data_std is None)
_data_mean_ = mx.sym.Variable("_data_mean_", shape=(3,224,224), init=MyConstant(value=data_mean.tolist()))
_data_mean_ = mx.sym.BlockGrad(_data_mean_)
_data_std_ = mx.sym.Variable("_data_std_", shape=(3,224,224), init=MyConstant(value=data_mean.tolist()))
_data_std_ = mx.sym.BlockGrad(_data_std_)
data = mx.symbol.broadcast_sub(data, _data_mean_)
data = mx.symbol.broadcast_div(data, _data_std_)
conv1_ = mx.symbol.pad(data=data,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv1_ = mx.symbol.Convolution(data=conv1_,
kernel=(3,3),
stride=(1,1),
num_filter=64,
no_bias=False,
name="conv1_")
# conv1_, output shape: {[64,224,224]}
relu1_ = mx.symbol.Activation(data=conv1_,
act_type='relu',
name="relu1_")
conv2_ = mx.symbol.pad(data=relu1_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv2_ = mx.symbol.Convolution(data=conv2_,
kernel=(3,3),
stride=(1,1),
num_filter=64,
no_bias=False,
name="conv2_")
# conv2_, output shape: {[64,224,224]}
relu2_ = mx.symbol.Activation(data=conv2_,
act_type='relu',
name="relu2_")
pool2_ = mx.symbol.Pooling(data=relu2_,
kernel=(2,2),
pool_type="max",
stride=(2,2),
name="pool2_")
# pool2_, output shape: {[64,112,112]}
conv3_ = mx.symbol.pad(data=pool2_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv3_ = mx.symbol.Convolution(data=conv3_,
kernel=(3,3),
stride=(1,1),
num_filter=128,
no_bias=False,
name="conv3_")
# conv3_, output shape: {[128,112,112]}
relu3_ = mx.symbol.Activation(data=conv3_,
act_type='relu',
name="relu3_")
conv4_ = mx.symbol.pad(data=relu3_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv4_ = mx.symbol.Convolution(data=conv4_,
kernel=(3,3),
stride=(1,1),
num_filter=128,
no_bias=False,
name="conv4_")
# conv4_, output shape: {[128,112,112]}
relu4_ = mx.symbol.Activation(data=conv4_,
act_type='relu',
name="relu4_")
pool4_ = mx.symbol.Pooling(data=relu4_,
kernel=(2,2),
pool_type="max",
stride=(2,2),
name="pool4_")
# pool4_, output shape: {[128,56,56]}
conv5_ = mx.symbol.pad(data=pool4_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv5_ = mx.symbol.Convolution(data=conv5_,
kernel=(3,3),
stride=(1,1),
num_filter=256,
no_bias=False,
name="conv5_")
# conv5_, output shape: {[256,56,56]}
relu5_ = mx.symbol.Activation(data=conv5_,
act_type='relu',
name="relu5_")
conv6_ = mx.symbol.pad(data=relu5_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv6_ = mx.symbol.Convolution(data=conv6_,
kernel=(3,3),
stride=(1,1),
num_filter=256,
no_bias=False,
name="conv6_")
# conv6_, output shape: {[256,56,56]}
relu6_ = mx.symbol.Activation(data=conv6_,
act_type='relu',
name="relu6_")
conv7_ = mx.symbol.pad(data=relu6_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv7_ = mx.symbol.Convolution(data=conv7_,
kernel=(3,3),
stride=(1,1),
num_filter=256,
no_bias=False,
name="conv7_")
# conv7_, output shape: {[256,56,56]}
relu7_ = mx.symbol.Activation(data=conv7_,
act_type='relu',
name="relu7_")
pool7_ = mx.symbol.Pooling(data=relu7_,
kernel=(2,2),
pool_type="max",
stride=(2,2),
name="pool7_")
# pool7_, output shape: {[256,28,28]}
conv8_ = mx.symbol.pad(data=pool7_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv8_ = mx.symbol.Convolution(data=conv8_,
kernel=(3,3),
stride=(1,1),
num_filter=512,
no_bias=False,
name="conv8_")
# conv8_, output shape: {[512,28,28]}
relu8_ = mx.symbol.Activation(data=conv8_,
act_type='relu',
name="relu8_")
conv9_ = mx.symbol.pad(data=relu8_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv9_ = mx.symbol.Convolution(data=conv9_,
kernel=(3,3),
stride=(1,1),
num_filter=512,
no_bias=False,
name="conv9_")
# conv9_, output shape: {[512,28,28]}
relu9_ = mx.symbol.Activation(data=conv9_,
act_type='relu',
name="relu9_")
conv10_ = mx.symbol.pad(data=relu9_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv10_ = mx.symbol.Convolution(data=conv10_,
kernel=(3,3),
stride=(1,1),
num_filter=512,
no_bias=False,
name="conv10_")
# conv10_, output shape: {[512,28,28]}
relu10_ = mx.symbol.Activation(data=conv10_,
act_type='relu',
name="relu10_")
pool10_ = mx.symbol.Pooling(data=relu10_,
kernel=(2,2),
pool_type="max",
stride=(2,2),
name="pool10_")
# pool10_, output shape: {[512,14,14]}
conv11_ = mx.symbol.pad(data=pool10_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv11_ = mx.symbol.Convolution(data=conv11_,
kernel=(3,3),
stride=(1,1),
num_filter=512,
no_bias=False,
name="conv11_")
# conv11_, output shape: {[512,14,14]}
relu11_ = mx.symbol.Activation(data=conv11_,
act_type='relu',
name="relu11_")
conv12_ = mx.symbol.pad(data=relu11_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv12_ = mx.symbol.Convolution(data=conv12_,
kernel=(3,3),
stride=(1,1),
num_filter=512,
no_bias=False,
name="conv12_")
# conv12_, output shape: {[512,14,14]}
relu12_ = mx.symbol.Activation(data=conv12_,
act_type='relu',
name="relu12_")
conv13_ = mx.symbol.pad(data=relu12_,
mode='constant',
pad_width=(0,0,0,0,1,1,1,1),
constant_value=0)
conv13_ = mx.symbol.Convolution(data=conv13_,
kernel=(3,3),
stride=(1,1),
num_filter=512,
no_bias=False,
name="conv13_")
# conv13_, output shape: {[512,14,14]}
relu13_ = mx.symbol.Activation(data=conv13_,
act_type='relu',
name="relu13_")
pool13_ = mx.symbol.Pooling(data=relu13_,
kernel=(2,2),
pool_type="max",
stride=(2,2),
name="pool13_")
# pool13_, output shape: {[512,7,7]}
fc13_ = mx.symbol.flatten(data=pool13_)
fc13_ = mx.symbol.FullyConnected(data=fc13_,
num_hidden=4096,
no_bias=False,
name="fc13_")
relu14_ = mx.symbol.Activation(data=fc13_,
act_type='relu',
name="relu14_")
dropout14_ = mx.symbol.Dropout(data=relu14_,
p=0.5,
name="dropout14_")
fc14_ = mx.symbol.FullyConnected(data=dropout14_,
num_hidden=4096,
no_bias=False,
name="fc14_")
relu15_ = mx.symbol.Activation(data=fc14_,
act_type='relu',
name="relu15_")
dropout15_ = mx.symbol.Dropout(data=relu15_,
p=0.5,
name="dropout15_")
fc15_ = mx.symbol.FullyConnected(data=dropout15_,
num_hidden=1000,
no_bias=False,
name="fc15_")
predictions = mx.symbol.SoftmaxOutput(data=fc15_,
name="predictions")
self.module = mx.mod.Module(symbol=mx.symbol.Group([predictions]),
data_names=self._input_names_,
label_names=self._output_names_,
context=context)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment