from caffe2.python import workspace, core, model_helper, brew, optimizer
from caffe2.python.predictor import mobile_exporter
from caffe2.proto import caffe2_pb2
import numpy as np
import math
import logging
import os
import sys
import lmdb


class CNNCreator_CifarClassifierNetwork:

    module = None
    _current_dir_ = os.path.join('./')
    _data_dir_    = os.path.join(_current_dir_, 'data/CifarClassifierNetwork')
    _model_dir_   = os.path.join(_current_dir_, 'model', 'CifarClassifierNetwork')

    _init_net_    = os.path.join(_model_dir_, 'init_net.pb')
    _predict_net_ = os.path.join(_model_dir_, 'predict_net.pb')

    def get_total_num_iter(self, num_epoch, batch_size, dataset_size):
        # Force floating point calculation
        batch_size_float = float(batch_size)
        dataset_size_float = float(dataset_size)

        iterations_float = math.ceil(num_epoch * (dataset_size_float / batch_size_float))
        iterations_int = int(iterations_float)

        return iterations_int
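
    # Worked example: 10 epochs over the 50,000 CIFAR-10 training images with
    # batch size 64 gives ceil(10 * 50000/64) = ceil(7812.5) = 7813 iterations.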


    def add_input(self, model, batch_size, db, db_type, device_opts):
        with core.DeviceScope(device_opts):
            if not os.path.isdir(db):
                logging.error("Data loading failure. Directory '" + os.path.abspath(db) + "' does not exist.")
                sys.exit(1)
            elif not (os.path.isfile(os.path.join(db, 'data.mdb')) and os.path.isfile(os.path.join(db, 'lock.mdb'))):
                logging.error("Data loading failure. Directory '" + os.path.abspath(db) + "' does not contain lmdb files.")
                sys.exit(1)

            # load the data
            data_uint8, label = brew.db_input(
                model,
                blobs_out=["data_uint8", "label"],
                batch_size=batch_size,
                db=db,
                db_type=db_type,
            )
            # cast the data to float
            data = model.Cast(data_uint8, "data", to=core.DataType.FLOAT)

            # scale data from [0,255] down to [0,1]
            data = model.Scale(data, data, scale=float(1. / 256))

            # the input data does not need a gradient in the backward pass
            data = model.StopGradient(data, data)

            # read the dataset size directly from the LMDB metadata
            dataset_size = int(lmdb.open(db).stat()['entries'])

            return data, label, dataset_size
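
    # Sanity-check sketch (assumption: a fresh ModelHelper and an existing LMDB):
    #   model = model_helper.ModelHelper(name="check_net", arg_scope={"order": "NCHW"})
    #   data, label, size = self.add_input(model, 64, os.path.join(self._data_dir_, 'train_lmdb'), 'lmdb', device_opts)
    #   workspace.RunNetOnce(model.param_init_net)
    #   workspace.CreateNet(model.net)
    #   workspace.RunNet(model.net)
    #   workspace.FetchBlob("data").shape   # expected: (64, 3, 32, 32)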

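    # The graph below is a ResNet-style stack of ten residual blocks in four stages
    # (8, 16, 32, and 64 channels; the 32x32 input is halved to 16x16, 8x8, and 4x4
    # at the stage transitions), followed by global average pooling, a 128-unit FC
    # layer with dropout, and a 10-way softmax classifier.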
    def create_model(self, model, data, device_opts, is_test):
        with core.DeviceScope(device_opts):
            # The relu/add calls below reference batchnorm*_ and add*_ blobs that were
            # missing here; a brew.spatial_bn after every convolution and a brew.sum
            # merging each residual branch are assumed and reconstructed accordingly.

            # data, output shape: {[3,32,32]}
            conv2_1_ = brew.conv(model, data, 'conv2_1_', dim_in=3, dim_out=8, kernel=3, stride=1, pad=1)
            # conv2_1_, output shape: {[8,32,32]}
            batchnorm2_1_ = brew.spatial_bn(model, conv2_1_, 'batchnorm2_1_', dim_in=8, is_test=is_test)
            relu2_1_ = brew.relu(model, batchnorm2_1_, batchnorm2_1_)
            conv3_1_ = brew.conv(model, relu2_1_, 'conv3_1_', dim_in=8, dim_out=8, kernel=3, stride=1, pad=1)
            # conv3_1_, output shape: {[8,32,32]}
            batchnorm3_1_ = brew.spatial_bn(model, conv3_1_, 'batchnorm3_1_', dim_in=8, is_test=is_test)
            # 1x1 shortcut convolution; unpadded so the output matches {[8,32,32]}
            conv2_2_ = brew.conv(model, data, 'conv2_2_', dim_in=3, dim_out=8, kernel=1, stride=1)
            # conv2_2_, output shape: {[8,32,32]}
            batchnorm2_2_ = brew.spatial_bn(model, conv2_2_, 'batchnorm2_2_', dim_in=8, is_test=is_test)
            add4_ = brew.sum(model, [batchnorm3_1_, batchnorm2_2_], 'add4_')
            relu4_ = brew.relu(model, add4_, add4_)

            conv5_1_ = brew.conv(model, relu4_, 'conv5_1_', dim_in=8, dim_out=16, kernel=3, stride=2, pad=1)
            # conv5_1_, output shape: {[16,16,16]}
            batchnorm5_1_ = brew.spatial_bn(model, conv5_1_, 'batchnorm5_1_', dim_in=16, is_test=is_test)
            relu5_1_ = brew.relu(model, batchnorm5_1_, batchnorm5_1_)
            conv6_1_ = brew.conv(model, relu5_1_, 'conv6_1_', dim_in=16, dim_out=16, kernel=3, stride=1, pad=1)
            # conv6_1_, output shape: {[16,16,16]}
            batchnorm6_1_ = brew.spatial_bn(model, conv6_1_, 'batchnorm6_1_', dim_in=16, is_test=is_test)
            conv5_2_ = brew.conv(model, relu4_, 'conv5_2_', dim_in=8, dim_out=16, kernel=1, stride=2)
            # conv5_2_, output shape: {[16,16,16]}
            batchnorm5_2_ = brew.spatial_bn(model, conv5_2_, 'batchnorm5_2_', dim_in=16, is_test=is_test)
            add7_ = brew.sum(model, [batchnorm6_1_, batchnorm5_2_], 'add7_')
            relu7_ = brew.relu(model, add7_, add7_)

            conv8_1_ = brew.conv(model, relu7_, 'conv8_1_', dim_in=16, dim_out=16, kernel=3, stride=1, pad=1)
            # conv8_1_, output shape: {[16,16,16]}
            batchnorm8_1_ = brew.spatial_bn(model, conv8_1_, 'batchnorm8_1_', dim_in=16, is_test=is_test)
            relu8_1_ = brew.relu(model, batchnorm8_1_, batchnorm8_1_)
            conv9_1_ = brew.conv(model, relu8_1_, 'conv9_1_', dim_in=16, dim_out=16, kernel=3, stride=1, pad=1)
            # conv9_1_, output shape: {[16,16,16]}
            batchnorm9_1_ = brew.spatial_bn(model, conv9_1_, 'batchnorm9_1_', dim_in=16, is_test=is_test)
            # identity shortcut: sum with the block input
            add10_ = brew.sum(model, [batchnorm9_1_, relu7_], 'add10_')
            relu10_ = brew.relu(model, add10_, add10_)

            conv11_1_ = brew.conv(model, relu10_, 'conv11_1_', dim_in=16, dim_out=16, kernel=3, stride=1, pad=1)
            # conv11_1_, output shape: {[16,16,16]}
            batchnorm11_1_ = brew.spatial_bn(model, conv11_1_, 'batchnorm11_1_', dim_in=16, is_test=is_test)
            relu11_1_ = brew.relu(model, batchnorm11_1_, batchnorm11_1_)
            conv12_1_ = brew.conv(model, relu11_1_, 'conv12_1_', dim_in=16, dim_out=16, kernel=3, stride=1, pad=1)
            # conv12_1_, output shape: {[16,16,16]}
            batchnorm12_1_ = brew.spatial_bn(model, conv12_1_, 'batchnorm12_1_', dim_in=16, is_test=is_test)
            add13_ = brew.sum(model, [batchnorm12_1_, relu10_], 'add13_')
            relu13_ = brew.relu(model, add13_, add13_)

            conv14_1_ = brew.conv(model, relu13_, 'conv14_1_', dim_in=16, dim_out=32, kernel=3, stride=2, pad=1)
            # conv14_1_, output shape: {[32,8,8]}
            batchnorm14_1_ = brew.spatial_bn(model, conv14_1_, 'batchnorm14_1_', dim_in=32, is_test=is_test)
            relu14_1_ = brew.relu(model, batchnorm14_1_, batchnorm14_1_)
            conv15_1_ = brew.conv(model, relu14_1_, 'conv15_1_', dim_in=32, dim_out=32, kernel=3, stride=1, pad=1)
            # conv15_1_, output shape: {[32,8,8]}
            batchnorm15_1_ = brew.spatial_bn(model, conv15_1_, 'batchnorm15_1_', dim_in=32, is_test=is_test)
            conv14_2_ = brew.conv(model, relu13_, 'conv14_2_', dim_in=16, dim_out=32, kernel=1, stride=2)
            # conv14_2_, output shape: {[32,8,8]}
            batchnorm14_2_ = brew.spatial_bn(model, conv14_2_, 'batchnorm14_2_', dim_in=32, is_test=is_test)
            add16_ = brew.sum(model, [batchnorm15_1_, batchnorm14_2_], 'add16_')
            relu16_ = brew.relu(model, add16_, add16_)

            conv17_1_ = brew.conv(model, relu16_, 'conv17_1_', dim_in=32, dim_out=32, kernel=3, stride=1, pad=1)
            # conv17_1_, output shape: {[32,8,8]}
            batchnorm17_1_ = brew.spatial_bn(model, conv17_1_, 'batchnorm17_1_', dim_in=32, is_test=is_test)
            relu17_1_ = brew.relu(model, batchnorm17_1_, batchnorm17_1_)
            conv18_1_ = brew.conv(model, relu17_1_, 'conv18_1_', dim_in=32, dim_out=32, kernel=3, stride=1, pad=1)
            # conv18_1_, output shape: {[32,8,8]}
            batchnorm18_1_ = brew.spatial_bn(model, conv18_1_, 'batchnorm18_1_', dim_in=32, is_test=is_test)
            add19_ = brew.sum(model, [batchnorm18_1_, relu16_], 'add19_')
            relu19_ = brew.relu(model, add19_, add19_)

            conv20_1_ = brew.conv(model, relu19_, 'conv20_1_', dim_in=32, dim_out=32, kernel=3, stride=1, pad=1)
            # conv20_1_, output shape: {[32,8,8]}
            batchnorm20_1_ = brew.spatial_bn(model, conv20_1_, 'batchnorm20_1_', dim_in=32, is_test=is_test)
            relu20_1_ = brew.relu(model, batchnorm20_1_, batchnorm20_1_)
            conv21_1_ = brew.conv(model, relu20_1_, 'conv21_1_', dim_in=32, dim_out=32, kernel=3, stride=1, pad=1)
            # conv21_1_, output shape: {[32,8,8]}
            batchnorm21_1_ = brew.spatial_bn(model, conv21_1_, 'batchnorm21_1_', dim_in=32, is_test=is_test)
            add22_ = brew.sum(model, [batchnorm21_1_, relu19_], 'add22_')
            relu22_ = brew.relu(model, add22_, add22_)

            conv23_1_ = brew.conv(model, relu22_, 'conv23_1_', dim_in=32, dim_out=64, kernel=3, stride=2, pad=1)
            # conv23_1_, output shape: {[64,4,4]}
            batchnorm23_1_ = brew.spatial_bn(model, conv23_1_, 'batchnorm23_1_', dim_in=64, is_test=is_test)
            relu23_1_ = brew.relu(model, batchnorm23_1_, batchnorm23_1_)
            conv24_1_ = brew.conv(model, relu23_1_, 'conv24_1_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
            # conv24_1_, output shape: {[64,4,4]}
            batchnorm24_1_ = brew.spatial_bn(model, conv24_1_, 'batchnorm24_1_', dim_in=64, is_test=is_test)
            conv23_2_ = brew.conv(model, relu22_, 'conv23_2_', dim_in=32, dim_out=64, kernel=1, stride=2)
            # conv23_2_, output shape: {[64,4,4]}
            batchnorm23_2_ = brew.spatial_bn(model, conv23_2_, 'batchnorm23_2_', dim_in=64, is_test=is_test)
            add25_ = brew.sum(model, [batchnorm24_1_, batchnorm23_2_], 'add25_')
            relu25_ = brew.relu(model, add25_, add25_)

            conv26_1_ = brew.conv(model, relu25_, 'conv26_1_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
            # conv26_1_, output shape: {[64,4,4]}
            batchnorm26_1_ = brew.spatial_bn(model, conv26_1_, 'batchnorm26_1_', dim_in=64, is_test=is_test)
            relu26_1_ = brew.relu(model, batchnorm26_1_, batchnorm26_1_)
            conv27_1_ = brew.conv(model, relu26_1_, 'conv27_1_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
            # conv27_1_, output shape: {[64,4,4]}
            batchnorm27_1_ = brew.spatial_bn(model, conv27_1_, 'batchnorm27_1_', dim_in=64, is_test=is_test)
            add28_ = brew.sum(model, [batchnorm27_1_, relu25_], 'add28_')
            relu28_ = brew.relu(model, add28_, add28_)

            conv29_1_ = brew.conv(model, relu28_, 'conv29_1_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
            # conv29_1_, output shape: {[64,4,4]}
            batchnorm29_1_ = brew.spatial_bn(model, conv29_1_, 'batchnorm29_1_', dim_in=64, is_test=is_test)
            relu29_1_ = brew.relu(model, batchnorm29_1_, batchnorm29_1_)
            conv30_1_ = brew.conv(model, relu29_1_, 'conv30_1_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
            # conv30_1_, output shape: {[64,4,4]}
            batchnorm30_1_ = brew.spatial_bn(model, conv30_1_, 'batchnorm30_1_', dim_in=64, is_test=is_test)
            add31_ = brew.sum(model, [batchnorm30_1_, relu28_], 'add31_')
            relu31_ = brew.relu(model, add31_, add31_)

            globalpooling31_ = brew.average_pool(model, relu31_, 'globalpooling31_', global_pooling=True)
            # globalpooling31_, output shape: {[64,1,1]}
            fc31_ = brew.fc(model, globalpooling31_, 'fc31_', dim_in=64, dim_out=128)
            # fc31_, output shape: {[128,1,1]}
            # pass is_test through so dropout is disabled at inference time
            dropout31_ = brew.dropout(model, fc31_, 'dropout31_', ratio=0.5, is_test=is_test)
            fc32_ = brew.fc(model, dropout31_, 'fc32_', dim_in=128, dim_out=10)
            # fc32_, output shape: {[10,1,1]}
            softmax = brew.softmax(model, fc32_, 'softmax')

            return softmax

    # Adds the loss and the selected optimizer to the training model
    def add_training_operators(self, model, output, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum):
        with core.DeviceScope(device_opts):
            xent = model.LabelCrossEntropy([output, label], 'xent')
            loss = model.AveragedLoss(xent, "loss")

            model.AddGradientOperators([loss])

            if opt_type == 'adam':
                if policy == 'step':
                    opt = optimizer.build_adam(model, base_learning_rate=base_learning_rate, policy=policy, stepsize=stepsize, beta1=beta1, beta2=beta2, epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_adam(model, base_learning_rate=base_learning_rate, policy=policy, beta1=beta1, beta2=beta2, epsilon=epsilon)
                print("adam optimizer selected")
            elif opt_type == 'sgd':
                if policy == 'step':
                    opt = optimizer.build_sgd(model, base_learning_rate=base_learning_rate, policy=policy, stepsize=stepsize, gamma=gamma, momentum=momentum)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_sgd(model, base_learning_rate=base_learning_rate, policy=policy, gamma=gamma, momentum=momentum)
                print("sgd optimizer selected")
            elif opt_type == 'rmsprop':
                if policy == 'step':
                    opt = optimizer.build_rms_prop(model, base_learning_rate=base_learning_rate, policy=policy, stepsize=stepsize, decay=gamma, momentum=momentum, epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_rms_prop(model, base_learning_rate=base_learning_rate, policy=policy, decay=gamma, momentum=momentum, epsilon=epsilon)
                print("rmsprop optimizer selected")
            elif opt_type == 'adagrad':
                if policy == 'step':
                    opt = optimizer.build_adagrad(model, base_learning_rate=base_learning_rate, policy=policy, stepsize=stepsize, decay=gamma, epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_adagrad(model, base_learning_rate=base_learning_rate, policy=policy, decay=gamma, epsilon=epsilon)
                print("adagrad optimizer selected")

    def add_accuracy(self, model, output, label, device_opts, eval_metric):
        with core.DeviceScope(device_opts):
            if eval_metric == 'accuracy':
                accuracy = brew.accuracy(model, [output, label], "accuracy")
            elif eval_metric == 'top_k_accuracy':
                accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
            return accuracy

    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9):
        if context == 'cpu':
            device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
            print("CPU mode selected")
        elif context == 'gpu':
            device_opts = core.DeviceOption(caffe2_pb2.CUDA, 0)
            print("GPU mode selected")

        workspace.ResetWorkspace(self._model_dir_)

        arg_scope = {"order": "NCHW"}
        # == Training model ==
        train_model = model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
        data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
        softmax = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
        self.add_training_operators(train_model, softmax, label, device_opts, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
        self.add_accuracy(train_model, softmax, label, device_opts, eval_metric)
        with core.DeviceScope(device_opts):
            brew.add_weight_decay(train_model, weight_decay)

        # Initialize and create the training network
        workspace.RunNetOnce(train_model.param_init_net)
        workspace.CreateNet(train_model.net, overwrite=True)

        # Main training loop
        iterations = self.get_total_num_iter(num_epoch, batch_size, train_dataset_size)
        print("** Starting Training for " + str(num_epoch) + " epochs = " + str(iterations) + " iterations **")
        for i in range(iterations):
            workspace.RunNet(train_model.net)
            if i % 50 == 0:
                print('Iter ' + str(i) + ': ' + 'Loss ' + str(workspace.FetchBlob("loss")) + ' - ' + 'Accuracy ' + str(workspace.FetchBlob('accuracy')))
        print("Training done")

        print("== Running Test model ==")
        # == Testing model ==
        test_model = model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
        data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
        softmax = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
        self.add_accuracy(test_model, softmax, label, device_opts, eval_metric)
        workspace.RunNetOnce(test_model.param_init_net)
        workspace.CreateNet(test_model.net, overwrite=True)

        # Main testing loop: integer division so the last partial batch is skipped
        num_test_batches = test_dataset_size // batch_size
        test_accuracy = np.zeros(num_test_batches)
        for i in range(num_test_batches):
            # Run a forward pass of the net on the current batch
            workspace.RunNet(test_model.net)
            # Collect the batch accuracy from the workspace
            test_accuracy[i] = workspace.FetchBlob('accuracy')

        print('Test_accuracy: {:.4f}'.format(test_accuracy.mean()))

        # == Deployment model ==
        # We simply need the main create_model part.
        deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
        self.create_model(deploy_model, "data", device_opts, is_test=True)

        print("Saving deploy model")
        self.save_net(self._init_net_, self._predict_net_, deploy_model)

    def save_net(self, init_net_path, predict_net_path, model):
        init_net, predict_net = mobile_exporter.Export(
            workspace,
            model.net,
            model.params
        )

        try:
            os.makedirs(self._model_dir_)
        except OSError:
            if not os.path.isdir(self._model_dir_):
                raise

        print("Save the model to init_net.pb and predict_net.pb")
        with open(predict_net_path, 'wb') as f:
            f.write(model.net._net.SerializeToString())
        with open(init_net_path, 'wb') as f:
            f.write(init_net.SerializeToString())

        print("Save the model to init_net.pbtxt and predict_net.pbtxt")
        with open(init_net_path.replace('.pb', '.pbtxt'), 'w') as f:
            f.write(str(init_net))
        with open(predict_net_path.replace('.pb', '.pbtxt'), 'w') as f:
            f.write(str(predict_net))
        print("== Saved init_net and predict_net ==")

    def load_net(self, init_net_path, predict_net_path, device_opts):
        if not os.path.isfile(init_net_path):
            logging.error("Network loading failure. File '" + os.path.abspath(init_net_path) + "' does not exist.")
            sys.exit(1)
        elif not os.path.isfile(predict_net_path):
            logging.error("Network loading failure. File '" + os.path.abspath(predict_net_path) + "' does not exist.")
            sys.exit(1)

        init_def = caffe2_pb2.NetDef()
        with open(init_net_path, 'rb') as f:
            init_def.ParseFromString(f.read())
            init_def.device_option.CopyFrom(device_opts)
            workspace.RunNetOnce(init_def.SerializeToString())

        net_def = caffe2_pb2.NetDef()
        with open(predict_net_path, 'rb') as f:
            net_def.ParseFromString(f.read())
            net_def.device_option.CopyFrom(device_opts)
            workspace.CreateNet(net_def.SerializeToString(), overwrite=True)
        print("== Loaded init_net and predict_net ==")