from caffe2.python import workspace, core, model_helper, brew, optimizer
from caffe2.python.predictor import mobile_exporter
from caffe2.proto import caffe2_pb2
import numpy as np

import logging
import os
import shutil
import sys
import cv2

#TODO: Check whether class is needed
#class CNNCreator_CifarClassifierNetwork:

module = None
_data_dir_ = "data/CifarClassifierNetwork/"
_model_dir_ = "model/CifarClassifierNetwork/"
_model_prefix_ = "CifarClassifierNetwork"
_input_names_ = ['data']
_input_shapes_ = [(3,32,32)]
_output_names_ = ['softmax_label']

EPOCHS     = 10000   # total training iterations (one batch per iteration)
BATCH_SIZE = 256     # batch size for training
CONTEXT = 'gpu'
EVAL_METRIC = 'accuracy'
OPTIMIZER_TYPE = 'adam'
BASE_LEARNING_RATE = 0.001
WEIGHT_DECAY = 0.001
POLICY = 'fixed'
STEP_SIZE = 1
EPSILON = 1e-8
BETA1 = 0.9
BETA2 = 0.999
GAMMA = 0.999
MOMENTUM = 0.9
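
# Note: BETA1/BETA2/EPSILON apply to adam; GAMMA and MOMENTUM to sgd/rmsprop
# (GAMMA also serves as the adagrad decay); STEP_SIZE is only used when
# POLICY == 'step'. See add_training_operators below.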


CURRENT_FOLDER      = os.path.join('./')
DATA_FOLDER         = os.path.join(CURRENT_FOLDER, 'data')
ROOT_FOLDER         = os.path.join(CURRENT_FOLDER, 'model')

#TODO: Modify paths to make them dynamic
#For Windows
#INIT_NET = 'D:/Yeverino/git_projects/Caffe2_scripts/caffe2_ema_cnncreator/init_net'
#PREDICT_NET = 'D:/Yeverino/git_projects/Caffe2_scripts/caffe2_ema_cnncreator/predict_net'

#For Ubuntu
INIT_NET = './model/init_net'
PREDICT_NET = './model/predict_net'

# Move into train function if test of deploy_net is removed
if CONTEXT == 'cpu':
    device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
    print("CPU mode selected")
elif CONTEXT == 'gpu':
    device_opts = core.DeviceOption(caffe2_pb2.CUDA, 0)
    print("GPU mode selected")

def add_input(model, batch_size, db, db_type, device_opts):
    with core.DeviceScope(device_opts):
        # load the data
        data_uint8, label = brew.db_input(
            model,
            blobs_out=["data_uint8", "label"],
            batch_size=batch_size,
            db=db,
            db_type=db_type,
        )
        # cast the data to float
        data = model.Cast(data_uint8, "data", to=core.DataType.FLOAT)

        # scale data from [0,255] down to [0,1]
        data = model.Scale(data, data, scale=float(1./256))

        # don't need the gradient for the backward pass
        data = model.StopGradient(data, data)
        return data, label
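
# Minimal usage sketch for add_input (hypothetical model name; assumes the
# training lmdb referenced in train() exists):
#   peek = model_helper.ModelHelper(name="peek", arg_scope={"order": "NCHW"})
#   data, label = add_input(peek, batch_size=8,
#                           db=os.path.join(DATA_FOLDER, 'mnist-train-nchw-lmdb'),
#                           db_type='lmdb', device_opts=device_opts)
#   workspace.RunNetOnce(peek.param_init_net)
#   workspace.CreateNet(peek.net)
#   workspace.RunNet(peek.net)  # fills 'data' (float, scaled to [0,1]) and 'label'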

def create_model(model, data, device_opts, is_test=False):
    with core.DeviceScope(device_opts):
        # data, output shape: {[3,32,32]}
        # First residual block: two 3x3 convs on the main path, a 1x1
        # projection on the shortcut, joined by an element-wise sum.
        conv2_1_ = brew.conv(model, data, 'conv2_1_', dim_in=3, dim_out=8, kernel=3, stride=1, pad=1)
        # conv2_1_, output shape: {[8,32,32]}
        batchnorm2_1_ = brew.spatial_bn(model, conv2_1_, 'batchnorm2_1_', dim_in=8, is_test=is_test)
        relu2_1_ = brew.relu(model, batchnorm2_1_, batchnorm2_1_)
        conv3_1_ = brew.conv(model, relu2_1_, 'conv3_1_', dim_in=8, dim_out=8, kernel=3, stride=1, pad=1)
        # conv3_1_, output shape: {[8,32,32]}
        batchnorm3_1_ = brew.spatial_bn(model, conv3_1_, 'batchnorm3_1_', dim_in=8, is_test=is_test)

        conv2_2_ = brew.conv(model, data, 'conv2_2_', dim_in=3, dim_out=8, kernel=1, stride=1)
        # conv2_2_, output shape: {[8,32,32]}
        batchnorm2_2_ = brew.spatial_bn(model, conv2_2_, 'batchnorm2_2_', dim_in=8, is_test=is_test)
        add4_ = model.net.Sum([batchnorm3_1_, batchnorm2_2_], 'add4_')
        # add4_, output shape: {[8,32,32]}
        relu4_ = brew.relu(model, add4_, add4_)

        # Downsampling block: stride-2 convs halve the spatial size to 16x16.
        conv5_1_ = brew.conv(model, relu4_, 'conv5_1_', dim_in=8, dim_out=16, kernel=3, stride=2, pad=1)
        # conv5_1_, output shape: {[16,16,16]}
        batchnorm5_1_ = brew.spatial_bn(model, conv5_1_, 'batchnorm5_1_', dim_in=16, is_test=is_test)
        relu5_1_ = brew.relu(model, batchnorm5_1_, batchnorm5_1_)
        conv6_1_ = brew.conv(model, relu5_1_, 'conv6_1_', dim_in=16, dim_out=16, kernel=3, stride=1, pad=1)
        # conv6_1_, output shape: {[16,16,16]}
        batchnorm6_1_ = brew.spatial_bn(model, conv6_1_, 'batchnorm6_1_', dim_in=16, is_test=is_test)
        conv5_2_ = brew.conv(model, relu4_, 'conv5_2_', dim_in=8, dim_out=16, kernel=1, stride=2)
        # conv5_2_, output shape: {[16,16,16]}
        batchnorm5_2_ = brew.spatial_bn(model, conv5_2_, 'batchnorm5_2_', dim_in=16, is_test=is_test)
        add7_ = model.net.Sum([batchnorm6_1_, batchnorm5_2_], 'add7_')
        # add7_, output shape: {[16,16,16]}
        relu7_ = brew.relu(model, add7_, add7_)

        # Two identity blocks at 16 channels.
        conv8_1_ = brew.conv(model, relu7_, 'conv8_1_', dim_in=16, dim_out=16, kernel=3, stride=1, pad=1)
        # conv8_1_, output shape: {[16,16,16]}
        batchnorm8_1_ = brew.spatial_bn(model, conv8_1_, 'batchnorm8_1_', dim_in=16, is_test=is_test)
        relu8_1_ = brew.relu(model, batchnorm8_1_, batchnorm8_1_)
        conv9_1_ = brew.conv(model, relu8_1_, 'conv9_1_', dim_in=16, dim_out=16, kernel=3, stride=1, pad=1)
        # conv9_1_, output shape: {[16,16,16]}
        batchnorm9_1_ = brew.spatial_bn(model, conv9_1_, 'batchnorm9_1_', dim_in=16, is_test=is_test)
        add10_ = model.net.Sum([batchnorm9_1_, relu7_], 'add10_')
        # add10_, output shape: {[16,16,16]}
        relu10_ = brew.relu(model, add10_, add10_)

        conv11_1_ = brew.conv(model, relu10_, 'conv11_1_', dim_in=16, dim_out=16, kernel=3, stride=1, pad=1)
        # conv11_1_, output shape: {[16,16,16]}
        batchnorm11_1_ = brew.spatial_bn(model, conv11_1_, 'batchnorm11_1_', dim_in=16, is_test=is_test)
        relu11_1_ = brew.relu(model, batchnorm11_1_, batchnorm11_1_)
        conv12_1_ = brew.conv(model, relu11_1_, 'conv12_1_', dim_in=16, dim_out=16, kernel=3, stride=1, pad=1)
        # conv12_1_, output shape: {[16,16,16]}
        batchnorm12_1_ = brew.spatial_bn(model, conv12_1_, 'batchnorm12_1_', dim_in=16, is_test=is_test)
        add13_ = model.net.Sum([batchnorm12_1_, relu10_], 'add13_')
        # add13_, output shape: {[16,16,16]}
        relu13_ = brew.relu(model, add13_, add13_)

        # Downsample to 8x8 and widen to 32 channels.
        conv14_1_ = brew.conv(model, relu13_, 'conv14_1_', dim_in=16, dim_out=32, kernel=3, stride=2, pad=1)
        # conv14_1_, output shape: {[32,8,8]}
        batchnorm14_1_ = brew.spatial_bn(model, conv14_1_, 'batchnorm14_1_', dim_in=32, is_test=is_test)
        relu14_1_ = brew.relu(model, batchnorm14_1_, batchnorm14_1_)
        conv15_1_ = brew.conv(model, relu14_1_, 'conv15_1_', dim_in=32, dim_out=32, kernel=3, stride=1, pad=1)
        # conv15_1_, output shape: {[32,8,8]}
        batchnorm15_1_ = brew.spatial_bn(model, conv15_1_, 'batchnorm15_1_', dim_in=32, is_test=is_test)
        conv14_2_ = brew.conv(model, relu13_, 'conv14_2_', dim_in=16, dim_out=32, kernel=1, stride=2)
        # conv14_2_, output shape: {[32,8,8]}
        batchnorm14_2_ = brew.spatial_bn(model, conv14_2_, 'batchnorm14_2_', dim_in=32, is_test=is_test)
        add16_ = model.net.Sum([batchnorm15_1_, batchnorm14_2_], 'add16_')
        # add16_, output shape: {[32,8,8]}
        relu16_ = brew.relu(model, add16_, add16_)

        # Two identity blocks at 32 channels.
        conv17_1_ = brew.conv(model, relu16_, 'conv17_1_', dim_in=32, dim_out=32, kernel=3, stride=1, pad=1)
        # conv17_1_, output shape: {[32,8,8]}
        batchnorm17_1_ = brew.spatial_bn(model, conv17_1_, 'batchnorm17_1_', dim_in=32, is_test=is_test)
        relu17_1_ = brew.relu(model, batchnorm17_1_, batchnorm17_1_)
        conv18_1_ = brew.conv(model, relu17_1_, 'conv18_1_', dim_in=32, dim_out=32, kernel=3, stride=1, pad=1)
        # conv18_1_, output shape: {[32,8,8]}
        batchnorm18_1_ = brew.spatial_bn(model, conv18_1_, 'batchnorm18_1_', dim_in=32, is_test=is_test)
        add19_ = model.net.Sum([batchnorm18_1_, relu16_], 'add19_')
        # add19_, output shape: {[32,8,8]}
        relu19_ = brew.relu(model, add19_, add19_)

        conv20_1_ = brew.conv(model, relu19_, 'conv20_1_', dim_in=32, dim_out=32, kernel=3, stride=1, pad=1)
        # conv20_1_, output shape: {[32,8,8]}
        batchnorm20_1_ = brew.spatial_bn(model, conv20_1_, 'batchnorm20_1_', dim_in=32, is_test=is_test)
        relu20_1_ = brew.relu(model, batchnorm20_1_, batchnorm20_1_)
        conv21_1_ = brew.conv(model, relu20_1_, 'conv21_1_', dim_in=32, dim_out=32, kernel=3, stride=1, pad=1)
        # conv21_1_, output shape: {[32,8,8]}
        batchnorm21_1_ = brew.spatial_bn(model, conv21_1_, 'batchnorm21_1_', dim_in=32, is_test=is_test)
        add22_ = model.net.Sum([batchnorm21_1_, relu19_], 'add22_')
        # add22_, output shape: {[32,8,8]}
        relu22_ = brew.relu(model, add22_, add22_)

        # Downsample to 4x4 and widen to 64 channels.
        conv23_1_ = brew.conv(model, relu22_, 'conv23_1_', dim_in=32, dim_out=64, kernel=3, stride=2, pad=1)
        # conv23_1_, output shape: {[64,4,4]}
        batchnorm23_1_ = brew.spatial_bn(model, conv23_1_, 'batchnorm23_1_', dim_in=64, is_test=is_test)
        relu23_1_ = brew.relu(model, batchnorm23_1_, batchnorm23_1_)
        conv24_1_ = brew.conv(model, relu23_1_, 'conv24_1_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
        # conv24_1_, output shape: {[64,4,4]}
        batchnorm24_1_ = brew.spatial_bn(model, conv24_1_, 'batchnorm24_1_', dim_in=64, is_test=is_test)
        conv23_2_ = brew.conv(model, relu22_, 'conv23_2_', dim_in=32, dim_out=64, kernel=1, stride=2)
        # conv23_2_, output shape: {[64,4,4]}
        batchnorm23_2_ = brew.spatial_bn(model, conv23_2_, 'batchnorm23_2_', dim_in=64, is_test=is_test)
        add25_ = model.net.Sum([batchnorm24_1_, batchnorm23_2_], 'add25_')
        # add25_, output shape: {[64,4,4]}
        relu25_ = brew.relu(model, add25_, add25_)

        # Two identity blocks at 64 channels.
        conv26_1_ = brew.conv(model, relu25_, 'conv26_1_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
        # conv26_1_, output shape: {[64,4,4]}
        batchnorm26_1_ = brew.spatial_bn(model, conv26_1_, 'batchnorm26_1_', dim_in=64, is_test=is_test)
        relu26_1_ = brew.relu(model, batchnorm26_1_, batchnorm26_1_)
        conv27_1_ = brew.conv(model, relu26_1_, 'conv27_1_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
        # conv27_1_, output shape: {[64,4,4]}
        batchnorm27_1_ = brew.spatial_bn(model, conv27_1_, 'batchnorm27_1_', dim_in=64, is_test=is_test)
        add28_ = model.net.Sum([batchnorm27_1_, relu25_], 'add28_')
        # add28_, output shape: {[64,4,4]}
        relu28_ = brew.relu(model, add28_, add28_)

        conv29_1_ = brew.conv(model, relu28_, 'conv29_1_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
        # conv29_1_, output shape: {[64,4,4]}
        batchnorm29_1_ = brew.spatial_bn(model, conv29_1_, 'batchnorm29_1_', dim_in=64, is_test=is_test)
        relu29_1_ = brew.relu(model, batchnorm29_1_, batchnorm29_1_)
        conv30_1_ = brew.conv(model, relu29_1_, 'conv30_1_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
        # conv30_1_, output shape: {[64,4,4]}
        batchnorm30_1_ = brew.spatial_bn(model, conv30_1_, 'batchnorm30_1_', dim_in=64, is_test=is_test)
        add31_ = model.net.Sum([batchnorm30_1_, relu28_], 'add31_')
        # add31_, output shape: {[64,4,4]}
        relu31_ = brew.relu(model, add31_, add31_)

        # Classifier head: global average pooling, FC, dropout, FC, softmax.
        globalpooling31_ = brew.average_pool(model, relu31_, 'globalpooling31_', global_pooling=True)
        # globalpooling31_, output shape: {[64,1,1]}
        fc31_ = brew.fc(model, globalpooling31_, 'fc31_', dim_in=64, dim_out=128)
        # fc31_, output shape: {[128,1,1]}
        dropout31_ = brew.dropout(model, fc31_, 'dropout31_', ratio=0.5, is_test=is_test)
        fc32_ = brew.fc(model, dropout31_, 'fc32_', dim_in=128, dim_out=10)
        # fc32_, output shape: {[10,1,1]}
        softmax = brew.softmax(model, fc32_, 'softmax')

        return softmax

# this adds the loss and optimizer
def add_training_operators(model, output, label, device_opts):
    with core.DeviceScope(device_opts):
        xent = model.LabelCrossEntropy([output, label], 'xent')
        loss = model.AveragedLoss(xent, "loss")

        model.AddGradientOperators([loss])

        if OPTIMIZER_TYPE == 'adam':
            if POLICY == 'step':
                opt = optimizer.build_adam(model, base_learning_rate=BASE_LEARNING_RATE, policy=POLICY, stepsize=STEP_SIZE, beta1=BETA1, beta2=BETA2, epsilon=EPSILON)
            elif POLICY == 'fixed' or POLICY == 'inv':
                opt = optimizer.build_adam(model, base_learning_rate=BASE_LEARNING_RATE, policy=POLICY, beta1=BETA1, beta2=BETA2, epsilon=EPSILON)
            print("adam optimizer selected")
        elif OPTIMIZER_TYPE == 'sgd':
            if POLICY == 'step':
                opt = optimizer.build_sgd(model, base_learning_rate=BASE_LEARNING_RATE, policy=POLICY, stepsize=STEP_SIZE, gamma=GAMMA, momentum=MOMENTUM)
            elif POLICY == 'fixed' or POLICY == 'inv':
                opt = optimizer.build_sgd(model, base_learning_rate=BASE_LEARNING_RATE, policy=POLICY, gamma=GAMMA, momentum=MOMENTUM)
            print("sgd optimizer selected")
        elif OPTIMIZER_TYPE == 'rmsprop':
            if POLICY == 'step':
                opt = optimizer.build_rms_prop(model, base_learning_rate=BASE_LEARNING_RATE, policy=POLICY, stepsize=STEP_SIZE, decay=GAMMA, momentum=MOMENTUM, epsilon=EPSILON)
            elif POLICY == 'fixed' or POLICY == 'inv':
                opt = optimizer.build_rms_prop(model, base_learning_rate=BASE_LEARNING_RATE, policy=POLICY, decay=GAMMA, momentum=MOMENTUM, epsilon=EPSILON)
            print("rmsprop optimizer selected")
        elif OPTIMIZER_TYPE == 'adagrad':
            if POLICY == 'step':
                opt = optimizer.build_adagrad(model, base_learning_rate=BASE_LEARNING_RATE, policy=POLICY, stepsize=STEP_SIZE, decay=GAMMA, epsilon=EPSILON)
            elif POLICY == 'fixed' or POLICY == 'inv':
                opt = optimizer.build_adagrad(model, base_learning_rate=BASE_LEARNING_RATE, policy=POLICY, decay=GAMMA, epsilon=EPSILON)
            print("adagrad optimizer selected")

def add_accuracy(model, output, label, device_opts):
    with core.DeviceScope(device_opts):
        if EVAL_METRIC == 'accuracy':
            accuracy = brew.accuracy(model, [output, label], "accuracy")
        elif EVAL_METRIC == 'top_k_accuracy':
            accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
        return accuracy

def train(INIT_NET, PREDICT_NET, epochs, batch_size, device_opts):

    workspace.ResetWorkspace(ROOT_FOLDER)

    arg_scope = {"order": "NCHW"}
    # == Training model ==
    train_model = model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
    data, label = add_input(train_model, batch_size=batch_size, db=os.path.join(DATA_FOLDER, 'mnist-train-nchw-lmdb'), db_type='lmdb', device_opts=device_opts)
    softmax = create_model(train_model, data, device_opts=device_opts)
    add_training_operators(train_model, softmax, label, device_opts=device_opts)
    add_accuracy(train_model, softmax, label, device_opts)
    with core.DeviceScope(device_opts):
        brew.add_weight_decay(train_model, WEIGHT_DECAY)

    # Initialize and create the training network
    workspace.RunNetOnce(train_model.param_init_net)
    workspace.CreateNet(train_model.net, overwrite=True)

    # Main Training Loop
    print("== Starting Training for " + str(epochs) + " epochs ==")
    for j in range(0, epochs):
        workspace.RunNet(train_model.net)
        if j % 50 == 0:
            print("Iter: " + str(j) + ": " + "Loss " + str(workspace.FetchBlob("loss")) + " - " + "Accuracy " + str(workspace.FetchBlob('accuracy')))
    print("Training done")

    print("== Running Test model ==")
    # == Testing model. ==
    # Reuse the trained parameters (init_params=False) and run batch norm and
    # dropout in inference mode.
    test_model = model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
    data, label = add_input(test_model, batch_size=100, db=os.path.join(DATA_FOLDER, 'mnist-test-nchw-lmdb'), db_type='lmdb', device_opts=device_opts)
    softmax = create_model(test_model, data, device_opts=device_opts, is_test=True)
    add_accuracy(test_model, softmax, label, device_opts)
    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net, overwrite=True)

    # Main Testing Loop
    # batch size:        100
    # iterations:        100
    # total test images: 10000
    test_accuracy = np.zeros(100)
    for i in range(100):
        # Run a forward pass of the net on the current batch
        workspace.RunNet(test_model.net)
        # Collect the batch accuracy from the workspace
        test_accuracy[i] = workspace.FetchBlob('accuracy')

    print('Test_accuracy: {:.4f}'.format(test_accuracy.mean()))

    # == Deployment model. ==
    # We simply need the main AddModel part.
    deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
    create_model(deploy_model, "data", device_opts, is_test=True)

    print("Saving deploy model")
    save_net(INIT_NET, PREDICT_NET, deploy_model)

def save_net(init_net_path, predict_net_path, model):
    # Export a parameter-initialization net and a predict net from the
    # trained workspace.
    init_net, predict_net = mobile_exporter.Export(
        workspace,
        model.net,
        model.params
    )

    print("Save the model to init_net.pb and predict_net.pb")
    with open(predict_net_path + '.pb', 'wb') as f:
        f.write(predict_net.SerializeToString())
    with open(init_net_path + '.pb', 'wb') as f:
        f.write(init_net.SerializeToString())

    print("Save the model to init_net.pbtxt and predict_net.pbtxt")
    with open(init_net_path + '.pbtxt', 'w') as f:
        f.write(str(init_net))
    with open(predict_net_path + '.pbtxt', 'w') as f:
        f.write(str(predict_net))
    print("== Saved init_net and predict_net ==")

def load_net(init_net_path, predict_net_path, device_opts):
    # Run the init net once to fill the workspace with the trained parameters
    init_def = caffe2_pb2.NetDef()
    with open(init_net_path + '.pb', 'rb') as f:
        init_def.ParseFromString(f.read())
        init_def.device_option.CopyFrom(device_opts)
        workspace.RunNetOnce(init_def.SerializeToString())

    # Create (but do not run) the predict net so it can be invoked by name
    net_def = caffe2_pb2.NetDef()
    with open(predict_net_path + '.pb', 'rb') as f:
        net_def.ParseFromString(f.read())
        net_def.device_option.CopyFrom(device_opts)
        workspace.CreateNet(net_def, overwrite=True)
    print("== Loaded init_net and predict_net ==")

train(INIT_NET, PREDICT_NET, epochs=EPOCHS, batch_size=BATCH_SIZE, device_opts=device_opts)

print('\n********************************************')
print("Loading Deploy model")
load_net(INIT_NET, PREDICT_NET, device_opts=device_opts)

img = cv2.imread("3.jpg")                                    # Load test image (BGR, HWC)
img = cv2.resize(img, (32, 32))                              # Resize to the 32x32 network input
img = img.astype('float32') / 256.0                          # Scale to [0,1] as in training
img = img.transpose(2, 0, 1).reshape(1, 3, 32, 32)           # HWC -> NCHW, shape (1,3,32,32)
workspace.FeedBlob("data", img, device_option=device_opts)   # FeedBlob
workspace.RunNet('deploy_net', num_iter=1)                   # Forward

print("\nInput: {}".format(img.shape))
pred = workspace.FetchBlob("softmax") #TODO: Consider multiple output names
print("Output: {}".format(pred))
print("Output class: {}".format(np.argmax(pred)))