from caffe2.python import workspace, core, model_helper, brew, optimizer
from caffe2.python.predictor import mobile_exporter
from caffe2.proto import caffe2_pb2
import numpy as np
import math
import logging
import os
import sys
import lmdb

class CNNCreator_VGG16:

    module = None
    _current_dir_ = os.path.join('./')
    _data_dir_    = os.path.join(_current_dir_, 'data', 'VGG16')
    _model_dir_   = os.path.join(_current_dir_, 'model', 'VGG16')

    _init_net_    = os.path.join(_model_dir_, 'init_net.pb')
    _predict_net_ = os.path.join(_model_dir_, 'predict_net.pb')
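
    # Note: the paths above resolve relative to the process working directory,
    # so this class is meant to be run from a project root that contains the
    # data/ and model/ folders.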

    def get_total_num_iter(self, num_epoch, batch_size, dataset_size):
        # Force floating-point division
        batch_size_float = float(batch_size)
        dataset_size_float = float(dataset_size)

        iterations_float = math.ceil(num_epoch*(dataset_size_float/batch_size_float))
        iterations_int = int(iterations_float)

        return iterations_int
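
    # Worked example (illustrative only): num_epoch=10, batch_size=64 and
    # dataset_size=1000 give ceil(10 * 1000.0 / 64.0) = ceil(156.25) = 157
    # training iterations.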


    def add_input(self, model, batch_size, db, db_type, device_opts):
        with core.DeviceScope(device_opts):
            if not os.path.isdir(db):
                logging.error("Data loading failure. Directory '" + os.path.abspath(db) + "' does not exist.")
                sys.exit(1)
            elif not (os.path.isfile(os.path.join(db, 'data.mdb')) and os.path.isfile(os.path.join(db, 'lock.mdb'))):
                logging.error("Data loading failure. Directory '" + os.path.abspath(db) + "' does not contain lmdb files.")
                sys.exit(1)

            # load the data
            data_uint8, label = brew.db_input(
                model,
                blobs_out=["data_uint8", "label"],
                batch_size=batch_size,
                db=db,
                db_type=db_type,
            )
            # cast the data to float
            data = model.Cast(data_uint8, "data", to=core.DataType.FLOAT)

            # scale data from [0,255] down to [0,1]
            # scale data from [0,255] down to [0,1)
            data = model.Scale(data, data, scale=float(1./256))

            # don't need the gradient for the backward pass
            data = model.StopGradient(data, data)

            dataset_size = int(lmdb.open(db).stat()['entries'])

            return data, label, dataset_size
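
    # Note: the LMDB is expected to hold uint8 NCHW image tensors with integer
    # labels (e.g. as written by Caffe2's make_image_db binary); pixel values
    # are rescaled by 1/256 into [0,1) before they reach the network.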

    def create_model(self, model, data, device_opts, is_test):
        with core.DeviceScope(device_opts):
            # data, output shape: {[3,224,224]}
            conv1_ = brew.conv(model, data, 'conv1_', dim_in=3, dim_out=64, kernel=3, stride=1, pad=1)
            # conv1_, output shape: {[64,224,224]}
            relu1_ = brew.relu(model, conv1_, conv1_)
            conv2_ = brew.conv(model, relu1_, 'conv2_', dim_in=64, dim_out=64, kernel=3, stride=1, pad=1)
            # conv2_, output shape: {[64,224,224]}
            relu2_ = brew.relu(model, conv2_, conv2_)
            pool2_ = brew.max_pool(model, relu2_, 'pool2_', kernel=2, stride=2)
            # pool2_, output shape: {[64,112,112]}
            conv3_ = brew.conv(model, pool2_, 'conv3_', dim_in=64, dim_out=128, kernel=3, stride=1, pad=1)
            # conv3_, output shape: {[128,112,112]}
            relu3_ = brew.relu(model, conv3_, conv3_)
            conv4_ = brew.conv(model, relu3_, 'conv4_', dim_in=128, dim_out=128, kernel=3, stride=1, pad=1)
            # conv4_, output shape: {[128,112,112]}
            relu4_ = brew.relu(model, conv4_, conv4_)
            pool4_ = brew.max_pool(model, relu4_, 'pool4_', kernel=2, stride=2)
            # pool4_, output shape: {[128,56,56]}
            conv5_ = brew.conv(model, pool4_, 'conv5_', dim_in=128, dim_out=256, kernel=3, stride=1, pad=1)
            # conv5_, output shape: {[256,56,56]}
            relu5_ = brew.relu(model, conv5_, conv5_)
            conv6_ = brew.conv(model, relu5_, 'conv6_', dim_in=256, dim_out=256, kernel=3, stride=1, pad=1)
            # conv6_, output shape: {[256,56,56]}
            relu6_ = brew.relu(model, conv6_, conv6_)
            conv7_ = brew.conv(model, relu6_, 'conv7_', dim_in=256, dim_out=256, kernel=3, stride=1, pad=1)
            # conv7_, output shape: {[256,56,56]}
            relu7_ = brew.relu(model, conv7_, conv7_)
            pool7_ = brew.max_pool(model, relu7_, 'pool7_', kernel=2, stride=2)
            # pool7_, output shape: {[256,28,28]}
            conv8_ = brew.conv(model, pool7_, 'conv8_', dim_in=256, dim_out=512, kernel=3, stride=1, pad=1)
            # conv8_, output shape: {[512,28,28]}
            relu8_ = brew.relu(model, conv8_, conv8_)
            conv9_ = brew.conv(model, relu8_, 'conv9_', dim_in=512, dim_out=512, kernel=3, stride=1, pad=1)
            # conv9_, output shape: {[512,28,28]}
            relu9_ = brew.relu(model, conv9_, conv9_)
            conv10_ = brew.conv(model, relu9_, 'conv10_', dim_in=512, dim_out=512, kernel=3, stride=1, pad=1)
            # conv10_, output shape: {[512,28,28]}
            relu10_ = brew.relu(model, conv10_, conv10_)
            pool10_ = brew.max_pool(model, relu10_, 'pool10_', kernel=2, stride=2)
            # pool10_, output shape: {[512,14,14]}
            conv11_ = brew.conv(model, pool10_, 'conv11_', dim_in=512, dim_out=512, kernel=3, stride=1, pad=1)
            # conv11_, output shape: {[512,14,14]}
            relu11_ = brew.relu(model, conv11_, conv11_)
            conv12_ = brew.conv(model, relu11_, 'conv12_', dim_in=512, dim_out=512, kernel=3, stride=1, pad=1)
            # conv12_, output shape: {[512,14,14]}
            relu12_ = brew.relu(model, conv12_, conv12_)
            conv13_ = brew.conv(model, relu12_, 'conv13_', dim_in=512, dim_out=512, kernel=3, stride=1, pad=1)
            # conv13_, output shape: {[512,14,14]}
            relu13_ = brew.relu(model, conv13_, conv13_)
            pool13_ = brew.max_pool(model, relu13_, 'pool13_', kernel=2, stride=2)
            # pool13_, output shape: {[512,7,7]}
            fc13_ = brew.fc(model, pool13_, 'fc13_', dim_in=512 * 7 * 7, dim_out=4096)
            # fc13_, output shape: {[4096,1,1]}
            relu14_ = brew.relu(model, fc13_, fc13_)
            # Dropout must be disabled at test time, so forward the is_test flag
            # instead of hard-coding False.
            dropout14_ = brew.dropout(model, relu14_, 'dropout14_', ratio=0.5, is_test=is_test)
            fc14_ = brew.fc(model, dropout14_, 'fc14_', dim_in=4096, dim_out=4096)
            # fc14_, output shape: {[4096,1,1]}
            relu15_ = brew.relu(model, fc14_, fc14_)
            dropout15_ = brew.dropout(model, relu15_, 'dropout15_', ratio=0.5, is_test=is_test)
            fc15_ = brew.fc(model, dropout15_, 'fc15_', dim_in=4096, dim_out=1000)
            # fc15_, output shape: {[1000,1,1]}
            predictions = brew.softmax(model, fc15_, 'predictions')

            return predictions
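
    # The stack above is the 16-layer VGG configuration "D" (Simonyan & Zisserman,
    # 2014): 13 3x3 convolutions plus 3 fully connected layers, with five 2x2
    # max-pooling stages halving the resolution 224 -> 112 -> 56 -> 28 -> 14 -> 7.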

    # Adds the loss function and the selected optimizer to the given model.
    def add_training_operators(self, model, output, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum):
        with core.DeviceScope(device_opts):
            if loss == 'cross_entropy':
                xent = model.LabelCrossEntropy([output, label], 'xent')
                loss = model.AveragedLoss(xent, "loss")
            elif loss == 'euclidean':
                dist = model.net.SquaredL2Distance([label, output], 'dist')
                loss = model.AveragedLoss(dist, "loss")

            model.AddGradientOperators([loss])

            if opt_type == 'adam':
                if policy == 'step':
                    opt = optimizer.build_adam(model, base_learning_rate=base_learning_rate, policy=policy, stepsize=stepsize, beta1=beta1, beta2=beta2, epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_adam(model, base_learning_rate=base_learning_rate, policy=policy, beta1=beta1, beta2=beta2, epsilon=epsilon)
                print("adam optimizer selected")
            elif opt_type == 'sgd':
                if policy == 'step':
                    opt = optimizer.build_sgd(model, base_learning_rate=base_learning_rate, policy=policy, stepsize=stepsize, gamma=gamma, momentum=momentum)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_sgd(model, base_learning_rate=base_learning_rate, policy=policy, gamma=gamma, momentum=momentum)
                print("sgd optimizer selected")
            elif opt_type == 'rmsprop':
                if policy == 'step':
                    opt = optimizer.build_rms_prop(model, base_learning_rate=base_learning_rate, policy=policy, stepsize=stepsize, decay=gamma, momentum=momentum, epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_rms_prop(model, base_learning_rate=base_learning_rate, policy=policy, decay=gamma, momentum=momentum, epsilon=epsilon)
                print("rmsprop optimizer selected")
            elif opt_type == 'adagrad':
                if policy == 'step':
                    opt = optimizer.build_adagrad(model, base_learning_rate=base_learning_rate, policy=policy, stepsize=stepsize, decay=gamma, epsilon=epsilon)
                elif policy == 'fixed' or policy == 'inv':
                    opt = optimizer.build_adagrad(model, base_learning_rate=base_learning_rate, policy=policy, decay=gamma, epsilon=epsilon)
                print("adagrad optimizer selected")

    def add_accuracy(self, model, output, label, device_opts, eval_metric):
        with core.DeviceScope(device_opts):
            if eval_metric == 'accuracy':
                accuracy = brew.accuracy(model, [output, label], "accuracy")
            elif eval_metric == 'top_k_accuracy':
                accuracy = brew.accuracy(model, [output, label], "accuracy", top_k=3)
            return accuracy
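
    # Note: 'top_k_accuracy' passes top_k=3 to Caffe2's Accuracy operator, i.e.
    # it reports top-3 accuracy.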

    def train(self, num_epoch=1000, batch_size=64, context='gpu', eval_metric='accuracy', loss='cross_entropy', opt_type='adam', base_learning_rate=0.001, weight_decay=0.001, policy='fixed', stepsize=1, epsilon=1E-8, beta1=0.9, beta2=0.999, gamma=0.999, momentum=0.9):
        if context == 'cpu':
            device_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
            print("CPU mode selected")
        elif context == 'gpu':
            device_opts = core.DeviceOption(caffe2_pb2.CUDA, 0)
            print("GPU mode selected")

        workspace.ResetWorkspace(self._model_dir_)

        arg_scope = {"order": "NCHW"}
        # == Training model ==
        train_model = model_helper.ModelHelper(name="train_net", arg_scope=arg_scope)
        data, label, train_dataset_size = self.add_input(train_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'train_lmdb'), db_type='lmdb', device_opts=device_opts)
        predictions = self.create_model(train_model, data, device_opts=device_opts, is_test=False)
        self.add_training_operators(train_model, predictions, label, device_opts, loss, opt_type, base_learning_rate, policy, stepsize, epsilon, beta1, beta2, gamma, momentum)
        self.add_accuracy(train_model, predictions, label, device_opts, eval_metric)
        with core.DeviceScope(device_opts):
            brew.add_weight_decay(train_model, weight_decay)

        # Initialize and create the training network
        workspace.RunNetOnce(train_model.param_init_net)
        workspace.CreateNet(train_model.net, overwrite=True)

        # Main Training Loop
        iterations = self.get_total_num_iter(num_epoch, batch_size, train_dataset_size)
        print("** Starting Training for " + str(num_epoch) + " epochs = " + str(iterations) + " iterations **")
        for i in range(iterations):
            workspace.RunNet(train_model.net)
            if i % 50 == 0:
                print('Iter ' + str(i) + ': ' + 'Loss ' + str(workspace.FetchBlob("loss")) + ' - ' + 'Accuracy ' + str(workspace.FetchBlob('accuracy')))
        print("Training done")

        print("== Running Test model ==")
        # == Testing model ==
        test_model = model_helper.ModelHelper(name="test_net", arg_scope=arg_scope, init_params=False)
        data, label, test_dataset_size = self.add_input(test_model, batch_size=batch_size, db=os.path.join(self._data_dir_, 'test_lmdb'), db_type='lmdb', device_opts=device_opts)
        predictions = self.create_model(test_model, data, device_opts=device_opts, is_test=True)
        self.add_accuracy(test_model, predictions, label, device_opts, eval_metric)
        workspace.RunNetOnce(test_model.param_init_net)
        workspace.CreateNet(test_model.net, overwrite=True)

        # Main Testing Loop
        # Use integer division so the batch count is an int under Python 3 as well.
        num_test_batches = test_dataset_size // batch_size
        test_accuracy = np.zeros(num_test_batches)
        for i in range(num_test_batches):
            # Run a forward pass of the net on the current batch
            workspace.RunNet(test_model.net)
            # Collect the batch accuracy from the workspace
            test_accuracy[i] = workspace.FetchBlob('accuracy')

        print('Test_accuracy: {:.4f}'.format(test_accuracy.mean()))

        # == Deployment model ==
        # We simply need the main create_model part.
        deploy_model = model_helper.ModelHelper(name="deploy_net", arg_scope=arg_scope, init_params=False)
        self.create_model(deploy_model, "data", device_opts, is_test=True)

        print("Saving deploy model")
        self.save_net(self._init_net_, self._predict_net_, deploy_model)

    def save_net(self, init_net_path, predict_net_path, model):

        init_net, predict_net = mobile_exporter.Export(
            workspace,
            model.net,
            model.params
        )

        try:
            os.makedirs(self._model_dir_)
        except OSError:
            if not os.path.isdir(self._model_dir_):
                raise

        print("Save the model to init_net.pb and predict_net.pb")
        with open(predict_net_path, 'wb') as f:
            f.write(model.net._net.SerializeToString())
        with open(init_net_path, 'wb') as f:
            f.write(init_net.SerializeToString())

        print("Save the model to init_net.pbtxt and predict_net.pbtxt")
        with open(init_net_path.replace('.pb', '.pbtxt'), 'w') as f:
            f.write(str(init_net))
        with open(predict_net_path.replace('.pb', '.pbtxt'), 'w') as f:
            f.write(str(predict_net))
        print("== Saved init_net and predict_net ==")

    def load_net(self, init_net_path, predict_net_path, device_opts):
        if not os.path.isfile(init_net_path):
            logging.error("Network loading failure. File '" + os.path.abspath(init_net_path) + "' does not exist.")
            sys.exit(1)
        elif not os.path.isfile(predict_net_path):
            logging.error("Network loading failure. File '" + os.path.abspath(predict_net_path) + "' does not exist.")
            sys.exit(1)

        init_def = caffe2_pb2.NetDef()
        with open(init_net_path, 'rb') as f:
            init_def.ParseFromString(f.read())
            init_def.device_option.CopyFrom(device_opts)
            workspace.RunNetOnce(init_def.SerializeToString())

        net_def = caffe2_pb2.NetDef()
        with open(predict_net_path, 'rb') as f:
            net_def.ParseFromString(f.read())
            net_def.device_option.CopyFrom(device_opts)
            workspace.CreateNet(net_def.SerializeToString(), overwrite=True)
        print("== Loaded init_net and predict_net ==")