import mxnet as mx
import numpy as np
import math
import os
import abc
import warnings
import sys
from mxnet import gluon, nd


class ZScoreNormalization(gluon.HybridBlock):
    def __init__(self, data_mean, data_std, **kwargs):
        super(ZScoreNormalization, self).__init__(**kwargs)
        with self.name_scope():
            self.data_mean = self.params.get('data_mean', shape=data_mean.shape,
                init=mx.init.Constant(data_mean.asnumpy().tolist()), differentiable=False)
            self.data_std = self.params.get('data_std', shape=data_std.shape,
                init=mx.init.Constant(data_std.asnumpy().tolist()), differentiable=False)

    def hybrid_forward(self, F, x, data_mean, data_std):
        x = F.broadcast_sub(x, data_mean)
        x = F.broadcast_div(x, data_std)
        return x
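
# Usage sketch (illustration only, not part of the generated code): the
# per-channel statistics below are assumptions; any mean/std shape that
# broadcasts against the input works.
def _zscore_normalization_example():
    mean = nd.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
    std = nd.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
    norm = ZScoreNormalization(data_mean=mean, data_std=std)
    norm.initialize()
    # broadcast_sub/broadcast_div standardize every pixel of an NCHW batch
    return norm(nd.random.uniform(shape=(1, 3, 224, 224)))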


class Padding(gluon.HybridBlock):
    def __init__(self, padding, **kwargs):
        super(Padding, self).__init__(**kwargs)
        with self.name_scope():
            self.pad_width = padding

    def hybrid_forward(self, F, x):
        x = F.pad(data=x,
            mode='constant',
            pad_width=self.pad_width,
            constant_value=0)
        return x
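
# Padding sketch (illustration only, not part of the generated code): pad_width
# lists (before, after) pairs per axis in NCHW order; the sizes are assumptions.
def _padding_example():
    pad = Padding(padding=(0, 0, 0, 0, 1, 1, 1, 1))   # pad H and W by one pixel
    return pad(nd.zeros((1, 3, 13, 13)))              # -> shape (1, 3, 15, 15)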


class NoNormalization(gluon.HybridBlock):
    def __init__(self, **kwargs):
        super(NoNormalization, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        return x


class Reshape(gluon.HybridBlock):
    def __init__(self, shape, **kwargs):
        super(Reshape, self).__init__(**kwargs)
        with self.name_scope():
            self.shape = shape

    def hybrid_forward(self, F, x):
        return F.reshape(data=x, shape=self.shape)


class CustomRNN(gluon.HybridBlock):
    def __init__(self, hidden_size, num_layers, dropout, bidirectional, **kwargs):
        super(CustomRNN, self).__init__(**kwargs)
        with self.name_scope():
            self.rnn = gluon.rnn.RNN(hidden_size=hidden_size, num_layers=num_layers, dropout=dropout,
                                     bidirectional=bidirectional, activation='tanh', layout='NTC')

    def hybrid_forward(self, F, data, state0):
        output, [state0] = self.rnn(data, [F.swapaxes(state0, 0, 1)])
        return output, F.swapaxes(state0, 0, 1)


class CustomLSTM(gluon.HybridBlock):
    def __init__(self, hidden_size, num_layers, dropout, bidirectional, **kwargs):
        super(CustomLSTM, self).__init__(**kwargs)
        with self.name_scope():
            self.lstm = gluon.rnn.LSTM(hidden_size=hidden_size, num_layers=num_layers, dropout=dropout,
                                       bidirectional=bidirectional, layout='NTC')

    def hybrid_forward(self, F, data, state0, state1):
        output, [state0, state1] = self.lstm(data, [F.swapaxes(state0, 0, 1), F.swapaxes(state1, 0, 1)])
        return output, F.swapaxes(state0, 0, 1), F.swapaxes(state1, 0, 1)
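
# State-layout sketch (illustration only, not part of the generated code): the
# Custom* wrappers take hidden states as (batch, num_layers, hidden_size) and
# swap them to gluon's (num_layers, batch, hidden_size) internally; sizes are assumptions.
def _custom_lstm_example():
    lstm = CustomLSTM(hidden_size=32, num_layers=1, dropout=0.0, bidirectional=False)
    lstm.initialize()
    data = nd.zeros((4, 10, 8))      # (batch, time, features), NTC layout
    h0 = nd.zeros((4, 1, 32))        # (batch, num_layers, hidden_size)
    c0 = nd.zeros((4, 1, 32))
    output, h, c = lstm(data, h0, c0)
    return output, h, c              # output: (4, 10, 32); h and c keep the batch-major layout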


class CustomGRU(gluon.HybridBlock):
    def __init__(self, hidden_size, num_layers, dropout, bidirectional, **kwargs):
        super(CustomGRU, self).__init__(**kwargs)
        with self.name_scope():
            self.gru = gluon.rnn.GRU(hidden_size=hidden_size, num_layers=num_layers, dropout=dropout,
                                     bidirectional=bidirectional, layout='NTC')

    def hybrid_forward(self, F, data, state0):
        output, [state0] = self.gru(data, [F.swapaxes(state0, 0, 1)])
        return output, F.swapaxes(state0, 0, 1)

class DotProductSelfAttention(gluon.HybridBlock):
    def __init__(self,
                 scale_factor,
                 num_heads,
                 dim_model,
                 dim_keys,
                 dim_values,
                 use_proj_bias,
                 use_mask,
                 **kwargs):
        super(DotProductSelfAttention, self).__init__(**kwargs)
        with self.name_scope():
            self.num_heads = num_heads
            self.dim_model = dim_model
            self.use_proj_bias = use_proj_bias
            self.use_mask = use_mask

            if dim_keys == -1:
                self.dim_keys = int(dim_model / self.num_heads)
            else:
                self.dim_keys = dim_keys
            if dim_values == -1:
                self.dim_values = int(dim_model / self.num_heads)
            else:
                self.dim_values = dim_values
    
            if scale_factor == -1:
                self.scale_factor = math.sqrt(self.dim_keys)
            else:
                self.scale_factor = scale_factor

            self.proj_q = gluon.nn.Dense(self.num_heads*self.dim_keys, use_bias=self.use_proj_bias, flatten=False)
            self.proj_k = gluon.nn.Dense(self.num_heads*self.dim_keys, use_bias=self.use_proj_bias, flatten=False)
            self.proj_v = gluon.nn.Dense(self.num_heads*self.dim_values, use_bias=self.use_proj_bias, flatten=False)
            self.proj_o = gluon.nn.Dense(self.dim_model, use_bias=self.use_proj_bias, flatten=False)

    def hybrid_forward(self, F, queries, keys, values, *args, **kwargs):

        queries = F.reshape(queries, shape=(0, 0, -1))
        keys = F.reshape(keys, shape=(0, 0, -1))
        values = F.reshape(values, shape=(0, 0, -1))
    
        head_queries = self.proj_q(queries)
        head_keys = self.proj_k(keys)
        head_values = self.proj_v(values)

        head_queries = F.reshape(head_queries, shape=(0, 0, self.num_heads, -1))
        head_queries = F.transpose(head_queries, axes=(0,2,1,3))
        head_queries = F.reshape(head_queries, shape=(-1, 0, 0), reverse=True)

        head_keys = F.reshape(head_keys, shape=(0, 0, self.num_heads, -1))
        head_keys = F.transpose(head_keys, axes=(0,2,1,3))
        head_keys = F.reshape(head_keys, shape=(-1, 0, 0), reverse=True)

        score = F.batch_dot(head_queries, head_keys, transpose_b=True)
        score = score * self.scale_factor
        if self.use_mask:
            seqs = F.contrib.arange_like(score, axis=1)
            zeros = F.zeros_like(seqs)
            zeros = F.reshape(zeros, shape=(1, -1))
            mask = args[0]
            mask = F.reshape(mask, shape=(-1, 1))
            mask = F.broadcast_add(mask, zeros)
            mask = F.expand_dims(mask, axis=1)
            mask = F.broadcast_axis(mask, axis=1, size=self.num_heads)
            mask = mask.reshape(shape=(-1, 0), reverse=True)
            mask = F.cast(mask, dtype='int32')
            weights = F.softmax(score, mask, use_length=self.use_mask)
        else:
            weights = F.softmax(score)

        head_values = F.reshape(head_values, shape=(0, 0, self.num_heads, -1))
        head_values = F.transpose(head_values, axes=(0,2,1,3))
        head_values = F.reshape(head_values, shape=(-1, 0, 0), reverse=True)

        ret = F.batch_dot(weights, head_values)
        ret = F.reshape(ret, shape=(-1, self.num_heads, 0, 0), reverse=True)
        ret = F.transpose(ret, axes=(0, 2, 1, 3))
        ret = F.reshape(ret, shape=(0, 0, -1))

        ret = self.proj_o(ret)

        return ret
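
# Attention usage sketch (illustration only, not part of the generated code): the
# dimensions are assumptions. With scale_factor/dim_keys/dim_values set to -1 they
# default to sqrt(dim_keys) and dim_model/num_heads, as in the constructor above.
def _self_attention_example():
    attn = DotProductSelfAttention(scale_factor=-1, num_heads=4, dim_model=64,
                                   dim_keys=-1, dim_values=-1,
                                   use_proj_bias=True, use_mask=False)
    attn.initialize()
    x = nd.zeros((2, 5, 64))         # (batch, sequence length, dim_model)
    return attn(x, x, x)             # self-attention: queries = keys = values, output (2, 5, 64)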

    
class EpisodicReplayMemoryInterface(gluon.HybridBlock):
    __metaclass__ = abc.ABCMeta

    def __init__(self, use_replay, replay_interval, replay_batch_size, replay_steps, replay_gradient_steps, use_local_adaptation, local_adaptation_gradient_steps, k, **kwargs):
        super(EpisodicReplayMemoryInterface, self).__init__(**kwargs)

        self.use_replay = use_replay
        self.replay_interval = replay_interval
        self.replay_batch_size = replay_batch_size
        self.replay_steps = replay_steps
        self.replay_gradient_steps = replay_gradient_steps

        self.use_local_adaptation = use_local_adaptation
        self.local_adaptation_gradient_steps = local_adaptation_gradient_steps
        self.k = k

    @abc.abstractmethod
    def store_samples(self, data, y, query_network, store_prob, mx_context):
        pass

    @abc.abstractmethod
    def sample_memory(self, batch_size, mx_context):
        pass

    @abc.abstractmethod
    def sample_neighbours(self, data, query_network):
        pass

    @abc.abstractmethod
    def get_query_network(self, mx_context):
        pass   

    @abc.abstractmethod
    def save_memory(self, path):
        pass

    @abc.abstractmethod
    def load_memory(self, path):
        pass
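
# Scheduling sketch (illustration only, not part of the generated code): shows how
# a training loop might consult the replay settings defined above; `memory`,
# `batch_i` and `run_replay` are assumed to be provided by the caller.
def _replay_schedule_example(memory, batch_i, run_replay):
    if memory.use_replay and batch_i % memory.replay_interval == 0:
        for _ in range(memory.replay_steps):
            run_replay(memory, memory.replay_batch_size, memory.replay_gradient_steps)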
    
#Memory layer
class LargeMemory(gluon.HybridBlock):
    def __init__(self, 
                 sub_key_size, 
                 query_size, 
                 query_act,
                 k, 
                 num_heads,
                 values_dim,
                 **kwargs):
        super(LargeMemory, self).__init__(**kwargs)
        with self.name_scope():
            #Memory parameters
            self.k = k
            self.num_heads = num_heads
            self.query_act = query_act
            self.query_size = query_size
    
            #Batch norm sub-layer
            self.batch_norm = gluon.nn.BatchNorm()

            #Memory sub-layer
            self.sub_key_size = sub_key_size
            sub_key_shape = (self.num_heads, self.sub_key_size, int(query_size[-1] / 2))

            if values_dim == -1:
                values_shape = (self.sub_key_size * self.sub_key_size, self.query_size[-1])
            else:
                values_shape = (self.sub_key_size*self.sub_key_size, values_dim)

            self.sub_keys1 = self.params.get("sub_keys1", shape=sub_key_shape, differentiable=True)
            self.sub_keys2 = self.params.get("sub_keys2", shape=sub_key_shape, differentiable=True)
            self.values = self.params.get("values", shape=values_shape, differentiable=True)
            self.label_memory = nd.array([])

            self.get_query_network()
                        
    def hybrid_forward(self, F, x, sub_keys1, sub_keys2, values):
        x = self.batch_norm(x)

        x = F.reshape(x, shape=(0, -1))

        q = self.query_network(x)

        q = F.reshape(q, shape=(0, self.num_heads, -1))

        q_split = F.split(q, num_outputs=2, axis=-1)

        q1 = F.split(q_split[0], num_outputs=self.num_heads, axis=1)
        q2 = F.split(q_split[1], num_outputs=self.num_heads, axis=1)
        sub_keys1_resh = F.split(sub_keys1, num_outputs=self.num_heads, axis=0, squeeze_axis=True)
        sub_keys2_resh = F.split(sub_keys2, num_outputs=self.num_heads, axis=0, squeeze_axis=True)
        if self.num_heads == 1:
            q1 = [q1]
            q2 = [q2]
            sub_keys1_resh = [sub_keys1_resh ]
            sub_keys2_resh = [sub_keys2_resh ]

        q1_dist = F.dot(q1[0], sub_keys1_resh[0], transpose_b=True)
        q2_dist = F.dot(q2[0], sub_keys2_resh[0], transpose_b=True)
        for h in range(1, self.num_heads):
            q1_dist = F.concat(q1_dist, F.dot(q1[h], sub_keys1_resh[h], transpose_b=True), dim=1)
            q2_dist = F.concat(q2_dist, F.dot(q2[h], sub_keys2_resh[h], transpose_b=True), dim=1)

        i1 = F.topk(q1_dist, k=self.k, ret_typ="indices")
        i2 = F.topk(q2_dist, k=self.k, ret_typ="indices")

        i1 = F.split(i1, num_outputs=self.num_heads, axis=1)
        i2 = F.split(i2, num_outputs=self.num_heads, axis=1)
        sub_keys1 = F.split(sub_keys1, num_outputs=self.num_heads, axis=0, squeeze_axis=True)
        sub_keys2 = F.split(sub_keys2, num_outputs=self.num_heads, axis=0, squeeze_axis=True)
        if self.num_heads == 1:
            i1 = [i1]
            i2 = [i2]
            sub_keys1 = [sub_keys1]
            sub_keys2 = [sub_keys2]

        k1 = F.take(sub_keys1[0], i1[0])
        k2 = F.take(sub_keys2[0], i2[0])
        for h in range(1, self.num_heads):
            k1 = F.concat(k1, F.take(sub_keys1[h], i1[h]), dim=1)
            k2 = F.concat(k2, F.take(sub_keys2[h], i2[h]), dim=1)

        k1 = F.tile(k1, (1, 1, self.k, 1))
        k2 = F.repeat(k2, self.k, 2)
        c_cart = F.concat(k1, k2, dim=3)

        q = F.reshape(q, shape=(-1,0), reverse=True)
        q = F.reshape(q, shape=(0, 1, -1))
        c_cart = F.reshape(c_cart, shape=(-1, 0, 0), reverse=True)

        k_dist = F.batch_dot(q, c_cart, transpose_b=True) #F.contrib.foreach(loop_batch_dot, [q, c_cart], init_states=state_batch_dist)
        k_dist = F.reshape(k_dist, shape=(0, -1))

        i = F.topk(k_dist, k=self.k, ret_typ="both")

        w = F.softmax(i[0])
        w = F.reshape(w, shape=(0,1,-1))
        vi = F.take(values, i[1])
        aggr_value = F.batch_dot(w, vi) #F.contrib.foreach(loop_batch_dot, [w, vi], init_states=state_batch_dist)

        ret = F.reshape(aggr_value, shape=(-1, self.num_heads, 0), reverse=True)
        one_vec = F.ones((1, 1, self.num_heads))
        one_vec = F.broadcast_like(one_vec, ret, lhs_axes=0, rhs_axes=0)
        ret = F.batch_dot(one_vec, ret)
        ret = F.reshape(ret, shape=(-1, 0), reverse=True)

        return ret

    def get_query_network(self):
        if hasattr(self, 'query_network'):
            return self.query_network
        else:
            self.query_network = gluon.nn.HybridSequential()
            for size in self.query_size:
                if self.query_act == "linear":
                    self.query_network.add(gluon.nn.Dense(units=self.num_heads*size, flatten=False))
                else:
                    self.query_network.add(gluon.nn.Dense(units=self.num_heads*size, activation=self.query_act, flatten=False))
            return self.query_network
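
# Memory-layer sketch (illustration only, not part of the generated code): sizes
# are assumptions. The layer realises a product-key memory: the query produced by
# get_query_network is split into two halves, each half is scored against one
# sub-key table, and the top-k Cartesian combinations of sub-keys select and
# weight rows of `values`.
def _large_memory_example():
    mem = LargeMemory(sub_key_size=8, query_size=(16,), query_act="linear",
                      k=4, num_heads=1, values_dim=-1)
    mem.initialize()      # sub_keys1/sub_keys2: (1, 8, 8); values: (64, 16)
    return mem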


#EpisodicMemory layer
class EpisodicMemory(EpisodicReplayMemoryInterface):
    def __init__(self,
                 replay_interval,
                 replay_batch_size,
                 replay_steps,
                 replay_gradient_steps,
                 store_prob,
                 max_stored_samples,
                 memory_replacement_strategy,
                 use_replay,
                 use_local_adaptation,
                 local_adaptation_gradient_steps,
                 k,
                 query_net_dir,
                 query_net_prefix,
                 query_net_num_inputs,
                 **kwargs):
        super(EpisodicMemory, self).__init__(use_replay, replay_interval, replay_batch_size, replay_steps, replay_gradient_steps, use_local_adaptation, local_adaptation_gradient_steps, k, **kwargs)
        with self.name_scope():
            #Replay parameters
            self.store_prob = store_prob
            self.max_stored_samples = max_stored_samples
            self.memory_replacement_strategy = memory_replacement_strategy

            self.query_net_dir = query_net_dir
            self.query_net_prefix = query_net_prefix
            self.query_net_num_inputs = query_net_num_inputs
    
            #Memory
            self.key_memory = nd.array([])
            self.value_memory = nd.array([])
            self.label_memory = nd.array([])

    def hybrid_forward(self, F, *args):
        #propagate the input as the rest is only used for replay
        return [args, []]

    def store_samples(self, data, y, query_network, store_prob, context):
        if not (self.memory_replacement_strategy == "no_replacement" and self.max_stored_samples != -1 and self.key_memory.shape[0] >= self.max_stored_samples):
            num_pus = len(data)
            sub_batch_sizes = [data[i][0][0].shape[0] for i in range(num_pus)]
            num_inputs = len(data[0][0])
            num_outputs = len(y)
            mx_context = context[0]

            if len(self.key_memory) == 0:
                self.key_memory = nd.empty(0, ctx=mx.cpu())
                self.value_memory = []
                self.label_memory = []  # nd.empty((num_outputs, 0), ctx=mx.cpu())

            ind = [nd.sample_multinomial(store_prob, sub_batch_sizes[i]).as_in_context(mx_context) for i in range(num_pus)]

            max_inds = [nd.max(ind[i]) for i in range(num_pus)]
            if any(max_inds):
                to_store_values = []
                for i in range(num_inputs):
                    tmp_values = []
                    for j in range(0, num_pus):
                        if max_inds[j]:
                            if isinstance(tmp_values, list):
                                tmp_values = nd.contrib.boolean_mask(data[j][0][i].as_in_context(mx_context), ind[j])
                            else:
                                tmp_values = nd.concat(tmp_values, nd.contrib.boolean_mask(data[j][0][i].as_in_context(mx_context), ind[j]), dim=0)
                    to_store_values.append(tmp_values)

                to_store_labels = []
                for i in range(num_outputs):
                    tmp_labels = []
                    for j in range(0, num_pus):
                        if max_inds[j]:
                            if isinstance(tmp_labels, list):
                                tmp_labels = nd.contrib.boolean_mask(y[i][j].as_in_context(mx_context), ind[j])
                            else:
                                tmp_labels = nd.concat(tmp_labels, nd.contrib.boolean_mask(y[i][j].as_in_context(mx_context), ind[j]), dim=0)
                    to_store_labels.append(tmp_labels)

                to_store_keys = query_network(*to_store_values[0:self.query_net_num_inputs])

                if self.key_memory.shape[0] == 0:
                    self.key_memory = to_store_keys.as_in_context(mx.cpu())
                    for i in range(num_inputs):
                        self.value_memory.append(to_store_values[i].as_in_context(mx.cpu()))
                    for i in range(num_outputs):
                        self.label_memory.append(to_store_labels[i].as_in_context(mx.cpu()))
                elif self.memory_replacement_strategy == "replace_oldest" and self.max_stored_samples != -1 and self.key_memory.shape[0] >= self.max_stored_samples:
                    num_to_store = to_store_keys.shape[0]
                    self.key_memory = nd.concat(self.key_memory[num_to_store:], to_store_keys.as_in_context(mx.cpu()), dim=0)
                    for i in range(num_inputs):
                        self.value_memory[i] = nd.concat(self.value_memory[i][num_to_store:], to_store_values[i].as_in_context(mx.cpu()), dim=0)
                    for i in range(num_outputs):
                        self.label_memory[i] = nd.concat(self.label_memory[i][num_to_store:], to_store_labels[i].as_in_context(mx.cpu()), dim=0)
                else:
                    self.key_memory = nd.concat(self.key_memory, to_store_keys.as_in_context(mx.cpu()), dim=0)
                    for i in range(num_inputs):
                        self.value_memory[i] = nd.concat(self.value_memory[i], to_store_values[i].as_in_context(mx.cpu()), dim=0)
                    for i in range(num_outputs):
                        self.label_memory[i] = nd.concat(self.label_memory[i], to_store_labels[i].as_in_context(mx.cpu()), dim=0)

    def sample_memory(self, batch_size):
        num_stored_samples = self.key_memory.shape[0]
        if self.replay_batch_size == -1:
            sample_ind = nd.random.randint(0, num_stored_samples, (self.replay_steps, batch_size), ctx=mx.cpu())
        else:
            sample_ind = nd.random.randint(0, num_stored_samples, (self.replay_steps, self.replay_batch_size), ctx=mx.cpu())

        num_outputs = len(self.label_memory)

        sample_labels = [[self.label_memory[i][ind] for i in range(num_outputs)] for ind in sample_ind]
        sample_batches = [[[self.value_memory[j][ind] for j in range(len(self.value_memory))], sample_labels[i]] for i, ind in enumerate(sample_ind)]

        return sample_batches

    def sample_neighbours(self, data, query_network):
        num_stored_samples = self.key_memory.shape[0]
        batch_size = data[0].shape[0]

        query = query_network(*data).as_in_context(mx.cpu())

        vec1 = nd.repeat(query, repeats=num_stored_samples, axis=0)
        vec2 = nd.tile(self.key_memory, reps=(batch_size, 1))
        diff = nd.subtract(vec1, vec2)
        sq = nd.square(diff)
        batch_sum = nd.sum(sq, exclude=1, axis=0)
        sqrt = nd.sqrt(batch_sum)

        dist = nd.reshape(sqrt, shape=(batch_size, num_stored_samples))

        sample_ind = nd.topk(dist, k=self.k, axis=1, ret_typ="indices")
        num_outputs = len(self.label_memory)

        sample_labels = [self.label_memory[i][sample_ind] for i in range(num_outputs)]
        sample_batches = [[self.value_memory[j][sample_ind] for j in range(len(self.value_memory))], sample_labels]

        return sample_batches

    def get_query_network(self, context):
        lastEpoch = 0
        for file in os.listdir(self.query_net_dir):
            if self.query_net_prefix in file and ".json" in file:
                symbolFile = file

            if self.query_net_prefix in file and ".param" in file:
                epochStr = file.replace(".params", "").replace(self.query_net_prefix, "")
                epoch = int(epochStr)
                if epoch >= lastEpoch:
                    lastEpoch = epoch
                    weightFile = file

        inputNames = []
        if self.query_net_num_inputs == 1:
            inputNames.append("data")
        else:
            for i in range(self.query_net_num_inputs):
                inputNames.append("data" + str(i))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            net = mx.gluon.nn.SymbolBlock.imports(self.query_net_dir + symbolFile, inputNames, self.query_net_dir + weightFile, ctx=context[0])
        net.hybridize()
        return net
    
    def save_memory(self, path):
        mem_arr = [("keys", self.key_memory)] + [("values_"+str(k),v) for (k,v) in enumerate(self.value_memory)] + [("labels_"+str(k),v) for (k,v) in enumerate(self.label_memory)]
        mem_dict = {entry[0]:entry[1] for entry in mem_arr}
        nd.save(path, mem_dict)

    def load_memory(self, path):
        mem_dict = nd.load(path)
        self.value_memory = []
        self.label_memory = []
        for key in sorted(mem_dict.keys()):
            if key == "keys":
                self.key_memory = mem_dict[key]
            elif key.startswith("values_"):
                self.value_memory.append(mem_dict[key])
            elif key.startswith("labels_"):
                self.label_memory.append(mem_dict[key])
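
# Replay / persistence sketch (illustration only, not part of the generated code):
# `memory` is assumed to be an EpisodicMemory that has already stored samples, and
# the file name is an assumption. sample_memory returns one [inputs, labels] pair
# per replay step; save_memory/load_memory round-trip the key/value/label memories
# through a single NDArray dict.
def _episodic_memory_usage_example(memory, batch_size=32, path="episodic_memory.mem"):
    for batch_data, batch_labels in memory.sample_memory(batch_size):
        pass                     # batch_data: one NDArray per input, batch_labels: one per output
    memory.save_memory(path)     # writes {"keys": ..., "values_0": ..., "labels_0": ...}
    memory.load_memory(path)     # restores the memories from that dict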


#Stream 0
class Net_0(gluon.HybridBlock):
    def __init__(self, data_mean=None, data_std=None, mx_context=None, **kwargs):
        super(Net_0, self).__init__(**kwargs)
        with self.name_scope():
            if data_mean:
                assert(data_std)
                self.input_normalization_data_ = ZScoreNormalization(data_mean=data_mean['data_'],
                                                                               data_std=data_std['data_'])
            else:
                self.input_normalization_data_ = NoNormalization()

            self.conv1_padding = Padding(padding=(0,0,-1,0,0,0,0,0))
            self.conv1_ = gluon.nn.Conv2D(channels=96,
                kernel_size=(11,11),
                strides=(4,4),
                use_bias=True)
            # conv1_, output shape: {[96,55,55]}

            self.pool1_padding = Padding(padding=(0,0,-1,0,0,0,0,0))
            self.pool1_ = gluon.nn.MaxPool2D(
                pool_size=(3,3),
                strides=(2,2))
            # pool1_, output shape: {[96,27,27]}

            self.relu1_ = gluon.nn.Activation(activation='relu')

            self.conv2_1_padding = Padding(padding=(0,0,0,0,2,2,2,2))
            self.conv2_1_ = gluon.nn.Conv2D(channels=128,
                kernel_size=(5,5),
                strides=(1,1),
                use_bias=True)
            # conv2_1_, output shape: {[128,27,27]}

            self.pool2_1_padding = Padding(padding=(0,0,-1,0,0,0,0,0))
            self.pool2_1_ = gluon.nn.MaxPool2D(
                pool_size=(3,3),
                strides=(2,2))
            # pool2_1_, output shape: {[128,13,13]}

            self.relu2_1_ = gluon.nn.Activation(activation='relu')
            self.conv2_2_padding = Padding(padding=(0,0,0,0,2,2,2,2))
            self.conv2_2_ = gluon.nn.Conv2D(channels=128,
                kernel_size=(5,5),
                strides=(1,1),
                use_bias=True)
            # conv2_2_, output shape: {[128,27,27]}

            self.pool2_2_padding = Padding(padding=(0,0,-1,0,0,0,0,0))
            self.pool2_2_ = gluon.nn.MaxPool2D(
                pool_size=(3,3),
                strides=(2,2))
            # pool2_2_, output shape: {[128,13,13]}

            self.relu2_2_ = gluon.nn.Activation(activation='relu')
            self.conv3_padding = Padding(padding=(0,0,0,0,1,1,1,1))
            self.conv3_ = gluon.nn.Conv2D(channels=384,
                kernel_size=(3,3),
                strides=(1,1),
                use_bias=True)
            # conv3_, output shape: {[384,13,13]}

            self.relu3_ = gluon.nn.Activation(activation='relu')

            self.conv4_1_padding = Padding(padding=(0,0,0,0,1,1,1,1))
            self.conv4_1_ = gluon.nn.Conv2D(channels=192,
                kernel_size=(3,3),
                strides=(1,1),
                use_bias=True)
            # conv4_1_, output shape: {[192,13,13]}

            self.relu4_1_ = gluon.nn.Activation(activation='relu')
            self.conv5_1_padding = Padding(padding=(0,0,0,0,1,1,1,1))
            self.conv5_1_ = gluon.nn.Conv2D(channels=128,
                kernel_size=(3,3),
                strides=(1,1),
                use_bias=True)
            # conv5_1_, output shape: {[128,13,13]}

            self.pool5_1_padding = Padding(padding=(0,0,-1,0,0,0,0,0))
            self.pool5_1_ = gluon.nn.MaxPool2D(
                pool_size=(3,3),
                strides=(2,2))
            # pool5_1_, output shape: {[128,6,6]}

            self.relu5_1_ = gluon.nn.Activation(activation='relu')
            self.conv4_2_padding = Padding(padding=(0,0,0,0,1,1,1,1))
            self.conv4_2_ = gluon.nn.Conv2D(channels=192,
                kernel_size=(3,3),
                strides=(1,1),
                use_bias=True)
            # conv4_2_, output shape: {[192,13,13]}

            self.relu4_2_ = gluon.nn.Activation(activation='relu')
            self.conv5_2_padding = Padding(padding=(0,0,0,0,1,1,1,1))
            self.conv5_2_ = gluon.nn.Conv2D(channels=128,
                kernel_size=(3,3),
                strides=(1,1),
                use_bias=True)
            # conv5_2_, output shape: {[128,13,13]}

            self.pool5_2_padding = Padding(padding=(0,0,-1,0,0,0,0,0))
            self.pool5_2_ = gluon.nn.MaxPool2D(
                pool_size=(3,3),
                strides=(2,2))
            # pool5_2_, output shape: {[128,6,6]}

            self.relu5_2_ = gluon.nn.Activation(activation='relu')
            self.fc6_ = gluon.nn.Dense(units=4096, use_bias=True, flatten=True)
            # fc6_, output shape: {[4096,1,1]}

            self.relu6_ = gluon.nn.Activation(activation='relu')
            self.dropout6_ = gluon.nn.Dropout(rate=0.5)
            self.fc7_ = gluon.nn.Dense(units=4096, use_bias=True, flatten=True)
            # fc7_, output shape: {[4096,1,1]}

            self.relu7_ = gluon.nn.Activation(activation='relu')
            self.dropout7_ = gluon.nn.Dropout(rate=0.5)
            self.fc8_ = gluon.nn.Dense(units=10, use_bias=True, flatten=True)
            # fc8_, output shape: {[10,1,1]}


            pass

    def hybrid_forward(self, F, data_):
        data_ = self.input_normalization_data_(data_)
        conv1_padding = self.conv1_padding(data_)
        conv1_ = self.conv1_(conv1_padding)
        lrn1_ = F.LRN(data=conv1_,
            alpha=0.0001,
            beta=0.75,
            knorm=2,
            nsize=5)
        pool1_padding = self.pool1_padding(lrn1_)
        pool1_ = self.pool1_(pool1_padding)
        relu1_ = self.relu1_(pool1_)

        split1_ = F.split(relu1_, axis=1, num_outputs=2)
        get2_1_ = split1_[0]
        conv2_1_padding = self.conv2_1_padding(get2_1_)
        conv2_1_ = self.conv2_1_(conv2_1_padding)
        lrn2_1_ = F.LRN(data=conv2_1_,
            alpha=0.0001,
            beta=0.75,
            knorm=2,
            nsize=5)
        pool2_1_padding = self.pool2_1_padding(lrn2_1_)
        pool2_1_ = self.pool2_1_(pool2_1_padding)
        relu2_1_ = self.relu2_1_(pool2_1_)
        get2_2_ = split1_[1]
        conv2_2_padding = self.conv2_2_padding(get2_2_)
        conv2_2_ = self.conv2_2_(conv2_2_padding)
        lrn2_2_ = F.LRN(data=conv2_2_,
            alpha=0.0001,
            beta=0.75,
            knorm=2,
            nsize=5)
        pool2_2_padding = self.pool2_2_padding(lrn2_2_)
        pool2_2_ = self.pool2_2_(pool2_2_padding)
        relu2_2_ = self.relu2_2_(pool2_2_)
        concatenate3_ = F.concat(relu2_1_, relu2_2_, dim=1)
        conv3_padding = self.conv3_padding(concatenate3_)
        conv3_ = self.conv3_(conv3_padding)
        relu3_ = self.relu3_(conv3_)

        split3_ = F.split(relu3_, axis=1, num_outputs=2)
        get4_1_ = split3_[0]
        conv4_1_padding = self.conv4_1_padding(get4_1_)
        conv4_1_ = self.conv4_1_(conv4_1_padding)
        relu4_1_ = self.relu4_1_(conv4_1_)
        conv5_1_padding = self.conv5_1_padding(relu4_1_)
        conv5_1_ = self.conv5_1_(conv5_1_padding)
        pool5_1_padding = self.pool5_1_padding(conv5_1_)
        pool5_1_ = self.pool5_1_(pool5_1_padding)
        relu5_1_ = self.relu5_1_(pool5_1_)
        get4_2_ = split3_[1]
        conv4_2_padding = self.conv4_2_padding(get4_2_)
        conv4_2_ = self.conv4_2_(conv4_2_padding)
        relu4_2_ = self.relu4_2_(conv4_2_)
        conv5_2_padding = self.conv5_2_padding(relu4_2_)
        conv5_2_ = self.conv5_2_(conv5_2_padding)
        pool5_2_padding = self.pool5_2_padding(conv5_2_)
        pool5_2_ = self.pool5_2_(pool5_2_padding)
        relu5_2_ = self.relu5_2_(pool5_2_)
        concatenate6_ = F.concat(relu5_1_, relu5_2_, dim=1)
        fc6_ = self.fc6_(concatenate6_)
        relu6_ = self.relu6_(fc6_)
        dropout6_ = self.dropout6_(relu6_)
        fc7_ = self.fc7_(dropout6_)
        relu7_ = self.relu7_(fc7_)
        dropout7_ = self.dropout7_(relu7_)
        fc8_ = self.fc8_(dropout7_)
        softmax8_ = F.softmax(fc8_, axis=-1)
        predictions_ = F.identity(softmax8_)

        return [[predictions_]]
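
# Construction sketch (illustration only, not part of the generated code): the
# statistics and the 3x224x224 input shape are assumptions inferred from the
# AlexNet-style layer comments above; data_mean/data_std are dicts keyed by the
# input name used in hybrid_forward ('data_').
def _net_0_construction_example():
    mean = {'data_': nd.zeros((3, 224, 224))}
    std = {'data_': nd.ones((3, 224, 224))}
    net = Net_0(data_mean=mean, data_std=std)
    net.initialize()   # convolution/dense shapes are inferred on the first forward pass
    return net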