# CNNNet_torcs_agent_torcsAgent_dqn.py
# NOTE(review): web-viewer scraping residue (file-size label, "Newer/Older"
# controls, and gutter line numbers) removed so the module is importable.
import mxnet as mx
import numpy as np
from mxnet import gluon

class Softmax(gluon.HybridBlock):
    """Stateless layer applying the softmax function along the default axis."""

    def __init__(self, **kwargs):
        super(Softmax, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        # F is the backend namespace (mx.nd or mx.sym); delegate directly.
        normalized = F.softmax(x)
        return normalized


class Split(gluon.HybridBlock):
    """Splits its input into ``num_outputs`` equal slices along ``axis``."""

    def __init__(self, num_outputs, axis=1, **kwargs):
        super(Split, self).__init__(**kwargs)
        with self.name_scope():
            self.num_outputs = num_outputs
            self.axis = axis

    def hybrid_forward(self, F, x):
        # F.split yields a list of num_outputs equally-sized pieces.
        return F.split(data=x, num_outputs=self.num_outputs, axis=self.axis)


class Concatenate(gluon.HybridBlock):
    """Concatenates an arbitrary number of inputs along dimension ``dim``."""

    def __init__(self, dim=1, **kwargs):
        super(Concatenate, self).__init__(**kwargs)
        with self.name_scope():
            self.dim = dim

    def hybrid_forward(self, F, *inputs):
        # Variadic inputs are forwarded unchanged to the backend concat op.
        return F.concat(*inputs, dim=self.dim)


class ZScoreNormalization(gluon.HybridBlock):
    """Normalizes the input to zero mean / unit variance with fixed,
    non-trainable per-element statistics.

    Parameters
    ----------
    data_mean : mx.nd.NDArray
        Element-wise mean subtracted from the input.
    data_std : mx.nd.NDArray
        Element-wise standard deviation the input is divided by.
    """

    def __init__(self, data_mean, data_std, **kwargs):
        super(ZScoreNormalization, self).__init__(**kwargs)
        with self.name_scope():
            self.data_mean = self.params.get('data_mean', shape=data_mean.shape,
                init=mx.init.Constant(data_mean.asnumpy().tolist()), differentiable=False)
            # BUG FIX: previously declared with shape=data_mean.shape; use the
            # std tensor's own shape so mismatched statistics fail loudly here.
            self.data_std = self.params.get('data_std', shape=data_std.shape,
                init=mx.init.Constant(data_std.asnumpy().tolist()), differentiable=False)

    def hybrid_forward(self, F, x, data_mean, data_std):
        # broadcast_* lets per-channel statistics apply across a batched input.
        x = F.broadcast_sub(x, data_mean)
        x = F.broadcast_div(x, data_std)
        return x


class Padding(gluon.HybridBlock):
    """Zero-pads its input according to a fixed ``pad_width`` specification."""

    def __init__(self, padding, **kwargs):
        super(Padding, self).__init__(**kwargs)
        with self.name_scope():
            self.pad_width = padding

    def hybrid_forward(self, F, x):
        # Constant-mode padding fills every new element with zero.
        return F.pad(data=x, mode='constant', constant_value=0,
                     pad_width=self.pad_width)


class NoNormalization(gluon.HybridBlock):
    """Identity block used when no normalization statistics are supplied."""

    def __init__(self, **kwargs):
        super(NoNormalization, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        # Pass-through: the input is returned unchanged.
        return x


class Net(gluon.HybridBlock):
    """Fully-connected Q-network for the TORCS DQN agent.

    Architecture: optional z-score input normalization, then
    Dense(512) -> tanh -> Dense(256) -> tanh -> Dense(30).
    The final layer is linear, i.e. the network emits raw Q-values
    (presumably one per discrete action — TODO confirm against the trainer).

    Parameters
    ----------
    data_mean, data_std : mx.nd.NDArray or None
        Fixed normalization statistics; both must be given together.
        When omitted, the input is used unnormalized.
    """

    def __init__(self, data_mean=None, data_std=None, **kwargs):
        super(Net, self).__init__(**kwargs)
        with self.name_scope():
            if data_mean is not None:
                # Normalization statistics must come as a pair.
                assert data_std is not None
                self.state_input_normalization = ZScoreNormalization(
                    data_mean=data_mean, data_std=data_std)
            else:
                self.state_input_normalization = NoNormalization()

            self.fc1_ = gluon.nn.Dense(units=512, use_bias=True)
            # fc1_, output shape: {[512,1,1]}

            self.tanh1_ = gluon.nn.Activation(activation='tanh')
            self.fc2_ = gluon.nn.Dense(units=256, use_bias=True)
            # fc2_, output shape: {[256,1,1]}

            self.tanh2_ = gluon.nn.Activation(activation='tanh')
            self.fc3_ = gluon.nn.Dense(units=30, use_bias=True)
            # fc3_, output shape: {[30,1,1]}

        # Marker read by the surrounding framework; 'linear' means the output
        # activation is the identity (raw Q-values, no softmax).
        self.last_layer = 'linear'

    def hybrid_forward(self, F, state):
        """Forward pass: normalize the state, then run the MLP stack."""
        state = self.state_input_normalization(state)
        fc1_ = self.fc1_(state)
        tanh1_ = self.tanh1_(fc1_)
        fc2_ = self.fc2_(tanh1_)
        tanh2_ = self.tanh2_(fc2_)
        fc3_ = self.fc3_(tanh2_)
        return fc3_