CNNNet_torcs_agent_torcsAgent_dqn.py 3.68 KB
Newer Older
1
2
3
4
import mxnet as mx
import numpy as np
from mxnet import gluon

Nicola Gatto's avatar
Nicola Gatto committed
5
6
7
8
9
10
11
12
13
14
class OneHot(gluon.HybridBlock):
    """Block that one-hot encodes the arg-max taken along axis 1 of its input."""

    def __init__(self, size, **kwargs):
        super(OneHot, self).__init__(**kwargs)
        with self.name_scope():
            # Forwarded as ``depth`` to ``one_hot`` in ``hybrid_forward``.
            self.size = size

    def hybrid_forward(self, F, x):
        """Return ``one_hot(argmax(x, axis=1))`` using ``self.size`` as depth."""
        winning_index = F.argmax(data=x, axis=1)
        encoded = F.one_hot(indices=winning_index, depth=self.size)
        return encoded


15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
class Softmax(gluon.HybridBlock):
    """Block wrapping ``F.softmax`` so it can be used as a network layer."""

    def __init__(self, **kwargs):
        super(Softmax, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        """Apply ``F.softmax`` to ``x`` with the operator's default arguments."""
        normalized = F.softmax(x)
        return normalized


class Split(gluon.HybridBlock):
    """Block that splits its input into ``num_outputs`` parts along ``axis``."""

    def __init__(self, num_outputs, axis=1, **kwargs):
        super(Split, self).__init__(**kwargs)
        with self.name_scope():
            # Both values are forwarded unchanged to ``F.split``.
            self.num_outputs = num_outputs
            self.axis = axis

    def hybrid_forward(self, F, x):
        """Return the result of ``F.split`` applied to ``x``."""
        pieces = F.split(
            data=x,
            axis=self.axis,
            num_outputs=self.num_outputs,
        )
        return pieces


class Concatenate(gluon.HybridBlock):
    """Block that joins all of its inputs along dimension ``dim``."""

    def __init__(self, dim=1, **kwargs):
        super(Concatenate, self).__init__(**kwargs)
        with self.name_scope():
            # Forwarded as ``dim`` to ``F.concat``.
            self.dim = dim

    def hybrid_forward(self, F, *x):
        """Return ``F.concat`` of every positional input, in call order."""
        joined = F.concat(*x, dim=self.dim)
        return joined


class ZScoreNormalization(gluon.HybridBlock):
    """Block that standardizes its input as ``(x - data_mean) / data_std``.

    The mean and standard deviation are stored as non-differentiable block
    parameters that are initialized to the constant values passed in.

    Args:
        data_mean: Array-like exposing ``.shape`` and ``.asnumpy()``
            (presumably an ``mx.nd.NDArray`` -- confirm against callers);
            the value subtracted from the input.
        data_std: Array-like exposing ``.shape`` and ``.asnumpy()``;
            the value the centered input is divided by.
        **kwargs: Forwarded to ``gluon.HybridBlock.__init__``.
    """

    def __init__(self, data_mean, data_std, **kwargs):
        super(ZScoreNormalization, self).__init__(**kwargs)
        with self.name_scope():
            self.data_mean = self.params.get(
                'data_mean',
                shape=data_mean.shape,
                init=mx.init.Constant(data_mean.asnumpy().tolist()),
                differentiable=False)
            # Declare the parameter with the shape of data_std itself rather
            # than borrowing data_mean's shape, so the declared shape always
            # matches the constant it is initialized with.
            self.data_std = self.params.get(
                'data_std',
                shape=data_std.shape,
                init=mx.init.Constant(data_std.asnumpy().tolist()),
                differentiable=False)

    def hybrid_forward(self, F, x, data_mean, data_std):
        """Return ``x`` centered by ``data_mean`` and scaled by ``data_std``.

        Both operations use the broadcasting variants of subtract/divide.
        """
        x = F.broadcast_sub(x, data_mean)
        x = F.broadcast_div(x, data_std)
        return x


class Padding(gluon.HybridBlock):
    """Block that pads its input with zeros using ``F.pad``."""

    def __init__(self, padding, **kwargs):
        super(Padding, self).__init__(**kwargs)
        with self.name_scope():
            # Forwarded as ``pad_width`` to ``F.pad``.
            self.pad_width = padding

    def hybrid_forward(self, F, x):
        """Return ``x`` padded in ``'constant'`` mode with the value 0."""
        padded = F.pad(data=x, mode='constant', pad_width=self.pad_width, constant_value=0)
        return padded


class NoNormalization(gluon.HybridBlock):
    """Identity block used where no input normalization is applied."""

    def __init__(self, **kwargs):
        super(NoNormalization, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        """Return ``x`` unchanged."""
        unchanged = x
        return unchanged


Nicola Gatto's avatar
Nicola Gatto committed
81
class Net_0(gluon.HybridBlock):
    """Three-layer fully connected network producing ``qvalues_`` from ``state_``.

    The input is optionally z-score normalized, then passed through
    Dense(512) -> tanh -> Dense(256) -> tanh -> Dense(30).

    Args:
        data_mean: Optional mapping with a ``'state_'`` entry holding the
            mean used for input normalization. When falsy, the input is
            passed through unchanged.
        data_std: Mapping with a ``'state_'`` entry holding the standard
            deviation; required whenever ``data_mean`` is given.
        **kwargs: Forwarded to ``gluon.HybridBlock.__init__``.

    Raises:
        ValueError: If ``data_mean`` is given but ``data_std`` is missing.
    """

    def __init__(self, data_mean=None, data_std=None, **kwargs):
        super(Net_0, self).__init__(**kwargs)
        self.last_layers = {}
        with self.name_scope():
            if data_mean:
                # Explicit check instead of ``assert`` so the validation is
                # not stripped when Python runs with -O.
                if not data_std:
                    raise ValueError(
                        'data_std must be provided together with data_mean')
                self.input_normalization_state_ = ZScoreNormalization(
                    data_mean=data_mean['state_'],
                    data_std=data_std['state_'])
            else:
                self.input_normalization_state_ = NoNormalization()

            self.fc1_ = gluon.nn.Dense(units=512, use_bias=True)
            # fc1_, output shape: {[512,1,1]}

            self.tanh1_ = gluon.nn.Activation(activation='tanh')
            self.fc2_ = gluon.nn.Dense(units=256, use_bias=True)
            # fc2_, output shape: {[256,1,1]}

            self.tanh2_ = gluon.nn.Activation(activation='tanh')
            self.fc3_ = gluon.nn.Dense(units=30, use_bias=True)
            # fc3_, output shape: {[30,1,1]}

    def hybrid_forward(self, F, state_):
        """Run the forward pass and return the output of the last dense layer."""
        state_ = self.input_normalization_state_(state_)
        fc1_ = self.fc1_(state_)
        tanh1_ = self.tanh1_(fc1_)
        fc2_ = self.fc2_(tanh1_)
        tanh2_ = self.tanh2_(fc2_)
        fc3_ = self.fc3_(tanh2_)
        qvalues_ = fc3_

        return qvalues_
Nicola Gatto's avatar
Nicola Gatto committed
116