CNNNet_mountaincar_agent_mountaincarCritic.py 4.31 KB
Newer Older
Nicola Gatto's avatar
Nicola Gatto committed
1 2 3 4
import mxnet as mx
import numpy as np
from mxnet import gluon

Nicola Gatto's avatar
Nicola Gatto committed
5 6 7 8 9 10 11 12 13 14
class OneHot(gluon.HybridBlock):
    """Hybrid block that one-hot encodes the arg-max of its input.

    For every slice along axis 1 the index of the largest entry is taken and
    expanded into a one-hot vector of length ``size``.

    Args:
        size: Depth (length) of the generated one-hot vectors.
        **kwargs: Forwarded to ``gluon.HybridBlock``.
    """

    def __init__(self, size, **kwargs):
        super(OneHot, self).__init__(**kwargs)
        with self.name_scope():
            self.size = size

    def hybrid_forward(self, F, x):
        """Return ``one_hot(argmax(x, axis=1), depth=size)``."""
        winning_indices = F.argmax(data=x, axis=1)
        return F.one_hot(indices=winning_indices, depth=self.size)


Nicola Gatto's avatar
Nicola Gatto committed
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
class Softmax(gluon.HybridBlock):
    """Hybrid block that applies ``F.softmax`` with its default arguments.

    Args:
        **kwargs: Forwarded to ``gluon.HybridBlock``.
    """

    def __init__(self, **kwargs):
        super(Softmax, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        """Return the softmax of ``x``."""
        normalized = F.softmax(x)
        return normalized


class Split(gluon.HybridBlock):
    """Hybrid block that splits its input into several outputs along one axis.

    Args:
        num_outputs: Number of pieces handed to ``F.split``.
        axis: Axis along which the input is split (defaults to 1).
        **kwargs: Forwarded to ``gluon.HybridBlock``.
    """

    def __init__(self, num_outputs, axis=1, **kwargs):
        super(Split, self).__init__(**kwargs)
        with self.name_scope():
            self.num_outputs = num_outputs
            self.axis = axis

    def hybrid_forward(self, F, x):
        """Return the result of ``F.split`` on ``x`` with the stored settings."""
        pieces = F.split(
            data=x,
            axis=self.axis,
            num_outputs=self.num_outputs,
        )
        return pieces


class Concatenate(gluon.HybridBlock):
    """Hybrid block that concatenates all of its inputs along one dimension.

    Args:
        dim: Dimension along which the inputs are joined (defaults to 1).
        **kwargs: Forwarded to ``gluon.HybridBlock``.
    """

    def __init__(self, dim=1, **kwargs):
        super(Concatenate, self).__init__(**kwargs)
        with self.name_scope():
            self.dim = dim

    def hybrid_forward(self, F, *tensors):
        """Return ``F.concat`` of every positional input along ``self.dim``."""
        joined = F.concat(*tensors, dim=self.dim)
        return joined


class ZScoreNormalization(gluon.HybridBlock):
    """Hybrid block computing ``(x - data_mean) / data_std`` with broadcasting.

    The mean and standard deviation are stored as non-differentiable block
    parameters that are initialised to the given constant values.

    Args:
        data_mean: Array-like exposing ``.shape`` and ``.asnumpy()``; the
            value subtracted from the input.
        data_std: Array-like exposing ``.shape`` and ``.asnumpy()``; the
            value the centred input is divided by. No guard against zero
            entries is applied here.
        **kwargs: Forwarded to ``gluon.HybridBlock``.
    """

    def __init__(self, data_mean, data_std, **kwargs):
        super(ZScoreNormalization, self).__init__(**kwargs)
        with self.name_scope():
            self.data_mean = self.params.get(
                'data_mean',
                shape=data_mean.shape,
                init=mx.init.Constant(data_mean.asnumpy().tolist()),
                differentiable=False)
            # Use data_std's own shape: the parameter is initialised from
            # data_std, so sizing it after data_mean was a copy/paste slip
            # that only worked while both arrays happened to share a shape.
            self.data_std = self.params.get(
                'data_std',
                shape=data_std.shape,
                init=mx.init.Constant(data_std.asnumpy().tolist()),
                differentiable=False)

    def hybrid_forward(self, F, x, data_mean, data_std):
        """Return ``x`` centred by ``data_mean`` and scaled by ``data_std``.

        ``data_mean`` and ``data_std`` are the registered parameters; their
        argument names must match the parameter names used in ``__init__``.
        """
        centred = F.broadcast_sub(x, data_mean)
        return F.broadcast_div(centred, data_std)


class Padding(gluon.HybridBlock):
    """Hybrid block that pads its input with zeros.

    Args:
        padding: Value passed unchanged as ``pad_width`` to ``F.pad``.
        **kwargs: Forwarded to ``gluon.HybridBlock``.
    """

    def __init__(self, padding, **kwargs):
        super(Padding, self).__init__(**kwargs)
        with self.name_scope():
            self.pad_width = padding

    def hybrid_forward(self, F, x):
        """Return ``x`` padded in constant mode with the value 0."""
        return F.pad(data=x, mode='constant', pad_width=self.pad_width, constant_value=0)


class NoNormalization(gluon.HybridBlock):
    """Identity hybrid block: hands its input back untouched.

    Args:
        **kwargs: Forwarded to ``gluon.HybridBlock``.
    """

    def __init__(self, **kwargs):
        super(NoNormalization, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        """Return ``x`` unchanged."""
        passthrough = x
        return passthrough


Nicola Gatto's avatar
Nicola Gatto committed
81
class Net_0(gluon.HybridBlock):
    """Two-branch network mapping a (state, action) pair to one output unit.

    The state goes through Dense(400) -> ReLU -> Dense(300) and the action
    through Dense(300). Both branches are added element-wise, passed through
    a ReLU and reduced by a final Dense(1) layer.
    NOTE(review): judging by the file name this is presumably the critic of
    an actor-critic agent -- confirm against the caller.

    Args:
        data_mean: Optional mapping with ``'state'`` and ``'action'`` entries
            holding the means used for z-score normalization of the inputs.
            When falsy, both inputs are passed through unchanged.
        data_std: Mapping with the same keys holding the standard
            deviations; must be truthy whenever ``data_mean`` is.
        **kwargs: Forwarded to ``gluon.HybridBlock``.

    Raises:
        AssertionError: If ``data_mean`` is given without ``data_std``.
    """

    def __init__(self, data_mean=None, data_std=None, **kwargs):
        super(Net_0, self).__init__(**kwargs)
        self.last_layers = {}
        with self.name_scope():
            # The child blocks below are created in their original order.
            # NOTE(review): gluon appears to derive auto-generated parameter
            # prefixes from creation order, so reordering may break loading
            # of previously saved parameters -- confirm before changing.
            if data_mean:
                assert data_std, 'data_std is required when data_mean is given'
                self.input_normalization_state = ZScoreNormalization(
                    data_mean=data_mean['state'],
                    data_std=data_std['state'])
            else:
                self.input_normalization_state = NoNormalization()

            self.fc2_1_ = gluon.nn.Dense(units=400, use_bias=True)
            # fc2_1_, output shape: {[400,1,1]}

            self.relu2_1_ = gluon.nn.Activation(activation='relu')
            self.fc3_1_ = gluon.nn.Dense(units=300, use_bias=True)
            # fc3_1_, output shape: {[300,1,1]}

            if data_mean:
                assert data_std, 'data_std is required when data_mean is given'
                self.input_normalization_action = ZScoreNormalization(
                    data_mean=data_mean['action'],
                    data_std=data_std['action'])
            else:
                self.input_normalization_action = NoNormalization()

            self.fc2_2_ = gluon.nn.Dense(units=300, use_bias=True)
            # fc2_2_, output shape: {[300,1,1]}

            self.relu4_ = gluon.nn.Activation(activation='relu')
            self.fc4_ = gluon.nn.Dense(units=1, use_bias=True)
            # fc4_, output shape: {[1,1,1]}

    def hybrid_forward(self, F, state, action):
        """Run the forward pass and return the output of the final Dense(1).

        Args:
            F: Backend namespace supplied by gluon; not used directly here.
            state: Input fed to the state branch.
            action: Input fed to the action branch.
        """
        # State branch: normalize -> Dense(400) -> ReLU -> Dense(300).
        state = self.input_normalization_state(state)
        fc2_1_ = self.fc2_1_(state)
        relu2_1_ = self.relu2_1_(fc2_1_)
        fc3_1_ = self.fc3_1_(relu2_1_)

        # Action branch: normalize -> Dense(300).
        action = self.input_normalization_action(action)
        fc2_2_ = self.fc2_2_(action)

        # Merge both branches by addition, then ReLU -> Dense(1).
        add4_ = fc3_1_ + fc2_2_
        relu4_ = self.relu4_(add4_)
        fc4_ = self.fc4_(relu4_)

        return fc4_