import signal
import sys
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
import time
import os
import mxnet as mx
from mxnet import gluon, nd
import cnnarch_logger

LOSS_FUNCTIONS = {
        'l1': gluon.loss.L1Loss(),
        'l2': gluon.loss.L2Loss(),
        'huber': gluon.loss.HuberLoss(),
        'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(),
        'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()}


def make_directory_if_not_exist(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)


def copy_net(net, input_state_dim, ctx):
    assert isinstance(net, gluon.HybridBlock)
    assert type(net.__class__) is type

    net2 = net.__class__()
    net2.collect_params().initialize(mx.init.Zero(), ctx=ctx)
    net2.hybridize()
    net2(mx.nd.ones((1,) + input_state_dim, ctx=ctx))

    params_of_net = [p.data() for _, p in net.collect_params().items()]
    for i, (_, p) in enumerate(net2.collect_params().items()):
        p.set_data(params_of_net[i])

    return net2
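
# Hypothetical usage sketch (added for illustration, not part of the original
# module): in a DQN setup, copy_net can create a target network with the same
# weights as the online network. `QNet` and the state shape (4,) are made-up
# placeholders:
#
#     ctx = mx.cpu()
#     q_net = QNet()
#     q_net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
#     q_net.hybridize()
#     target_net = copy_net(q_net, (4,), ctx=ctx)
#
# Note that copy_net calls net.__class__() without arguments, so it only works
# for HybridBlocks whose constructor has no required parameters.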


def copy_net_with_two_inputs(net, input_state_dim1, input_state_dim2, ctx):
    assert isinstance(net, gluon.HybridBlock)
    assert type(net.__class__) is type

    net2 = net.__class__()
    net2.collect_params().initialize(mx.init.Zero(), ctx=ctx)
    net2.hybridize()
    net2(
        nd.ones((1,) + input_state_dim1, ctx=ctx),
        nd.ones((1,) + input_state_dim2, ctx=ctx))

    params_of_net = [p.data() for _, p in net.collect_params().items()]
    for i, (_, p) in enumerate(net2.collect_params().items()):
        p.set_data(params_of_net[i])

    return net2


def get_loss_function(loss_function_name):
    if loss_function_name not in LOSS_FUNCTIONS:
        raise ValueError('Unknown loss function: %s' % loss_function_name)
    return LOSS_FUNCTIONS[loss_function_name]
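
# Example (added for illustration, not part of the original module): losses are
# resolved by the string keys defined in LOSS_FUNCTIONS above, e.g.
#
#     loss_fn = get_loss_function('huber')
#     loss = loss_fn(nd.array([0.5, 2.0]), nd.array([1.0, 1.0]))
#
# An unknown name raises ValueError, so a typo in a training configuration
# fails fast instead of silently training with a wrong loss.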


class AgentSignalHandler(object):
    def __init__(self):
        signal.signal(signal.SIGINT, self.interrupt_training)
        self.__agent = None
        self.__times_interrupted = 0

    def register_agent(self, agent):
        self.__agent = agent

    def interrupt_training(self, sig, frame):
        self.__times_interrupted = self.__times_interrupted + 1
        if self.__times_interrupted <= 3:
            if self.__agent:
                self.__agent.set_interrupt_flag(True)
        else:
            print('Interrupt called more than three times: Force quit')
            sys.exit(1)
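
# Usage sketch (added for illustration, not part of the original module): a
# training script is expected to create the handler once and register its
# agent, which must provide a set_interrupt_flag() method:
#
#     handler = AgentSignalHandler()
#     handler.register_agent(agent)
#
# The first three Ctrl+C presses only ask the agent to stop gracefully; a
# fourth press terminates the process.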

style.use('fivethirtyeight')


class TrainingStats(object):
    def __init__(self, max_episodes):
        self._logger = cnnarch_logger.ArchLogger.get_logger()
        self._max_episodes = max_episodes
        self._all_total_rewards = np.zeros((max_episodes,))
        self._all_eps = np.zeros((max_episodes,))
        self._all_time = np.zeros((max_episodes,))
        self._all_mean_reward_last_100_episodes = np.zeros((max_episodes,))

    @property
    def logger(self):
        return self._logger

    @logger.setter
    def logger(self, logger):
        self._logger = logger

    @logger.deleter
    def logger(self):
        self._logger = None

    def add_total_reward(self, episode, total_reward):
        self._all_total_rewards[episode] = total_reward

    def add_eps(self, episode, eps):
        self._all_eps[episode] = eps

    def add_time(self, episode, time):
        self._all_time[episode] = time

    def add_mean_reward_last_100(self, episode, mean_reward):
        self._all_mean_reward_last_100_episodes[episode] = mean_reward

    def log_episode(self, *args):
        raise NotImplementedError

    def mean_of_reward(self, cur_episode, last=100):
        if cur_episode > 0:
            reward_last_100 =\
                self._all_total_rewards[max(0, cur_episode-last):cur_episode]
            return np.mean(reward_last_100)
        else:
            return self._all_total_rewards[0]
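
    # Added note: the averaging window excludes the current episode itself,
    # e.g. cur_episode=250 with last=100 averages episodes 150 to 249; for
    # episode 0 the first recorded reward is returned instead.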

    def save(self, path, episode=None):
        if episode is None:
            episode = self._max_episodes
        np.save(
            os.path.join(path, 'total_rewards'),
            self._all_total_rewards[:episode])
        np.save(os.path.join(path, 'eps'), self._all_eps[:episode])
        np.save(os.path.join(path, 'time'), self._all_time[:episode])
        np.save(
            os.path.join(path, 'mean_reward'),
            self._all_mean_reward_last_100_episodes[:episode])

    def _log_episode(self, episode, start_time, training_steps, eps, reward):
        self.add_eps(episode, eps)
        self.add_total_reward(episode, reward)
        end = time.time()
        mean_reward_last_100 = self.mean_of_reward(episode, last=100)
        time_elapsed = end - start_time
        self.add_time(episode, time_elapsed)
        self.add_mean_reward_last_100(episode, mean_reward_last_100)
        return ('Episode: %d, Total Reward: %.3f, '
                'Avg. Reward Last 100 Episodes: %.3f, {}, '
                'Time: %.3f, Training Steps: %d, Eps: %.3f') % (
                    episode, reward, mean_reward_last_100, time_elapsed,
                    training_steps, eps), mean_reward_last_100
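
    # Added note: _log_episode intentionally leaves a '{}' placeholder in the
    # returned string (the %-formatting above does not touch it). Subclasses
    # fill it with their algorithm-specific metrics before logging, roughly:
    #
    #     info, avg_reward = self._log_episode(episode, start, steps, eps, r)
    #     info = info.format('Avg. Loss: %.3f' % avg_loss)
    #     self._logger.info(info)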


class DqnTrainingStats(TrainingStats):
    def __init__(self, max_episodes):
        super(DqnTrainingStats, self).__init__(max_episodes)
        self._all_avg_loss = np.zeros((max_episodes,))

    def add_avg_loss(self, episode, avg_loss):
        self._all_avg_loss[episode] = avg_loss

    def log_episode(
        self, episode, start_time, training_steps, avg_loss, eps, reward
    ):
        self.add_avg_loss(episode, avg_loss)

        info, avg_reward = self._log_episode(
            episode, start_time, training_steps, eps, reward)
        info = info.format(('Avg. Loss: %.3f') % (avg_loss))

        self._logger.info(info)
        return avg_reward

    def save_stats(self, path, episode=None):
        if episode is None:
            episode = self._max_episodes

        all_total_rewards = self._all_total_rewards[:episode]
        all_avg_loss = self._all_avg_loss[:episode]
        all_eps = self._all_eps[:episode]
        all_mean_reward_last_100_episodes = \
            self._all_mean_reward_last_100_episodes[:episode]

        fig = plt.figure(figsize=(20, 20))

        sub_rewards = fig.add_subplot(221)
        sub_rewards.set_title('Total Rewards per episode')
        sub_rewards.plot(
            np.arange(episode), all_total_rewards)

        sub_loss = fig.add_subplot(222)
        sub_loss.set_title('Avg. Loss per episode')
        sub_loss.plot(np.arange(episode), all_avg_loss)

        sub_eps = fig.add_subplot(223)
        sub_eps.set_title('Epsilon per episode')
        sub_eps.plot(np.arange(episode), all_eps)

        sub_rewards = fig.add_subplot(224)
        sub_rewards.set_title('Avg. mean reward of last 100 episodes')
        sub_rewards.plot(np.arange(episode),
                         all_mean_reward_last_100_episodes)

        self.save(path, episode=episode)
        plt.savefig(os.path.join(path, 'stats.pdf'))

    def save(self, path, episode=None):
        if episode is None:
            episode = self._max_episodes
        super(DqnTrainingStats, self).save(path, episode=episode)
        np.save(os.path.join(path, 'avg_loss'), self._all_avg_loss[:episode])
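
# Hypothetical usage sketch (added for illustration, not part of the original
# module), assuming a DQN-style training loop that tracks per-episode averages:
#
#     stats = DqnTrainingStats(max_episodes=500)
#     for episode in range(500):
#         start = time.time()
#         # ... run one episode, collect steps, avg_loss, eps, total_reward ...
#         stats.log_episode(episode, start, steps, avg_loss, eps, total_reward)
#     make_directory_if_not_exist('./stats')
#     stats.save_stats('./stats')  # writes stats.pdf plus the .npy arrays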


class DdpgTrainingStats(TrainingStats):
    def __init__(self, max_episodes):
        super(DdpgTrainingStats, self).__init__(max_episodes)
        self._all_avg_critic_loss = np.zeros((max_episodes,))
        self._all_avg_actor_loss = np.zeros((max_episodes,))
        self._all_avg_qvalues = np.zeros((max_episodes,))

    def add_avg_critic_loss(self, episode, avg_critic_loss):
        self._all_avg_critic_loss[episode] = avg_critic_loss

    def add_avg_actor_loss(self, episode, avg_actor_loss):
        self._all_avg_actor_loss[episode] = avg_actor_loss

    def add_avg_qvalues(self, episode, avg_qvalues):
        self._all_avg_qvalues[episode] = avg_qvalues

    def log_episode(
        self, episode, start_time, training_steps, actor_loss,
        critic_loss, qvalues, eps, reward
    ):
        self.add_avg_actor_loss(episode, actor_loss)
        self.add_avg_critic_loss(episode, critic_loss)
        self.add_avg_qvalues(episode, qvalues)

        info, avg_reward = self._log_episode(
            episode, start_time, training_steps, eps, reward)
        info = info.format((
            'Avg. Actor Loss: %.3f '
            'Avg. Critic Loss: %.3f '
            'Avg. Q-Values: %.3f') % (actor_loss, critic_loss, qvalues))

        self.logger.info(info)
        return avg_reward

    def save(self, path, episode=None):
        if episode is None:
            episode = self._max_episodes
        super(DdpgTrainingStats, self).save(path, episode=episode)
        np.save(os.path.join(
            path, 'avg_critic_loss'), self._all_avg_critic_loss[:episode])
        np.save(os.path.join(
            path, 'avg_actor_loss'), self._all_avg_actor_loss[:episode])
        np.save(os.path.join(
            path, 'avg_qvalues'), self._all_avg_qvalues[:episode])

    def save_stats(self, path, episode=None):
        if episode is None:
            episode = self._max_episodes

        all_total_rewards = self._all_total_rewards[:episode]
        all_avg_actor_loss = self._all_avg_actor_loss[:episode]
        all_avg_critic_loss = self._all_avg_critic_loss[:episode]
        all_avg_qvalues = self._all_avg_qvalues[:episode]
        all_eps = self._all_eps[:episode]
        all_mean_reward_last_100_episodes = \
            self._all_mean_reward_last_100_episodes[:episode]

        fig = plt.figure(figsize=(120, 120))

        sub_rewards = fig.add_subplot(321)
        sub_rewards.set_title('Total Rewards per episode')
        sub_rewards.plot(
            np.arange(episode), all_total_rewards)

        sub_actor_loss = fig.add_subplot(322)
        sub_actor_loss.set_title('Avg. Actor Loss per episode')
        sub_actor_loss.plot(
            np.arange(episode), all_avg_actor_loss)

        sub_critic_loss = fig.add_subplot(323)
        sub_critic_loss.set_title('Avg. Critic Loss per episode')
        sub_critic_loss.plot(
            np.arange(episode), all_avg_critic_loss)

        sub_qvalues = fig.add_subplot(324)
        sub_qvalues.set_title('Avg. QValues per episode')
        sub_qvalues.plot(
            np.arange(episode), all_avg_qvalues)

        sub_eps = fig.add_subplot(325)
        sub_eps.set_title('Epsilon per episode')
        sub_eps.plot(np.arange(episode), all_eps)

        sub_rewards = fig.add_subplot(326)
        sub_rewards.set_title('Avg. mean reward of last 100 episodes')
        sub_rewards.plot(np.arange(episode),
                         all_mean_reward_last_100_episodes)

        self.save(path, episode=episode)
        plt.savefig(os.path.join(path, 'stats.pdf'))
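
# Hypothetical usage sketch (added for illustration, not part of the original
# module): the DDPG variant is driven like the DQN one, but log_episode also
# expects the per-episode average actor loss, critic loss and Q-values:
#
#     stats = DdpgTrainingStats(max_episodes=500)
#     stats.log_episode(episode, start, steps, actor_loss, critic_loss,
#                       qvalues, eps, total_reward)
#     stats.save_stats('./stats')  # 3x2 grid of plots plus the .npy arrays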