util.py
import signal
import sys
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
import time
import os
import mxnet as mx
from mxnet import gluon, nd
import cnnarch_logger

LOSS_FUNCTIONS = {
        'l1': gluon.loss.L1Loss(),
        'euclidean': gluon.loss.L2Loss(),
        'huber_loss': gluon.loss.HuberLoss(),
        'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(),
        'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()}


def make_directory_if_not_exist(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)


def copy_net(net, input_state_dim, ctx):
    """Create a fresh instance of net's class and copy all parameters into it."""
    assert isinstance(net, gluon.HybridBlock)
    assert type(net.__class__) is type

    # Instantiate the same architecture and run one dummy forward pass so
    # that all parameter shapes exist before the weights are copied over.
    net2 = net.__class__()
    net2.collect_params().initialize(mx.init.Zero(), ctx=ctx)
    net2.hybridize()
    net2(mx.nd.ones((1,) + input_state_dim, ctx=ctx))

    params_of_net = [p.data() for _, p in net.collect_params().items()]
    for i, (_, p) in enumerate(net2.collect_params().items()):
        p.set_data(params_of_net[i])

    return net2
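
# Minimal usage sketch for copy_net (QNet and the state shape (4,) are
# illustrative placeholders, not part of this module):
#
#     qnet = QNet()
#     qnet.collect_params().initialize(mx.init.Xavier(), ctx=mx.cpu())
#     qnet(nd.ones((1, 4)))                        # build the parameter shapes
#     target_net = copy_net(qnet, (4,), mx.cpu())  # independent target network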


def copy_net_with_two_inputs(net, input_state_dim1, input_state_dim2, ctx):
    """Like copy_net, but for networks that take two input tensors."""
    assert isinstance(net, gluon.HybridBlock)
    assert type(net.__class__) is type

    # Instantiate the same architecture and run one dummy forward pass so
    # that all parameter shapes exist before the weights are copied over.
    net2 = net.__class__()
    net2.collect_params().initialize(mx.init.Zero(), ctx=ctx)
    net2.hybridize()
    net2(
        nd.ones((1,) + input_state_dim1, ctx=ctx),
        nd.ones((1,) + input_state_dim2, ctx=ctx))

    params_of_net = [p.data() for _, p in net.collect_params().items()]
    for i, (_, p) in enumerate(net2.collect_params().items()):
        p.set_data(params_of_net[i])

    return net2


def get_loss_function(loss_function_name):
    """Look up a Gluon loss object by name."""
    if loss_function_name not in LOSS_FUNCTIONS:
        raise ValueError(
            "Loss function '%s' does not exist" % loss_function_name)
    return LOSS_FUNCTIONS[loss_function_name]
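
# Minimal usage sketch (the name must be one of the keys of LOSS_FUNCTIONS):
#
#     loss_fn = get_loss_function('huber_loss')
#     loss = loss_fn(nd.array([1.0, 2.0]), nd.array([1.5, 1.5]))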


class AgentSignalHandler(object):
    """Catch SIGINT (Ctrl+C) and ask the registered agent to stop gracefully."""

    def __init__(self):
        signal.signal(signal.SIGINT, self.interrupt_training)
        self.__agent = None
        self.__times_interrupted = 0

    def register_agent(self, agent):
        self.__agent = agent

    def interrupt_training(self, sig, frame):
        self.__times_interrupted = self.__times_interrupted + 1
        if self.__times_interrupted <= 3:
            if self.__agent:
                self.__agent.set_interrupt_flag(True)
        else:
            print('Interrupt called more than three times: Force quit')
            sys.exit(1)
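
# Minimal usage sketch (the agent object is illustrative; anything exposing
# set_interrupt_flag(bool) works):
#
#     handler = AgentSignalHandler()
#     handler.register_agent(agent)   # Ctrl+C now requests a graceful stop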

style.use('fivethirtyeight')


class TrainingStats(object):
    """Collect per-episode training statistics (reward, epsilon, time)."""

    def __init__(self, max_episodes):
        self._logger = cnnarch_logger.ArchLogger.get_logger()
        self._max_episodes = max_episodes
        self._all_total_rewards = np.zeros((max_episodes,))
        self._all_eps = np.zeros((max_episodes,))
        self._all_time = np.zeros((max_episodes,))
        self._all_mean_reward_last_100_episodes = np.zeros((max_episodes,))

    @property
    def logger(self):
        return self._logger

    @logger.setter
    def logger(self, logger):
        self._logger = logger

    @logger.deleter
    def logger(self):
        self._logger = None

    def add_total_reward(self, episode, total_reward):
        self._all_total_rewards[episode] = total_reward

    def add_eps(self, episode, eps):
        self._all_eps[episode] = eps

    def add_time(self, episode, time):
        self._all_time[episode] = time

    def add_mean_reward_last_100(self, episode, mean_reward):
        self._all_mean_reward_last_100_episodes[episode] = mean_reward

    def log_episode(self, *args):
        raise NotImplementedError

    def mean_of_reward(self, cur_episode, last=100):
        if cur_episode > 0:
            reward_last_100 =\
                self._all_total_rewards[max(0, cur_episode-last):cur_episode]
            return np.mean(reward_last_100)
        else:
            return self._all_total_rewards[0]

    def save(self, path, episode=None):
        if episode is None:
            episode = self._max_episodes
        np.save(
            os.path.join(path, 'total_rewards'),
            self._all_total_rewards[:episode])
        np.save(os.path.join(path, 'eps'), self._all_eps[:episode])
        np.save(os.path.join(path, 'time'), self._all_time[:episode])
        np.save(
            os.path.join(path, 'mean_reward'),
            self._all_mean_reward_last_100_episodes[:episode])

    def _log_episode(self, episode, start_time, training_steps, eps, reward):
        self.add_eps(episode, eps)
        self.add_total_reward(episode, reward)
        end = time.time()
        mean_reward_last_100 = self.mean_of_reward(episode, last=100)
        time_elapsed = end - start_time
        self.add_time(episode, time_elapsed)
        self.add_mean_reward_last_100(episode, mean_reward_last_100)
        return ('Episode: %d, Total Reward: %.3f, '
                'Avg. Reward Last 100 Episodes: %.3f, {}, '
                'Time: %.3f, Training Steps: %d, Eps: %.3f') % (
                    episode, reward, mean_reward_last_100, time_elapsed,
                    training_steps, eps), mean_reward_last_100


class DqnTrainingStats(TrainingStats):
    """Training statistics for DQN; additionally tracks the average loss."""

    def __init__(self, max_episodes):
        super(DqnTrainingStats, self).__init__(max_episodes)
        self._all_avg_loss = np.zeros((max_episodes,))

    def add_avg_loss(self, episode, avg_loss):
        self._all_avg_loss[episode] = avg_loss

    def log_episode(
        self, episode, start_time, training_steps, avg_loss, eps, reward
    ):
        self.add_avg_loss(episode, avg_loss)

        info, avg_reward = self._log_episode(
            episode, start_time, training_steps, eps, reward)
        info = info.format('Avg. Loss: %.3f' % avg_loss)

        self._logger.info(info)
        return avg_reward

    def save_stats(self, path, episode=None):
        if episode is None:
            episode = self._max_episodes

        all_total_rewards = self._all_total_rewards[:episode]
        all_avg_loss = self._all_avg_loss[:episode]
        all_eps = self._all_eps[:episode]
        all_mean_reward_last_100_episodes = \
            self._all_mean_reward_last_100_episodes[:episode]

        fig = plt.figure(figsize=(20, 20))

        sub_rewards = fig.add_subplot(221)
        sub_rewards.set_title('Total Rewards per episode')
        sub_rewards.plot(
            np.arange(episode), all_total_rewards)

        sub_loss = fig.add_subplot(222)
        sub_loss.set_title('Avg. Loss per episode')
        sub_loss.plot(np.arange(episode), all_avg_loss)

        sub_eps = fig.add_subplot(223)
        sub_eps.set_title('Epsilon per episode')
        sub_eps.plot(np.arange(episode), all_eps)

        sub_rewards = fig.add_subplot(224)
        sub_rewards.set_title('Avg. mean reward of last 100 episodes')
        sub_rewards.plot(np.arange(episode),
                         all_mean_reward_last_100_episodes)

        self.save(path, episode=episode)
        plt.savefig(os.path.join(path, 'stats.pdf'))

    def save(self, path, episode=None):
        if episode is None:
            episode = self._max_episodes
        super(DqnTrainingStats, self).save(path, episode=episode)
        np.save(os.path.join(path, 'avg_loss'), self._all_avg_loss[:episode])
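
# Minimal usage sketch for DqnTrainingStats (all values are illustrative):
#
#     stats = DqnTrainingStats(max_episodes=500)
#     avg_reward = stats.log_episode(
#         episode=0, start_time=time.time(), training_steps=42,
#         avg_loss=0.12, eps=0.9, reward=10.0)
#     make_directory_if_not_exist('./stats')
#     stats.save_stats('./stats')   # writes stats.pdf and the .npy arrays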


class DdpgTrainingStats(TrainingStats):
    """Training statistics for DDPG; tracks actor/critic loss and Q-values."""

    def __init__(self, max_episodes):
        super(DdpgTrainingStats, self).__init__(max_episodes)
        self._all_avg_critic_loss = np.zeros((max_episodes,))
        self._all_avg_actor_loss = np.zeros((max_episodes,))
        self._all_avg_qvalues = np.zeros((max_episodes,))

    def add_avg_critic_loss(self, episode, avg_critic_loss):
        self._all_avg_critic_loss[episode] = avg_critic_loss

    def add_avg_actor_loss(self, episode, avg_actor_loss):
        self._all_avg_actor_loss[episode] = avg_actor_loss

    def add_avg_qvalues(self, episode, avg_qvalues):
        self._all_avg_qvalues[episode] = avg_qvalues

    def log_episode(
        self, episode, start_time, training_steps, actor_loss,
        critic_loss, qvalues, eps, reward
    ):
        self.add_avg_actor_loss(episode, actor_loss)
        self.add_avg_critic_loss(episode, critic_loss)
        self.add_avg_qvalues(episode, qvalues)

        info, avg_reward = self._log_episode(
            episode, start_time, training_steps, eps, reward)
        info = info.format((
            'Avg. Actor Loss: %.3f '
            'Avg. Critic Loss: %.3f '
            'Avg. Q-Values: %.3f') % (actor_loss, critic_loss, qvalues))

        self.logger.info(info)
        return avg_reward

    def save(self, path, episode=None):
        if episode is None:
            episode = self._max_episodes
        super(DdpgTrainingStats, self).save(path, episode=episode)
        np.save(
            os.path.join(path, 'avg_critic_loss'),
            self._all_avg_critic_loss[:episode])
        np.save(
            os.path.join(path, 'avg_actor_loss'),
            self._all_avg_actor_loss[:episode])
        np.save(
            os.path.join(path, 'avg_qvalues'),
            self._all_avg_qvalues[:episode])

    def save_stats(self, path, episode=None):
        if episode is None:
            episode = self._max_episodes

        all_total_rewards = self._all_total_rewards[:episode]
        all_avg_actor_loss = self._all_avg_actor_loss[:episode]
        all_avg_critic_loss = self._all_avg_critic_loss[:episode]
        all_avg_qvalues = self._all_avg_qvalues[:episode]
        all_eps = self._all_eps[:episode]
        all_mean_reward_last_100_episodes = \
            self._all_mean_reward_last_100_episodes[:episode]

        fig = plt.figure(figsize=(120, 120))

        sub_rewards = fig.add_subplot(321)
        sub_rewards.set_title('Total Rewards per episode')
        sub_rewards.plot(
            np.arange(episode), all_total_rewards)

        sub_actor_loss = fig.add_subplot(322)
        sub_actor_loss.set_title('Avg. Actor Loss per episode')
        sub_actor_loss.plot(
            np.arange(episode), all_avg_actor_loss)

        sub_critic_loss = fig.add_subplot(323)
        sub_critic_loss.set_title('Avg. Critic Loss per episode')
        sub_critic_loss.plot(
            np.arange(episode), all_avg_critic_loss)

        sub_qvalues = fig.add_subplot(324)
        sub_qvalues.set_title('Avg. QValues per episode')
        sub_qvalues.plot(
            np.arange(episode), all_avg_qvalues)

        sub_eps = fig.add_subplot(325)
        sub_eps.set_title('Epsilon per episode')
        sub_eps.plot(np.arange(episode), all_eps)

        sub_rewards = fig.add_subplot(326)
        sub_rewards.set_title('Avg. mean reward of last 100 episodes')
        sub_rewards.plot(np.arange(episode),
                         all_mean_reward_last_100_episodes)

        self.save(path, episode=episode)
        plt.savefig(os.path.join(path, 'stats.pdf'))
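
# Minimal usage sketch for DdpgTrainingStats (all values are illustrative):
#
#     stats = DdpgTrainingStats(max_episodes=500)
#     avg_reward = stats.log_episode(
#         episode=0, start_time=time.time(), training_steps=100,
#         actor_loss=-0.5, critic_loss=0.8, qvalues=1.2, eps=0.3, reward=12.0)
#     make_directory_if_not_exist('./stats')
#     stats.save_stats('./stats')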