...
 
Commits (2)
......@@ -8,7 +8,7 @@
<groupId>de.monticore.lang.monticar</groupId>
<artifactId>embedded-montiarc-emadl-generator</artifactId>
<version>0.3.2-SNAPSHOT</version>
<version>0.3.2-SNAPSHOT-NG</version>
<!-- == PROJECT DEPENDENCIES ============================================= -->
......@@ -19,7 +19,7 @@
<CNNTrain.version>0.3.2-SNAPSHOT</CNNTrain.version>
<cnnarch-mxnet-generator.version>0.2.15-SNAPSHOT</cnnarch-mxnet-generator.version>
<cnnarch-caffe2-generator.version>0.2.11-SNAPSHOT</cnnarch-caffe2-generator.version>
<cnnarch-gluon-generator.version>0.2.1-SNAPSHOT</cnnarch-gluon-generator.version>
<cnnarch-gluon-generator.version>0.2.1-SNAPSHOT-NG</cnnarch-gluon-generator.version>
<embedded-montiarc-math-opt-generator>0.1.4</embedded-montiarc-math-opt-generator>
<!-- .. Libraries .................................................. -->
......
......@@ -114,6 +114,8 @@ class Agent(object):
agent_session_file = os.path.join(session_dir, 'agent.p')
logger = self._logger
self._training_stats.save_stats(self._output_directory, episode=self._current_episode)
self._make_pickle_ready(session_dir)
with open(agent_session_file, 'wb') as f:
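Note on this hunk: the training stats are now written out, truncated to the episode actually reached, before the agent object is made pickle-ready. A rough sketch of the surrounding save flow, reconstructed around the shown lines (the pickle call and the module-level os/pickle imports are assumptions, not part of the diff):

    def save(self, session_dir):
        agent_session_file = os.path.join(session_dir, 'agent.p')
        # write plots/arrays only up to the current episode (the new call in this hunk)
        self._training_stats.save_stats(self._output_directory,
                                        episode=self._current_episode)
        # dump the non-picklable Gluon networks to disk and detach them
        self._make_pickle_ready(session_dir)
        with open(agent_session_file, 'wb') as f:
            pickle.dump(self, f)  # assumed continuation; the hunk is cut off here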
......@@ -177,6 +179,9 @@ class Agent(object):
return states, actions, rewards, next_states, terminals
def evaluate(self, target=None, sample_games=100, verbose=True):
if sample_games <= 0:
return 0
target = self._target_score if target is None else target
if target:
target_achieved = 0
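The new guard returns early when sample_games is not positive; otherwise the average computed over sample_games further down would divide by zero. A minimal standalone sketch of the intent (the loop body and the play_episode callable are hypothetical, only the guard comes from the diff):

    def evaluate(play_episode, sample_games=100):
        if sample_games <= 0:            # new guard: nothing to evaluate
            return 0
        total = 0.0
        for _ in range(sample_games):    # assumed shape of the evaluation loop
            total += play_episode()
        return total / sample_games      # safe now that sample_games > 0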
......@@ -268,8 +273,9 @@ class Agent(object):
def _save_net(self, net, filename, filedir=None):
filedir = self._output_directory if filedir is None else filedir
filename = os.path.join(filedir, filename + '.params')
net.save_parameters(filename)
filename = os.path.join(filedir, filename)
net.save_parameters(filename + '.params')
net.export(filename, epoch=0)
def save_best_network(self, path, epoch=0):
self._logger.info(
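_save_net now keeps the bare prefix and appends '.params' only for save_parameters, because net.export(prefix, epoch=0) derives its own file names: for a hybridized Gluon HybridBlock it writes prefix-symbol.json plus prefix-0000.params, which can later be loaded without the Python network class. A small standalone sketch (the toy network and the 'qnet' prefix are illustrative only):

    from mxnet import gluon, nd

    net = gluon.nn.HybridSequential()
    net.add(gluon.nn.Dense(16, activation='relu'), gluon.nn.Dense(2))
    net.initialize()
    net.hybridize()
    net(nd.zeros((1, 4)))                    # one forward pass so the symbolic graph is cached

    prefix = 'qnet'                          # illustrative file prefix
    net.save_parameters(prefix + '.params')  # weights only; reloading needs the Python class
    net.export(prefix, epoch=0)              # writes qnet-symbol.json and qnet-0000.params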
......@@ -367,6 +373,8 @@ class DdpgAgent(Agent):
def _make_pickle_ready(self, session_dir):
super(DdpgAgent, self)._make_pickle_ready(session_dir)
self._save_net(self._actor, 'current_actor')
self._save_net(self._actor, 'actor', session_dir)
self._actor = None
self._save_net(self._critic, 'critic', session_dir)
......@@ -457,9 +465,9 @@ class DdpgAgent(Agent):
else:
self._training_stats = DdpgTrainingStats(episodes)
# Initialize target Q' and mu'
self._actor_target = self._copy_actor()
self._critic_target = self._copy_critic()
# Initialize target Q' and mu'
self._actor_target = self._copy_actor()
self._critic_target = self._copy_critic()
# Initialize l2 loss for critic network
l2_loss = gluon.loss.L2Loss()
......@@ -732,6 +740,7 @@ class DqnAgent(Agent):
def _make_pickle_ready(self, session_dir):
super(DqnAgent, self)._make_pickle_ready(session_dir)
self._save_net(self._qnet, 'current_qnet')
self._save_net(self._qnet, 'qnet', session_dir)
self._qnet = None
self._save_net(self._target_qnet, 'target_net', session_dir)
......@@ -897,4 +906,4 @@ class DqnAgent(Agent):
def _save_current_as_best_net(self):
self._best_net = copy_net(
self._qnet, (1,) + self._state_dim, ctx=self._ctx)
self._qnet, self._state_dim, ctx=self._ctx)
......@@ -168,5 +168,5 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
def select_action(self, values):
noise = self._evolve_state()
action = values + (self.cur_eps * noise)
action = (1.0 - self.cur_eps) * values + (self.cur_eps * noise)
return np.clip(action, self._action_low, self._action_high)
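The exploration action is now a convex blend of the actor output and the Ornstein-Uhlenbeck noise instead of the output plus scaled noise, so at cur_eps = 1 the agent acts on pure noise and, as cur_eps decays, the action converges to the deterministic policy. A standalone numpy sketch of the blend (the example values and bounds are made up):

    import numpy as np

    def blended_action(mu, noise, eps, low=-1.0, high=1.0):
        # convex combination: eps = 1 -> pure noise, eps = 0 -> pure policy output
        action = (1.0 - eps) * mu + eps * noise
        return np.clip(action, low, high)

    mu = np.array([0.8, -0.2])                  # actor output (illustrative)
    noise = np.array([0.3, 0.5])                # one OU sample (illustrative)
    print(blended_action(mu, noise, eps=1.0))   # early training: mostly exploration
    print(blended_action(mu, noise, eps=0.1))   # late training: mostly the actor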
......@@ -127,13 +127,15 @@ class TrainingStats(object):
else:
return self._all_total_rewards[0]
def save(self, path):
np.save(os.path.join(path, 'total_rewards'), self._all_total_rewards)
np.save(os.path.join(path, 'eps'), self._all_eps)
np.save(os.path.join(path, 'time'), self._all_time)
def save(self, path, episode=None):
if episode is None:
episode = self._max_episodes
np.save(os.path.join(path, 'total_rewards'), self._all_total_rewards[:episode])
np.save(os.path.join(path, 'eps'), self._all_eps[:episode])
np.save(os.path.join(path, 'time'), self._all_time[:episode])
np.save(
os.path.join(path, 'mean_reward'),
self._all_mean_reward_last_100_episodes)
self._all_mean_reward_last_100_episodes[:episode])
def _log_episode(self, episode, start_time, training_steps, eps, reward):
self.add_eps(episode, eps)
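The stats arrays are preallocated to _max_episodes, so saving after an interrupted or early-stopped run used to pad the .npy files with trailing zeros; the new episode parameter slices every array down to the episodes actually played. A small standalone sketch of the effect (the array size and output path are made up):

    import os
    import numpy as np

    max_episodes, episode = 1000, 250                    # illustrative numbers
    total_rewards = np.zeros((max_episodes,))            # preallocated like the stats arrays
    total_rewards[:episode] = np.random.randn(episode)   # only 250 episodes were actually run

    np.save(os.path.join('.', 'total_rewards'), total_rewards[:episode])
    loaded = np.load('total_rewards.npy')
    assert loaded.shape == (episode,)                    # no trailing padding in the saved file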
......@@ -170,33 +172,43 @@ class DqnTrainingStats(TrainingStats):
self._logger.info(info)
return avg_reward
def save_stats(self, path):
def save_stats(self, path, episode=None):
if episode is None:
episode = self._max_episodes
all_total_rewards = self._all_total_rewards[:episode]
all_avg_loss = self._all_avg_loss[:episode]
all_eps = self._all_eps[:episode]
all_mean_reward_last_100_episodes = self._all_mean_reward_last_100_episodes[:episode]
fig = plt.figure(figsize=(20, 20))
sub_rewards = fig.add_subplot(221)
sub_rewards.set_title('Total Rewards per episode')
sub_rewards.plot(
np.arange(self._max_episodes), self._all_total_rewards)
np.arange(episode), all_total_rewards)
sub_loss = fig.add_subplot(222)
sub_loss.set_title('Avg. Loss per episode')
sub_loss.plot(np.arange(self._max_episodes), self._all_avg_loss)
sub_loss.plot(np.arange(episode), all_avg_loss)
sub_eps = fig.add_subplot(223)
sub_eps.set_title('Epsilon per episode')
sub_eps.plot(np.arange(self._max_episodes), self._all_eps)
sub_eps.plot(np.arange(episode), all_eps)
sub_rewards = fig.add_subplot(224)
sub_rewards.set_title('Avg. mean reward of last 100 episodes')
sub_rewards.plot(np.arange(self._max_episodes),
self._all_mean_reward_last_100_episodes)
sub_rewards.plot(np.arange(episode),
all_mean_reward_last_100_episodes)
self.save(path)
self.save(path, episode=episode)
plt.savefig(os.path.join(path, 'stats.pdf'))
def save(self, path):
super(DqnTrainingStats, self).save(path)
np.save(os.path.join(path, 'avg_loss'), self._all_avg_loss)
def save(self, path, episode=None):
if episode is None:
episode = self._max_episodes
super(DqnTrainingStats, self).save(path, episode=episode)
np.save(os.path.join(path, 'avg_loss'), self._all_avg_loss[:episode])
class DdpgTrainingStats(TrainingStats):
......@@ -233,44 +245,56 @@ class DdpgTrainingStats(TrainingStats):
self.logger.info(info)
return avg_reward
def save(self, path):
super(DdpgTrainingStats, self).save(path)
def save(self, path, episode=None):
if episode is None:
episode = self._max_episodes
super(DdpgTrainingStats, self).save(path, episode=episode)
np.save(os.path.join(
path, 'avg_critic_loss'), self._all_avg_critic_loss)
np.save(os.path.join(path, 'avg_actor_loss'), self._all_avg_actor_loss)
np.save(os.path.join(path, 'avg_qvalues'), self._all_avg_qvalues)
path, 'avg_critic_loss'), self._all_avg_critic_loss[:episode])
np.save(os.path.join(path, 'avg_actor_loss'), self._all_avg_actor_loss[:episode])
np.save(os.path.join(path, 'avg_qvalues'), self._all_avg_qvalues[:episode])
def save_stats(self, path, episode=None):
if episode is None:
episode = self._max_episodes
all_total_rewards = self._all_total_rewards[:episode]
all_avg_actor_loss = self._all_avg_actor_loss[:episode]
all_avg_critic_loss = self._all_avg_critic_loss[:episode]
all_avg_qvalues = self._all_avg_qvalues[:episode]
all_eps = self._all_eps[:episode]
all_mean_reward_last_100_episodes = self._all_mean_reward_last_100_episodes[:episode]
def save_stats(self, path):
fig = plt.figure(figsize=(120, 120))
sub_rewards = fig.add_subplot(321)
sub_rewards.set_title('Total Rewards per episode')
sub_rewards.plot(
np.arange(self._max_episodes), self._all_total_rewards)
np.arange(episode), all_total_rewards)
sub_actor_loss = fig.add_subplot(322)
sub_actor_loss.set_title('Avg. Actor Loss per episode')
sub_actor_loss.plot(
np.arange(self._max_episodes), self._all_avg_actor_loss)
np.arange(episode), all_avg_actor_loss)
sub_critic_loss = fig.add_subplot(323)
sub_critic_loss.set_title('Avg. Critic Loss per episode')
sub_critic_loss.plot(
np.arange(self._max_episodes), self._all_avg_critic_loss)
np.arange(episode), all_avg_critic_loss)
sub_qvalues = fig.add_subplot(324)
sub_qvalues.set_title('Avg. QValues per episode')
sub_qvalues.plot(
np.arange(self._max_episodes), self._all_avg_qvalues)
np.arange(episode), all_avg_qvalues)
sub_eps = fig.add_subplot(325)
sub_eps.set_title('Epsilon per episode')
sub_eps.plot(np.arange(self._max_episodes), self._all_eps)
sub_eps.plot(np.arange(episode), all_eps)
sub_rewards = fig.add_subplot(326)
sub_rewards.set_title('Avg. mean reward of last 100 episodes')
sub_rewards.plot(np.arange(self._max_episodes),
self._all_mean_reward_last_100_episodes)
sub_rewards.plot(np.arange(episode),
all_mean_reward_last_100_episodes)
self.save(path)
self.save(path, episode=episode)
plt.savefig(os.path.join(path, 'stats.pdf'))
......@@ -47,7 +47,7 @@ from std_msgs.msg import Float32MultiArray, Bool, Int32, MultiArrayDimension, Fl
class RosEnvironment(Environment):
def __init__(self,
ros_node_name='RosTrainingAgent',
timeout_in_s=3,
timeout_in_s=60,
state_topic='state',
action_topic='action',
reset_topic='reset',
......@@ -55,6 +55,7 @@ class RosEnvironment(Environment):
reward_topic='reward'):
super(RosEnvironment, self).__init__()
self.__timeout_in_s = timeout_in_s
self.__in_reset = False
self.__waiting_for_state_update = False
self.__waiting_for_terminal_update = False
self.__last_received_state = 0
......@@ -82,13 +83,18 @@ class RosEnvironment(Environment):
time.sleep(2)
def reset(self):
self.__in_reset = True
time.sleep(0.5)
reset_message = Bool()
reset_message.data = True
self.__waiting_for_state_update = True
self.__waiting_for_terminal_update = False
self.__waiting_for_reward_update = False
self.__reset_publisher.publish(reset_message)
self.__wait_for_new_state(self.__reset_publisher, reset_message)
while self.__last_received_terminal:
self.__wait_for_new_state(self.__reset_publisher, reset_message)
pass
self.__in_reset = False
return self.__last_received_state
def step(self, action):
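reset now raises an __in_reset flag, publishes the reset request, and keeps waiting (re-publishing via __wait_for_new_state) until the environment stops reporting a terminal state, rather than waiting exactly once. A rough standalone sketch of that publish-and-wait handshake (the is_terminal callable and the timeout handling are simplified stand-ins, not the generated __wait_for_new_state):

    import time
    from std_msgs.msg import Bool

    def wait_until_reset(reset_publisher, is_terminal, timeout_in_s=60, poll_s=0.1):
        # Re-publish the reset request until the environment leaves the terminal state.
        reset_message = Bool()
        reset_message.data = True
        reset_publisher.publish(reset_message)
        waited = 0.0
        while is_terminal():
            if waited >= timeout_in_s:
                raise RuntimeError('environment did not leave the terminal state in time')
            reset_publisher.publish(reset_message)   # keep asking, like the loop in this hunk
            time.sleep(poll_s)
            waited += poll_s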
......@@ -129,7 +135,7 @@ class RosEnvironment(Environment):
rospy.signal_shutdown('Program ended!')
def __state_callback(self, data):
self.__last_received_state = np.array(data.data, dtype='float32')
self.__last_received_state = np.array(data.data, dtype='float32').reshape((5,))
rospy.logdebug('Received state: {}'.format(self.__last_received_state))
self.__waiting_for_state_update = False
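The state callback now forces the flat Float32MultiArray payload into the expected state shape, here a fixed five-element vector, so every consumer sees a consistent array shape. A tiny numpy sketch of that conversion (the tuple stands in for data.data; reshape raises ValueError if the payload does not contain exactly five values):

    import numpy as np

    msg_data = (0.1, 0.2, 0.3, 0.4, 0.5)                       # stand-in for data.data
    state = np.array(msg_data, dtype='float32').reshape((5,))
    print(state.shape)                                         # (5,)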