Commit 9a5ec6a1 authored by Nicola Gatto
Browse files

Use qvalues from critic for statistics

parent 7ed155b3
......@@ -545,9 +545,6 @@ class DdpgAgent(Agent):
# Temporary critic so that gluon trainer does not mess
# with critic parameters
tmp_critic = self._copy_critic()
episode_avg_q_value +=\
np.sum(tmp_critic(
states, self._actor(states)).asnumpy()) / self._minibatch_size
with autograd.record():
# For maximizing qvalues we have to multiply with -1
# as we use a minimizer
......@@ -569,6 +566,8 @@ class DdpgAgent(Agent):
np.sum(critic_loss.asnumpy()) / self._minibatch_size
episode_actor_loss +=\
np.sum(actor_loss.asnumpy()) / self._minibatch_size
episode_avg_q_value +=\
np.sum(qvalues.asnumpy()) / self._minibatch_size
training_steps += 1
......@@ -916,9 +915,6 @@ class TwinDelayedDdpgAgent(DdpgAgent):
if self._total_steps % self._policy_delay == 0:
tmp_critic = self._copy_critic()
episode_avg_q_value +=\
np.sum(tmp_critic(
states, self._actor(states)).asnumpy()) / self._minibatch_size
with autograd.record():
actor_loss = -tmp_critic(
states, self._actor(states)).mean()
......@@ -945,6 +941,8 @@ class TwinDelayedDdpgAgent(DdpgAgent):
np.sum(critic_loss.asnumpy()) / self._minibatch_size
episode_actor_loss += 0 if actor_updates == 0 else\
np.sum(actor_loss.asnumpy()[0])
episode_avg_q_value +=\
np.sum(target_qvalues.asnumpy()) / self._minibatch_size
training_steps += 1
......@@ -964,7 +962,7 @@ class TwinDelayedDdpgAgent(DdpgAgent):
episode_critic_loss = 0 if training_steps == 0\
else (episode_critic_loss / training_steps)
episode_avg_q_value = 0 if actor_updates == 0\
else (episode_avg_q_value / actor_updates)
else (episode_avg_q_value / training_steps)
avg_reward = self._training_stats.log_episode(
self._current_episode, start, training_steps,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment