Skip to content
Snippets Groups Projects
Commit 9a5ec6a1 authored by Nicola Gatto
Browse files

Use qvalues from critic for statistics

parent 7ed155b3
No related branches found
No related tags found
3 merge requests: !20 Implemented layer variables and RNN layer, !19 Integrate TD3 Algorithm and Gaussian Noise, !18 Integrate TD3 Algorithm and Gaussian Noise
......@@ -545,9 +545,6 @@ class DdpgAgent(Agent):
# Temporary critic so that gluon trainer does not mess
# with critic parameters
tmp_critic = self._copy_critic()
episode_avg_q_value +=\
np.sum(tmp_critic(
states, self._actor(states)).asnumpy()) / self._minibatch_size
with autograd.record():
# For maximizing qvalues we have to multiply with -1
# as we use a minimizer
......@@ -569,6 +566,8 @@ class DdpgAgent(Agent):
np.sum(critic_loss.asnumpy()) / self._minibatch_size
episode_actor_loss +=\
np.sum(actor_loss.asnumpy()) / self._minibatch_size
episode_avg_q_value +=\
np.sum(qvalues.asnumpy()) / self._minibatch_size
training_steps += 1
......@@ -916,9 +915,6 @@ class TwinDelayedDdpgAgent(DdpgAgent):
if self._total_steps % self._policy_delay == 0:
tmp_critic = self._copy_critic()
episode_avg_q_value +=\
np.sum(tmp_critic(
states, self._actor(states)).asnumpy()) / self._minibatch_size
with autograd.record():
actor_loss = -tmp_critic(
states, self._actor(states)).mean()
......@@ -945,6 +941,8 @@ class TwinDelayedDdpgAgent(DdpgAgent):
np.sum(critic_loss.asnumpy()) / self._minibatch_size
episode_actor_loss += 0 if actor_updates == 0 else\
np.sum(actor_loss.asnumpy()[0])
episode_avg_q_value +=\
np.sum(target_qvalues.asnumpy()) / self._minibatch_size
training_steps += 1
......@@ -964,7 +962,7 @@ class TwinDelayedDdpgAgent(DdpgAgent):
episode_critic_loss = 0 if training_steps == 0\
else (episode_critic_loss / training_steps)
episode_avg_q_value = 0 if actor_updates == 0\
else (episode_avg_q_value / actor_updates)
else (episode_avg_q_value / training_steps)
avg_reward = self._training_stats.log_episode(
self._current_episode, start, training_steps,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment