Commit 869ec892 authored by Nicola Gatto

Adapt tests to new templates

parent 1fb97868
@@ -78,10 +78,10 @@ if __name__ == "__main__":
'snapshot_interval': 20,
'max_episode_step': 250,
'target_score': 185.5,
'qnet':qnet_creator.net,
'qnet':qnet_creator.networks[0],
'use_fix_target': True,
'target_update_interval': 200,
'loss': 'huber',
'loss_function': 'huber',
'optimizer': 'rmsprop',
'optimizer_params': {
'learning_rate': 0.001 },
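For orientation, the regenerated templates expose the created Gluon networks through a networks list instead of a single net attribute, and the agent configuration key 'loss' is now 'loss_function'. A minimal sketch of the adapted entries; the enclosing dict and any agent constructor it is passed to are assumptions, not shown in this hunk:

# Illustrative sketch, not part of the commit; only the two renamed entries come from the diff.
agent_params = {
    'qnet': qnet_creator.networks[0],             # previously: qnet_creator.net
    'loss_function': 'huber',                     # previously keyed as 'loss'
    'optimizer': 'rmsprop',
    'optimizer_params': {'learning_rate': 0.001},
}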
@@ -108,4 +108,4 @@ if __name__ == "__main__":
train_successful = agent.train()
if train_successful:
agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
agent.export_best_network(path=qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
@@ -13,18 +13,21 @@ class StrategyBuilder(object):
epsilon_decay_method='no',
epsilon_decay=0.0,
epsilon_decay_start=0,
epsilon_decay_per_step=False,
action_dim=None,
action_low=None,
action_high=None,
mu=0.0,
theta=0.5,
sigma=0.3
sigma=0.3,
noise_variance=0.1
):
if epsilon_decay_method == 'linear':
decay = LinearDecay(
eps_decay=epsilon_decay, min_eps=min_epsilon,
decay_start=epsilon_decay_start)
decay_start=epsilon_decay_start,
decay_per_step=epsilon_decay_per_step)
else:
decay = NoDecay()
@@ -44,6 +47,13 @@ class StrategyBuilder(object):
return OrnsteinUhlenbeckStrategy(
action_dim, action_low, action_high, epsilon, mu, theta,
sigma, decay)
elif method == 'gaussian':
assert action_dim is not None
assert action_low is not None
assert action_high is not None
assert noise_variance is not None
return GaussianNoiseStrategy(action_dim, action_low, action_high,
epsilon, noise_variance, decay)
else:
assert action_dim is not None
assert len(action_dim) == 1
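The new 'gaussian' branch expects the continuous action description plus a noise variance, and the linear decay can now also be applied per step. A hedged sketch of requesting this strategy through the builder; the method name build_by_params and the concrete values are assumptions, only the parameter names come from the hunks above:

# Illustrative call, not part of the commit.
builder = StrategyBuilder()
strategy = builder.build_by_params(          # assumed method name
    method='gaussian',
    epsilon=1.0,
    min_epsilon=0.01,
    epsilon_decay_method='linear',
    epsilon_decay=0.0001,
    epsilon_decay_per_step=True,             # new parameter in this commit
    action_dim=(1,),
    action_low=-1.0,
    action_high=1.0,
    noise_variance=0.1)                      # new parameter in this commit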
@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):
class LinearDecay(BaseDecay):
def __init__(self, eps_decay, min_eps=0, decay_start=0):
def __init__(self, eps_decay, min_eps=0, decay_start=0, decay_per_step=False):
super(LinearDecay, self).__init__()
self.eps_decay = eps_decay
self.min_eps = min_eps
self.decay_start = decay_start
self.decay_per_step = decay_per_step
self.last_episode = -1
def decay(self, cur_eps, episode):
if episode < self.decay_start:
return cur_eps
def do_decay(self, episode):
if self.decay_per_step:
do = (episode >= self.decay_start)
else:
do = ((self.last_episode != episode) and (episode >= self.decay_start))
self.last_episode = episode
return do
def decay(self, cur_eps, episode):
if self.do_decay(episode):
return max(cur_eps - self.eps_decay, self.min_eps)
else:
return cur_eps
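With decay_per_step=False the epsilon value is reduced at most once per episode, because do_decay remembers the last episode it decayed in; with decay_per_step=True every call after decay_start reduces it. A small illustration with made-up values:

# Illustrative only, not part of the commit.
per_episode = LinearDecay(eps_decay=0.25, min_eps=0.0, decay_start=0)
eps = 1.0
eps = per_episode.decay(eps, episode=0)   # 0.75
eps = per_episode.decay(eps, episode=0)   # still 0.75, same episode
eps = per_episode.decay(eps, episode=1)   # 0.5

per_step = LinearDecay(eps_decay=0.25, min_eps=0.0, decay_start=0, decay_per_step=True)
eps = 1.0
eps = per_step.decay(eps, episode=0)      # 0.75
eps = per_step.decay(eps, episode=0)      # 0.5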
class BaseStrategy(object):
@@ -170,3 +190,29 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
noise = self._evolve_state()
action = (1.0 - self.cur_eps) * values + (self.cur_eps * noise)
return np.clip(action, self._action_low, self._action_high)
class GaussianNoiseStrategy(BaseStrategy):
def __init__(
self,
action_dim,
action_low,
action_high,
eps,
noise_variance,
decay=NoDecay()
):
super(GaussianNoiseStrategy, self).__init__(decay)
self.eps = eps
self.cur_eps = eps
self._action_dim = action_dim
self._action_low = action_low
self._action_high = action_high
self._noise_variance = noise_variance
def select_action(self, values):
noise = np.random.normal(loc=0.0, scale=self._noise_variance, size=self._action_dim)
action = values + self.cur_eps * noise
return np.clip(action, self._action_low, self._action_high)
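Unlike the Ornstein-Uhlenbeck strategy, the Gaussian strategy perturbs the chosen action with independent noise scaled by the current epsilon and clips the result to the action bounds (np.random.normal's scale argument is a standard deviation, so noise_variance is used as such here). A short usage sketch with made-up action values:

# Illustrative only, not part of the commit.
import numpy as np

strategy = GaussianNoiseStrategy(action_dim=(2,), action_low=-1.0,
                                 action_high=1.0, eps=1.0, noise_variance=0.1)
greedy_action = np.array([0.3, -0.7], dtype='float32')
noisy_action = strategy.select_action(greedy_action)   # clipped to [-1.0, 1.0]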
@@ -11,8 +11,8 @@ import cnnarch_logger
LOSS_FUNCTIONS = {
'l1': gluon.loss.L1Loss(),
'euclidean': gluon.loss.L2Loss(),
'huber_loss': gluon.loss.HuberLoss(),
'l2': gluon.loss.L2Loss(),
'huber': gluon.loss.HuberLoss(),
'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(),
'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()}
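The registry keys now match the values accepted by the renamed 'loss_function' option, so 'huber' resolves directly to Gluon's HuberLoss. A minimal lookup sketch:

# Illustrative only, not part of the commit.
from mxnet import gluon

loss_name = 'huber'                      # value configured via 'loss_function'
loss = LOSS_FUNCTIONS[loss_name]
assert isinstance(loss, gluon.loss.HuberLoss)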
......
@@ -85,8 +85,8 @@ if __name__ == "__main__":
'train_interval': 1,
'snapshot_interval': 20,
'max_episode_step': 1000,
'actor': actor_creator.net,
'critic': critic_creator.net,
'actor': actor_creator.networks[0],
'critic': critic_creator.networks[0],
'actor_optimizer': 'adam',
'actor_optimizer_params': {
'learning_rate': 1.0E-4},
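The DDPG test follows the same pattern, taking the actor and critic from the creators' networks lists. A condensed sketch of the adapted keys; the enclosing dict and the agent it configures are assumptions:

# Illustrative sketch, not part of the commit.
agent_params = {
    'actor': actor_creator.networks[0],      # previously: actor_creator.net
    'critic': critic_creator.networks[0],    # previously: critic_creator.net
    'actor_optimizer': 'adam',
    'actor_optimizer_params': {'learning_rate': 1.0E-4},
}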
@@ -116,4 +116,4 @@ if __name__ == "__main__":
train_successful = agent.train()
if train_successful:
agent.save_best_network(actor_creator._model_dir_ + actor_creator._model_prefix_ + '_0_newest', epoch=0)
agent.export_best_network(path=actor_creator._model_dir_ + actor_creator._model_prefix_ + '_0_newest', epoch=0)
@@ -13,18 +13,21 @@ class StrategyBuilder(object):
epsilon_decay_method='no',
epsilon_decay=0.0,
epsilon_decay_start=0,
epsilon_decay_per_step=False,
action_dim=None,
action_low=None,
action_high=None,
mu=0.0,
theta=0.5,
sigma=0.3
sigma=0.3,
noise_variance=0.1
):
if epsilon_decay_method == 'linear':
decay = LinearDecay(
eps_decay=epsilon_decay, min_eps=min_epsilon,
decay_start=epsilon_decay_start)
decay_start=epsilon_decay_start,
decay_per_step=epsilon_decay_per_step)
else:
decay = NoDecay()
@@ -44,6 +47,13 @@ class StrategyBuilder(object):
return OrnsteinUhlenbeckStrategy(
action_dim, action_low, action_high, epsilon, mu, theta,
sigma, decay)
elif method == 'gaussian':
assert action_dim is not None
assert action_low is not None
assert action_high is not None
assert noise_variance is not None
return GaussianNoiseStrategy(action_dim, action_low, action_high,
epsilon, noise_variance, decay)
else:
assert action_dim is not None
assert len(action_dim) == 1
@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):
class LinearDecay(BaseDecay):
def __init__(self, eps_decay, min_eps=0, decay_start=0):
def __init__(self, eps_decay, min_eps=0, decay_start=0, decay_per_step=False):
super(LinearDecay, self).__init__()
self.eps_decay = eps_decay
self.min_eps = min_eps
self.decay_start = decay_start
self.decay_per_step = decay_per_step
self.last_episode = -1
def decay(self, cur_eps, episode):
if episode < self.decay_start:
return cur_eps
def do_decay(self, episode):
if self.decay_per_step:
do = (episode >= self.decay_start)
else:
do = ((self.last_episode != episode) and (episode >= self.decay_start))
self.last_episode = episode
return do
def decay(self, cur_eps, episode):
if self.do_decay(episode):
return max(cur_eps - self.eps_decay, self.min_eps)
else:
return cur_eps
class BaseStrategy(object):
@@ -170,3 +190,29 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
noise = self._evolve_state()
action = (1.0 - self.cur_eps) * values + (self.cur_eps * noise)
return np.clip(action, self._action_low, self._action_high)
class GaussianNoiseStrategy(BaseStrategy):
def __init__(
self,
action_dim,
action_low,
action_high,
eps,
noise_variance,
decay=NoDecay()
):
super(GaussianNoiseStrategy, self).__init__(decay)
self.eps = eps
self.cur_eps = eps
self._action_dim = action_dim
self._action_low = action_low
self._action_high = action_high
self._noise_variance = noise_variance
def select_action(self, values):
noise = np.random.normal(loc=0.0, scale=self._noise_variance, size=self._action_dim)
action = values + self.cur_eps * noise
return np.clip(action, self._action_low, self._action_high)
@@ -11,8 +11,8 @@ import cnnarch_logger
LOSS_FUNCTIONS = {
'l1': gluon.loss.L1Loss(),
'euclidean': gluon.loss.L2Loss(),
'huber_loss': gluon.loss.HuberLoss(),
'l2': gluon.loss.L2Loss(),
'huber': gluon.loss.HuberLoss(),
'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(),
'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()}
......
@@ -84,10 +84,10 @@ if __name__ == "__main__":
'train_interval': 1,
'snapshot_interval': 1000,
'max_episode_step': 999999999,
'qnet':qnet_creator.net,
'qnet':qnet_creator.networks[0],
'use_fix_target': True,
'target_update_interval': 500,
'loss': 'huber',
'loss_function': 'huber',
'optimizer': 'rmsprop',
'optimizer_params': {
'learning_rate': 0.001 },
@@ -114,4 +114,4 @@ if __name__ == "__main__":
train_successful = agent.train()
if train_successful:
agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
agent.export_best_network(path=qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
@@ -84,7 +84,6 @@ class RosEnvironment(Environment):
def reset(self):
self.__in_reset = True
time.sleep(0.5)
reset_message = Bool()
reset_message.data = True
self.__waiting_for_state_update = True
@@ -110,7 +109,8 @@ class RosEnvironment(Environment):
next_state = self.__last_received_state
terminal = self.__last_received_terminal
reward = self.__calc_reward(next_state, terminal)
rospy.logdebug('Calculated reward: {}'.format(reward))
logger.debug('Transition: ({}, {}, {}, {})'.format(action, reward, next_state, terminal))
return next_state, reward, terminal, 0
@@ -129,20 +129,19 @@ class RosEnvironment(Environment):
else:
rospy.logerr("Timeout 3 times in a row: Terminate application")
exit()
time.sleep(100/1000)
time.sleep(1/500)
def close(self):
rospy.signal_shutdown('Program ended!')
def __state_callback(self, data):
self.__last_received_state = np.array(data.data, dtype='float32').reshape((5,))
rospy.logdebug('Received state: {}'.format(self.__last_received_state))
logger.debug('Received state: {}'.format(self.__last_received_state))
self.__waiting_for_state_update = False
def __terminal_state_callback(self, data):
self.__last_received_terminal = data.data
rospy.logdebug('Received terminal flag: {}'.format(self.__last_received_terminal))
logger.debug('Received terminal: {}'.format(self.__last_received_terminal))
self.__last_received_terminal = np.bool(data.data)
logger.debug('Received terminal flag: {}'.format(self.__last_received_terminal))
self.__waiting_for_terminal_update = False
def __calc_reward(self, state, terminal):
......
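After this change the ROS environment logs through the generated logger instead of rospy's log calls and stores the terminal flag as a NumPy bool. A compact sketch of that callback pattern; illustrative, not the file's exact content, and the subscriber setup is not shown in the hunk:

# Illustrative sketch, not a verbatim copy of the file; a method of RosEnvironment.
def __terminal_state_callback(self, data):
    self.__last_received_terminal = np.bool(data.data)          # cast the std_msgs Bool payload
    logger.debug('Received terminal flag: {}'.format(self.__last_received_terminal))
    self.__waiting_for_terminal_update = False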
@@ -13,18 +13,21 @@ class StrategyBuilder(object):
epsilon_decay_method='no',
epsilon_decay=0.0,
epsilon_decay_start=0,
epsilon_decay_per_step=False,
action_dim=None,
action_low=None,
action_high=None,
mu=0.0,
theta=0.5,
sigma=0.3
sigma=0.3,
noise_variance=0.1
):
if epsilon_decay_method == 'linear':
decay = LinearDecay(
eps_decay=epsilon_decay, min_eps=min_epsilon,
decay_start=epsilon_decay_start)
decay_start=epsilon_decay_start,
decay_per_step=epsilon_decay_per_step)
else:
decay = NoDecay()
@@ -44,6 +47,13 @@ class StrategyBuilder(object):
return OrnsteinUhlenbeckStrategy(
action_dim, action_low, action_high, epsilon, mu, theta,
sigma, decay)
elif method == 'gaussian':
assert action_dim is not None
assert action_low is not None
assert action_high is not None
assert noise_variance is not None
return GaussianNoiseStrategy(action_dim, action_low, action_high,
epsilon, noise_variance, decay)
else:
assert action_dim is not None
assert len(action_dim) == 1
@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):
class LinearDecay(BaseDecay):
def __init__(self, eps_decay, min_eps=0, decay_start=0):
def __init__(self, eps_decay, min_eps=0, decay_start=0, decay_per_step=False):
super(LinearDecay, self).__init__()
self.eps_decay = eps_decay
self.min_eps = min_eps
self.decay_start = decay_start
self.decay_per_step = decay_per_step
self.last_episode = -1
def decay(self, cur_eps, episode):
if episode < self.decay_start:
return cur_eps
def do_decay(self, episode):
if self.decay_per_step:
do = (episode >= self.decay_start)
else:
do = ((self.last_episode != episode) and (episode >= self.decay_start))
self.last_episode = episode
return do
def decay(self, cur_eps, episode):
if self.do_decay(episode):
return max(cur_eps - self.eps_decay, self.min_eps)
else:
return cur_eps
class BaseStrategy(object):
@@ -170,3 +190,29 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
noise = self._evolve_state()
action = (1.0 - self.cur_eps) * values + (self.cur_eps * noise)
return np.clip(action, self._action_low, self._action_high)
class GaussianNoiseStrategy(BaseStrategy):
def __init__(
self,
action_dim,
action_low,
action_high,
eps,
noise_variance,
decay=NoDecay()
):
super(GaussianNoiseStrategy, self).__init__(decay)
self.eps = eps
self.cur_eps = eps
self._action_dim = action_dim
self._action_low = action_low
self._action_high = action_high
self._noise_variance = noise_variance
def select_action(self, values):
noise = np.random.normal(loc=0.0, scale=self._noise_variance, size=self._action_dim)
action = values + self.cur_eps * noise
return np.clip(action, self._action_low, self._action_high)
@@ -11,8 +11,8 @@ import cnnarch_logger
LOSS_FUNCTIONS = {
'l1': gluon.loss.L1Loss(),
'euclidean': gluon.loss.L2Loss(),
'huber_loss': gluon.loss.HuberLoss(),
'l2': gluon.loss.L2Loss(),
'huber': gluon.loss.HuberLoss(),
'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(),
'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()}
......