Commit d900ba89 authored by Nicola Gatto

Adapt test to new templates

parent 7a329bee
Pipeline #161867 failed
@@ -79,9 +79,9 @@ if __name__ == "__main__":
         'max_episode_step': 250,
         'evaluation_samples': 100,
         'target_score': 185.5,
-        'qnet':qnet_creator.net,
+        'qnet':qnet_creator.networks[0],
         'use_fix_target': False,
-        'loss': 'l2',
+        'loss_function': 'l2',
         'optimizer': 'rmsprop',
         'optimizer_params': {
             'weight_decay': 0.01,
@@ -120,4 +120,4 @@ if __name__ == "__main__":
     train_successful = agent.train()
     if train_successful:
-        agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
+        agent.export_best_network(path=qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
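
Reviewer note: three trainer-level changes recur across all generated CNNTrainer files in this commit: the creators now expose a list of networks (networks[0] replaces the old .net attribute), the agent config key 'loss' is renamed to 'loss_function', and save_best_network() is replaced by export_best_network() with an explicit path keyword. A minimal sketch of just the affected lines under the new template API (the agent_params name is an assumption; the diff shows only the dict entries):

    agent_params = {
        # 'networks' is now a list of generated nets; the trainer takes the first one
        'qnet': qnet_creator.networks[0],
        # renamed key; the value must match a key in the LOSS_FUNCTIONS table further down
        'loss_function': 'l2',
    }

    if train_successful:
        agent.export_best_network(
            path=qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest',
            epoch=0)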
@@ -13,18 +13,21 @@ class StrategyBuilder(object):
         epsilon_decay_method='no',
         epsilon_decay=0.0,
         epsilon_decay_start=0,
+        epsilon_decay_per_step=False,
         action_dim=None,
         action_low=None,
         action_high=None,
         mu=0.0,
         theta=0.5,
-        sigma=0.3
+        sigma=0.3,
+        noise_variance=0.1
     ):
         if epsilon_decay_method == 'linear':
             decay = LinearDecay(
                 eps_decay=epsilon_decay, min_eps=min_epsilon,
-                decay_start=epsilon_decay_start)
+                decay_start=epsilon_decay_start,
+                decay_per_step=epsilon_decay_per_step)
         else:
             decay = NoDecay()
@@ -44,6 +47,13 @@ class StrategyBuilder(object):
             return OrnsteinUhlenbeckStrategy(
                 action_dim, action_low, action_high, epsilon, mu, theta,
                 sigma, decay)
+        elif method == 'gaussian':
+            assert action_dim is not None
+            assert action_low is not None
+            assert action_high is not None
+            assert noise_variance is not None
+            return GaussianNoiseStrategy(action_dim, action_low, action_high,
+                                         epsilon, noise_variance, decay)
         else:
             assert action_dim is not None
             assert len(action_dim) == 1
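
Reviewer note: the new 'gaussian' branch requires the action bounds and the noise variance, mirroring the Ornstein-Uhlenbeck branch above. A minimal usage sketch, assuming the generated module is importable as strategy (the import path is an assumption; parameter names are taken from this diff):

    from strategy import StrategyBuilder  # assumed import path

    strategy = StrategyBuilder().build_strategy(
        method='gaussian',
        epsilon=1.0,
        min_epsilon=0.05,
        epsilon_decay_method='linear',
        epsilon_decay=0.01,
        epsilon_decay_per_step=False,  # new flag: decay once per episode
        action_dim=(1,),
        action_low=-1.0,
        action_high=1.0,
        noise_variance=0.1)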
@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):

 class LinearDecay(BaseDecay):
-    def __init__(self, eps_decay, min_eps=0, decay_start=0):
+    def __init__(self, eps_decay, min_eps=0, decay_start=0, decay_per_step=False):
         super(LinearDecay, self).__init__()
         self.eps_decay = eps_decay
         self.min_eps = min_eps
         self.decay_start = decay_start
+        self.decay_per_step = decay_per_step
+        self.last_episode = -1

-    def decay(self, cur_eps, episode):
-        if episode < self.decay_start:
-            return cur_eps
+    def do_decay(self, episode):
+        if self.decay_per_step:
+            do = (episode >= self.decay_start)
         else:
+            do = ((self.last_episode != episode) and (episode >= self.decay_start))
+        self.last_episode = episode
+        return do
+
+    def decay(self, cur_eps, episode):
+        if self.do_decay(episode):
             return max(cur_eps - self.eps_decay, self.min_eps)
+        else:
+            return cur_eps

 class BaseStrategy(object):
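
Reviewer note: with decay_per_step=False, the new last_episode bookkeeping preserves the old behaviour of at most one decay per episode, while decay_per_step=True decays on every call. A small sketch of the difference:

    per_episode = LinearDecay(eps_decay=0.1, min_eps=0.0, decay_start=0)
    eps = per_episode.decay(1.0, episode=0)  # 0.9 -- first call in episode 0
    eps = per_episode.decay(eps, episode=0)  # 0.9 -- same episode, no further decay

    per_step = LinearDecay(eps_decay=0.1, min_eps=0.0, decay_start=0,
                           decay_per_step=True)
    eps = per_step.decay(1.0, episode=0)     # 0.9
    eps = per_step.decay(eps, episode=0)     # 0.8 -- decays on every call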
@@ -170,3 +190,29 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
         noise = self._evolve_state()
         action = (1.0 - self.cur_eps) * values + (self.cur_eps * noise)
         return np.clip(action, self._action_low, self._action_high)
+
+
+class GaussianNoiseStrategy(BaseStrategy):
+    def __init__(
+        self,
+        action_dim,
+        action_low,
+        action_high,
+        eps,
+        noise_variance,
+        decay=NoDecay()
+    ):
+        super(GaussianNoiseStrategy, self).__init__(decay)
+        self.eps = eps
+        self.cur_eps = eps
+        self._action_dim = action_dim
+        self._action_low = action_low
+        self._action_high = action_high
+        self._noise_variance = noise_variance
+
+    def select_action(self, values):
+        noise = np.random.normal(loc=0.0, scale=self._noise_variance, size=self._action_dim)
+        action = values + self.cur_eps * noise
+        return np.clip(action, self._action_low, self._action_high)
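
Reviewer note: unlike OrnsteinUhlenbeckStrategy, which blends deterministic values and noise as (1 - eps) * values + eps * noise, the new strategy adds eps-scaled white noise on top of the unscaled action. A minimal sketch of a single perturbed action:

    import numpy as np

    strategy = GaussianNoiseStrategy(action_dim=(1,), action_low=-1.0,
                                     action_high=1.0, eps=1.0,
                                     noise_variance=0.1)
    values = np.array([0.3])                 # deterministic action, e.g. from an actor net
    action = strategy.select_action(values)  # values + cur_eps * noise, clipped to [-1, 1]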
@@ -11,8 +11,8 @@ import cnnarch_logger

 LOSS_FUNCTIONS = {
     'l1': gluon.loss.L1Loss(),
-    'euclidean': gluon.loss.L2Loss(),
-    'huber_loss': gluon.loss.HuberLoss(),
+    'l2': gluon.loss.L2Loss(),
+    'huber': gluon.loss.HuberLoss(),
     'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(),
     'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()}
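
Reviewer note: the renamed keys ('l2', 'huber') now line up with the 'loss_function' values used in the trainer configs above; the Gluon loss objects themselves are unchanged. Lookup stays a plain dict access, e.g.:

    loss_function = LOSS_FUNCTIONS['huber']     # gluon.loss.HuberLoss()
    loss = loss_function(predictions, labels)   # hypothetical tensors, standard Gluon call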
@@ -84,10 +84,10 @@ if __name__ == "__main__":
         'snapshot_interval': 500,
         'max_episode_step': 10000,
         'target_score': 35000,
-        'qnet':qnet_creator.net,
+        'qnet':qnet_creator.networks[0],
         'use_fix_target': True,
         'target_update_interval': 500,
-        'loss': 'huber',
+        'loss_function': 'huber',
         'optimizer': 'adam',
         'optimizer_params': {
             'learning_rate': 0.001 },
@@ -114,4 +114,4 @@ if __name__ == "__main__":
     train_successful = agent.train()
     if train_successful:
-        agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
+        agent.export_best_network(path=qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
@@ -72,7 +72,6 @@ class RosEnvironment(Environment):
     def reset(self):
         self.__in_reset = True
         time.sleep(0.5)
         reset_message = Bool()
         reset_message.data = True
         self.__waiting_for_state_update = True
@@ -99,7 +98,8 @@ class RosEnvironment(Environment):
         next_state = self.__last_received_state
         terminal = self.__last_received_terminal
         reward = self.__last_received_reward
-        rospy.logdebug('Calculated reward: {}'.format(reward))
+        logger.debug('Transition: ({}, {}, {}, {})'.format(action, reward, next_state, terminal))

         return next_state, reward, terminal, 0
@@ -118,23 +118,22 @@ class RosEnvironment(Environment):
             else:
                 rospy.logerr("Timeout 3 times in a row: Terminate application")
                 exit()
-            time.sleep(100/1000)
+            time.sleep(1/500)

     def close(self):
         rospy.signal_shutdown('Program ended!')

     def __state_callback(self, data):
         self.__last_received_state = np.array(data.data, dtype='float32').reshape((8,))
-        rospy.logdebug('Received state: {}'.format(self.__last_received_state))
+        logger.debug('Received state: {}'.format(self.__last_received_state))
         self.__waiting_for_state_update = False

     def __terminal_state_callback(self, data):
-        self.__last_received_terminal = data.data
-        rospy.logdebug('Received terminal flag: {}'.format(self.__last_received_terminal))
-        logger.debug('Received terminal: {}'.format(self.__last_received_terminal))
+        self.__last_received_terminal = np.bool(data.data)
+        logger.debug('Received terminal flag: {}'.format(self.__last_received_terminal))
         self.__waiting_for_terminal_update = False

     def __reward_callback(self, data):
-        self.__last_received_reward = float(data.data)
+        self.__last_received_reward = np.float32(data.data)
         logger.debug('Received reward: {}'.format(self.__last_received_reward))
         self.__waiting_for_reward_update = False
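
Reviewer note: if these generated environments still run under Python 2 (usual for rospy at the time), both the old 100/1000 and the new 1/500 are integer divisions that evaluate to 0, so the polling loop does not actually sleep. A hedged fix, assuming a 2 ms pause is what is intended:

    time.sleep(0.002)  # float literal: same value under Python 2 and Python 3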
@@ -13,18 +13,21 @@ class StrategyBuilder(object):
         epsilon_decay_method='no',
         epsilon_decay=0.0,
         epsilon_decay_start=0,
+        epsilon_decay_per_step=False,
         action_dim=None,
         action_low=None,
         action_high=None,
         mu=0.0,
         theta=0.5,
-        sigma=0.3
+        sigma=0.3,
+        noise_variance=0.1
     ):
         if epsilon_decay_method == 'linear':
             decay = LinearDecay(
                 eps_decay=epsilon_decay, min_eps=min_epsilon,
-                decay_start=epsilon_decay_start)
+                decay_start=epsilon_decay_start,
+                decay_per_step=epsilon_decay_per_step)
         else:
             decay = NoDecay()
@@ -44,6 +47,13 @@ class StrategyBuilder(object):
             return OrnsteinUhlenbeckStrategy(
                 action_dim, action_low, action_high, epsilon, mu, theta,
                 sigma, decay)
+        elif method == 'gaussian':
+            assert action_dim is not None
+            assert action_low is not None
+            assert action_high is not None
+            assert noise_variance is not None
+            return GaussianNoiseStrategy(action_dim, action_low, action_high,
+                                         epsilon, noise_variance, decay)
         else:
             assert action_dim is not None
             assert len(action_dim) == 1
@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):

 class LinearDecay(BaseDecay):
-    def __init__(self, eps_decay, min_eps=0, decay_start=0):
+    def __init__(self, eps_decay, min_eps=0, decay_start=0, decay_per_step=False):
         super(LinearDecay, self).__init__()
         self.eps_decay = eps_decay
         self.min_eps = min_eps
         self.decay_start = decay_start
+        self.decay_per_step = decay_per_step
+        self.last_episode = -1

-    def decay(self, cur_eps, episode):
-        if episode < self.decay_start:
-            return cur_eps
+    def do_decay(self, episode):
+        if self.decay_per_step:
+            do = (episode >= self.decay_start)
         else:
+            do = ((self.last_episode != episode) and (episode >= self.decay_start))
+        self.last_episode = episode
+        return do
+
+    def decay(self, cur_eps, episode):
+        if self.do_decay(episode):
             return max(cur_eps - self.eps_decay, self.min_eps)
+        else:
+            return cur_eps

 class BaseStrategy(object):
@@ -170,3 +190,29 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
         noise = self._evolve_state()
         action = (1.0 - self.cur_eps) * values + (self.cur_eps * noise)
         return np.clip(action, self._action_low, self._action_high)
+
+
+class GaussianNoiseStrategy(BaseStrategy):
+    def __init__(
+        self,
+        action_dim,
+        action_low,
+        action_high,
+        eps,
+        noise_variance,
+        decay=NoDecay()
+    ):
+        super(GaussianNoiseStrategy, self).__init__(decay)
+        self.eps = eps
+        self.cur_eps = eps
+        self._action_dim = action_dim
+        self._action_low = action_low
+        self._action_high = action_high
+        self._noise_variance = noise_variance
+
+    def select_action(self, values):
+        noise = np.random.normal(loc=0.0, scale=self._noise_variance, size=self._action_dim)
+        action = values + self.cur_eps * noise
+        return np.clip(action, self._action_low, self._action_high)
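
Reviewer note: np.random.normal takes scale as a standard deviation, so the value configured as noise_variance is effectively used as a std-dev in select_action. If a true variance is intended, a hedged correction would be:

    noise = np.random.normal(loc=0.0, scale=np.sqrt(self._noise_variance),
                             size=self._action_dim)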
@@ -11,8 +11,8 @@ import cnnarch_logger

 LOSS_FUNCTIONS = {
     'l1': gluon.loss.L1Loss(),
-    'euclidean': gluon.loss.L2Loss(),
-    'huber_loss': gluon.loss.HuberLoss(),
+    'l2': gluon.loss.L2Loss(),
+    'huber': gluon.loss.HuberLoss(),
     'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(),
     'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()}
@@ -88,8 +88,8 @@ if __name__ == "__main__":
         'max_episode_step': 250,
         'evaluation_samples': 100,
         'target_score': 185.5,
-        'actor': actor_creator.net,
-        'critic': critic_creator.net,
+        'actor': actor_creator.networks[0],
+        'critic': critic_creator.networks[0],
         'soft_target_update_rate': 0.001,
         'actor_optimizer': 'adam',
         'actor_optimizer_params': {
@@ -133,4 +133,4 @@ if __name__ == "__main__":
     train_successful = agent.train()
     if train_successful:
-        agent.save_best_network(actor_creator._model_dir_ + actor_creator._model_prefix_ + '_0_newest', epoch=0)
+        agent.export_best_network(path=actor_creator._model_dir_ + actor_creator._model_prefix_ + '_0_newest', epoch=0)
@@ -13,18 +13,21 @@ class StrategyBuilder(object):
         epsilon_decay_method='no',
         epsilon_decay=0.0,
         epsilon_decay_start=0,
+        epsilon_decay_per_step=False,
         action_dim=None,
         action_low=None,
         action_high=None,
         mu=0.0,
         theta=0.5,
-        sigma=0.3
+        sigma=0.3,
+        noise_variance=0.1
     ):
         if epsilon_decay_method == 'linear':
             decay = LinearDecay(
                 eps_decay=epsilon_decay, min_eps=min_epsilon,
-                decay_start=epsilon_decay_start)
+                decay_start=epsilon_decay_start,
+                decay_per_step=epsilon_decay_per_step)
         else:
             decay = NoDecay()
@@ -44,6 +47,13 @@ class StrategyBuilder(object):
             return OrnsteinUhlenbeckStrategy(
                 action_dim, action_low, action_high, epsilon, mu, theta,
                 sigma, decay)
+        elif method == 'gaussian':
+            assert action_dim is not None
+            assert action_low is not None
+            assert action_high is not None
+            assert noise_variance is not None
+            return GaussianNoiseStrategy(action_dim, action_low, action_high,
+                                         epsilon, noise_variance, decay)
         else:
             assert action_dim is not None
             assert len(action_dim) == 1
@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):

 class LinearDecay(BaseDecay):
-    def __init__(self, eps_decay, min_eps=0, decay_start=0):
+    def __init__(self, eps_decay, min_eps=0, decay_start=0, decay_per_step=False):
         super(LinearDecay, self).__init__()
         self.eps_decay = eps_decay
         self.min_eps = min_eps
         self.decay_start = decay_start
+        self.decay_per_step = decay_per_step
+        self.last_episode = -1

-    def decay(self, cur_eps, episode):
-        if episode < self.decay_start:
-            return cur_eps
+    def do_decay(self, episode):
+        if self.decay_per_step:
+            do = (episode >= self.decay_start)
         else:
+            do = ((self.last_episode != episode) and (episode >= self.decay_start))
+        self.last_episode = episode
+        return do
+
+    def decay(self, cur_eps, episode):
+        if self.do_decay(episode):
             return max(cur_eps - self.eps_decay, self.min_eps)
+        else:
+            return cur_eps

 class BaseStrategy(object):
@@ -170,3 +190,29 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
         noise = self._evolve_state()
         action = (1.0 - self.cur_eps) * values + (self.cur_eps * noise)
         return np.clip(action, self._action_low, self._action_high)
+
+
+class GaussianNoiseStrategy(BaseStrategy):
+    def __init__(
+        self,
+        action_dim,
+        action_low,
+        action_high,
+        eps,
+        noise_variance,
+        decay=NoDecay()
+    ):
+        super(GaussianNoiseStrategy, self).__init__(decay)
+        self.eps = eps
+        self.cur_eps = eps
+        self._action_dim = action_dim
+        self._action_low = action_low
+        self._action_high = action_high
+        self._noise_variance = noise_variance
+
+    def select_action(self, values):
+        noise = np.random.normal(loc=0.0, scale=self._noise_variance, size=self._action_dim)
+        action = values + self.cur_eps * noise
+        return np.clip(action, self._action_low, self._action_high)
@@ -11,8 +11,8 @@ import cnnarch_logger

 LOSS_FUNCTIONS = {
     'l1': gluon.loss.L1Loss(),
-    'euclidean': gluon.loss.L2Loss(),
-    'huber_loss': gluon.loss.HuberLoss(),
+    'l2': gluon.loss.L2Loss(),
+    'huber': gluon.loss.HuberLoss(),
     'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(),
     'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()}
@@ -94,8 +94,8 @@ if __name__ == "__main__":
         'snapshot_interval': 500,
         'max_episode_step': 2000,
         'evaluation_samples': 1000,
-        'actor': actor_creator.net,
-        'critic': critic_creator.net,
+        'actor': actor_creator.networks[0],
+        'critic': critic_creator.networks[0],
         'soft_target_update_rate': 0.001,
         'actor_optimizer': 'adam',
         'actor_optimizer_params': {
@@ -139,4 +139,4 @@ if __name__ == "__main__":
     train_successful = agent.train()
     if train_successful:
-        agent.save_best_network(actor_creator._model_dir_ + actor_creator._model_prefix_ + '_0_newest', epoch=0)
+        agent.export_best_network(path=actor_creator._model_dir_ + actor_creator._model_prefix_ + '_0_newest', epoch=0)
@@ -72,7 +72,6 @@ class RosEnvironment(Environment):
     def reset(self):
         self.__in_reset = True
         time.sleep(0.5)
         reset_message = Bool()
         reset_message.data = True
         self.__waiting_for_state_update = True
@@ -104,7 +103,8 @@ class RosEnvironment(Environment):
         next_state = self.__last_received_state
         terminal = self.__last_received_terminal
         reward = self.__last_received_reward
-        rospy.logdebug('Calculated reward: {}'.format(reward))
+        logger.debug('Transition: ({}, {}, {}, {})'.format(action, reward, next_state, terminal))

         return next_state, reward, terminal, 0
@@ -123,23 +123,22 @@ class RosEnvironment(Environment):
             else:
                 rospy.logerr("Timeout 3 times in a row: Terminate application")
                 exit()
-            time.sleep(100/1000)
+            time.sleep(1/500)

     def close(self):
         rospy.signal_shutdown('Program ended!')

     def __state_callback(self, data):
         self.__last_received_state = np.array(data.data, dtype='float32').reshape((8,))
-        rospy.logdebug('Received state: {}'.format(self.__last_received_state))
+        logger.debug('Received state: {}'.format(self.__last_received_state))
         self.__waiting_for_state_update = False

     def __terminal_state_callback(self, data):
-        self.__last_received_terminal = data.data
-        rospy.logdebug('Received terminal flag: {}'.format(self.__last_received_terminal))
-        logger.debug('Received terminal: {}'.format(self.__last_received_terminal))
+        self.__last_received_terminal = np.bool(data.data)
+        logger.debug('Received terminal flag: {}'.format(self.__last_received_terminal))
         self.__waiting_for_terminal_update = False

     def __reward_callback(self, data):
-        self.__last_received_reward = float(data.data)
+        self.__last_received_reward = np.float32(data.data)
         logger.debug('Received reward: {}'.format(self.__last_received_reward))
         self.__waiting_for_reward_update = False
@@ -13,18 +13,21 @@ class StrategyBuilder(object):
         epsilon_decay_method='no',
         epsilon_decay=0.0,
         epsilon_decay_start=0,
+        epsilon_decay_per_step=False,
         action_dim=None,
         action_low=None,
         action_high=None,
         mu=0.0,
         theta=0.5,
-        sigma=0.3
+        sigma=0.3,
+        noise_variance=0.1
     ):
         if epsilon_decay_method == 'linear':
             decay = LinearDecay(
                 eps_decay=epsilon_decay, min_eps=min_epsilon,
-                decay_start=epsilon_decay_start)
+                decay_start=epsilon_decay_start,
+                decay_per_step=epsilon_decay_per_step)
         else:
             decay = NoDecay()
@@ -44,6 +47,13 @@ class StrategyBuilder(object):
             return OrnsteinUhlenbeckStrategy(
                 action_dim, action_low, action_high, epsilon, mu, theta,
                 sigma, decay)
+        elif method == 'gaussian':
+            assert action_dim is not None
+            assert action_low is not None
+            assert action_high is not None
+            assert noise_variance is not None
+            return GaussianNoiseStrategy(action_dim, action_low, action_high,
+                                         epsilon, noise_variance, decay)
         else:
             assert action_dim is not None
             assert len(action_dim) == 1
@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):

 class LinearDecay(BaseDecay):
-    def __init__(self, eps_decay, min_eps=0, decay_start=0):
+    def __init__(self, eps_decay, min_eps=0, decay_start=0, decay_per_step=False):
         super(LinearDecay, self).__init__()
         self.eps_decay = eps_decay
         self.min_eps = min_eps
         self.decay_start = decay_start
+        self.decay_per_step = decay_per_step
+        self.last_episode = -1

-    def decay(self, cur_eps, episode):
-        if episode < self.decay_start:
-            return cur_eps
+    def do_decay(self, episode):
+        if self.decay_per_step:
+            do = (episode >= self.decay_start)
         else:
+            do = ((self.last_episode != episode) and (episode >= self.decay_start))
+        self.last_episode = episode
+        return do
+
+    def decay(self, cur_eps, episode):
+        if self.do_decay(episode):
             return max(cur_eps - self.eps_decay, self.min_eps)
+        else:
+            return cur_eps

 class BaseStrategy(object):
@@ -170,3 +190,29 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
         noise = self._evolve_state()
         action = (1.0 - self.cur_eps) * values + (self.cur_eps * noise)
         return np.clip(action, self._action_low, self._action_high)
+
+
+class GaussianNoiseStrategy(BaseStrategy):
+    def __init__(
+        self,
+        action_dim,
+        action_low,
+        action_high,
+        eps,
+        noise_variance,
+        decay=NoDecay()
+    ):
+        super(GaussianNoiseStrategy, self).__init__(decay)
+        self.eps = eps
+        self.cur_eps = eps
+        self._action_dim = action_dim
+        self._action_low = action_low
+        self._action_high = action_high
+        self._noise_variance = noise_variance
+
+    def select_action(self, values):
+        noise = np.random.normal(loc=0.0, scale=self._noise_variance, size=self._action_dim)
+        action = values + self.cur_eps * noise
+        return np.clip(action, self._action_low, self._action_high)
@@ -11,8 +11,8 @@ import cnnarch_logger

 LOSS_FUNCTIONS = {
     'l1': gluon.loss.L1Loss(),
-    'euclidean': gluon.loss.L2Loss(),
-    'huber_loss': gluon.loss.HuberLoss(),
+    'l2': gluon.loss.L2Loss(),
+    'huber': gluon.loss.HuberLoss(),
     'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(),
     'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()}