Commit a51b42b6 authored by Nicola Gatto

Adjust tests

parent cc31bd8e
@@ -214,11 +214,12 @@ public class GenerationTest extends AbstractSymtabTest {
"HelperA.h",
"start_training.sh",
"reinforcement_learning/__init__.py",
"reinforcement_learning/action_policy.py",
"reinforcement_learning/strategy.py",
"reinforcement_learning/agent.py",
"reinforcement_learning/environment.py",
"reinforcement_learning/replay_memory.py",
"reinforcement_learning/util.py"
"reinforcement_learning/util.py",
"reinforcement_learning/cnnarch_logger.py"
)
);
}
@@ -262,11 +263,12 @@ public class GenerationTest extends AbstractSymtabTest {
"reward/pylib/armanpy/armanpy_3d.i",
"reward/pylib/armanpy/numpy.i",
"reinforcement_learning/__init__.py",
"reinforcement_learning/action_policy.py",
"reinforcement_learning/strategy.py",
"reinforcement_learning/agent.py",
"reinforcement_learning/environment.py",
"reinforcement_learning/replay_memory.py",
"reinforcement_learning/util.py",
"reinforcement_learning/cnnarch_logger.py",
"reinforcement_learning/torcs_agent_dqn_reward_executor.py"
)
);
@@ -292,5 +294,33 @@ public class GenerationTest extends AbstractSymtabTest {
String[] args = {"-m", "src/test/resources/models/reinforcementModel", "-r", "mountaincar.Master", "-b", "GLUON", "-f", "n", "-c", "n"};
EMADLGeneratorCli.main(args);
assertEquals(0, Log.getFindings().stream().filter(Finding::isError).count());
checkFilesAreEqual(
Paths.get("./target/generated-sources-emadl"),
Paths.get("./src/test/resources/target_code/gluon/reinforcementModel/mountaincar"),
Arrays.asList(
"mountaincar_master.cpp",
"mountaincar_master.h",
"mountaincar_master_actor.h",
"CMakeLists.txt",
"CNNBufferFile.h",
"CNNCreator_mountaincar_master_actor.py",
"CNNNet_mountaincar_master_actor.py",
"CNNPredictor_mountaincar_master_actor.h",
"CNNTrainer_mountaincar_master_actor.py",
"CNNTranslator.h",
"HelperA.h",
"start_training.sh",
"reinforcement_learning/__init__.py",
"reinforcement_learning/CNNCreator_MountaincarCritic.py",
"reinforcement_learning/CNNNet_MountaincarCritic.py",
"reinforcement_learning/strategy.py",
"reinforcement_learning/agent.py",
"reinforcement_learning/environment.py",
"reinforcement_learning/replay_memory.py",
"reinforcement_learning/util.py",
"reinforcement_learning/cnnarch_logger.py"
)
);
}
}
@@ -24,7 +24,7 @@ configuration CartPoleDQN {
sample_size : 32
}
action_selection : epsgreedy{
strategy : epsgreedy{
epsilon : 1.0
min_epsilon : 0.01
epsilon_decay_method: linear
......
@@ -14,21 +14,26 @@ configuration MountaincarActor {
snapshot_interval : 20
loss : euclidean
replay_memory : buffer{
memory_size : 10000
sample_size : 32
memory_size : 1000000
sample_size : 64
}
action_selection : epsgreedy{
strategy : ornstein_uhlenbeck{
epsilon : 1.0
min_epsilon : 0.01
epsilon_decay_method: linear
epsilon_decay : 0.01
mu: (0.0)
theta: (0.15)
sigma: (0.3)
}
actor_optimizer : adam {
learning_rate : 0.0001
}
optimizer : rmsprop{
critic_optimizer : adam {
learning_rate : 0.001
}
}
\ No newline at end of file
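For reference, the mu, theta, and sigma values above parameterize Ornstein-Uhlenbeck exploration noise for the continuous-action actor. Below is a minimal, illustrative sketch of the mean-reverting process these values presumably configure; the class is not part of the generated code, and the generated strategy may integrate the process differently.

import numpy as np

class OrnsteinUhlenbeckSketch(object):
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.3):
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.state = np.ones(action_dim) * mu

    def sample(self):
        # Drift back towards mu plus Gaussian noise; the sampled value is added
        # to the actor's action during training, typically scaled by the current epsilon.
        self.state = self.state + self.theta * (self.mu - self.state) \
            + self.sigma * np.random.randn(*self.state.shape)
        return self.state

noise = OrnsteinUhlenbeckSketch(action_dim=(1,), mu=0.0, theta=0.15, sigma=0.3)  # hypothetical 1-dimensional action space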
@@ -8,8 +8,5 @@ implementation Critic(state, action) {
FullyConnected(units=300)
) ->
Add() ->
Relu() ->
FullyConnected(units=1) ->
Tanh() ->
critic
Relu()
}
\ No newline at end of file
@@ -30,7 +30,7 @@ configuration TorcsDQN {
sample_size : 32
}
action_selection : epsgreedy{
strategy : epsgreedy{
epsilon : 1.0
min_epsilon : 0.01
epsilon_decay_method: linear
......
import os
import h5py
import mxnet as mx
import logging
import sys
class cartpole_master_dqnDataLoader:
_input_names_ = ['state']
_output_names_ = ['qvalues_label']
def __init__(self):
self._data_dir = "data/"
def load_data(self, batch_size):
train_h5, test_h5 = self.load_h5_files()
data_mean = train_h5[self._input_names_[0]][:].mean(axis=0)
data_std = train_h5[self._input_names_[0]][:].std(axis=0) + 1e-5
train_iter = mx.io.NDArrayIter(train_h5[self._input_names_[0]],
train_h5[self._output_names_[0]],
batch_size=batch_size,
data_name=self._input_names_[0],
label_name=self._output_names_[0])
test_iter = None
if test_h5 is not None:
test_iter = mx.io.NDArrayIter(test_h5[self._input_names_[0]],
test_h5[self._output_names_[0]],
batch_size=batch_size,
data_name=self._input_names_[0],
label_name=self._output_names_[0])
return train_iter, test_iter, data_mean, data_std
def load_h5_files(self):
train_h5 = None
test_h5 = None
train_path = self._data_dir + "train.h5"
test_path = self._data_dir + "test.h5"
if os.path.isfile(train_path):
train_h5 = h5py.File(train_path, 'r')
if not (self._input_names_[0] in train_h5 and self._output_names_[0] in train_h5):
logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the datasets: "
+ "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
sys.exit(1)
if os.path.isfile(test_path):
test_h5 = h5py.File(test_path, 'r')
if not (self._input_names_[0] in test_h5 and self._output_names_[0] in test_h5):
logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the datasets: "
+ "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
sys.exit(1)
else:
logging.warning("Couldn't load test set. File '" + os.path.abspath(test_path) + "' does not exist.")
return train_h5, test_h5
else:
logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.")
sys.exit(1)
\ No newline at end of file
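A minimal usage sketch for the generated data loader above, assuming a data/train.h5 file (and optionally data/test.h5) containing the 'state' and 'qvalues_label' datasets; the batch size is an example value:

loader = cartpole_master_dqnDataLoader()
train_iter, test_iter, data_mean, data_std = loader.load_data(batch_size=32)
# train_iter and test_iter are mx.io.NDArrayIter instances; data_mean and data_std
# hold the per-feature mean and standard deviation of the 'state' dataset.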
from reinforcement_learning.agent import DqnAgent
from reinforcement_learning.util import AgentSignalHandler
from reinforcement_learning.cnnarch_logger import ArchLogger
import reinforcement_learning.environment
import CNNCreator_cartpole_master_dqn
@@ -9,9 +9,6 @@ import re
import logging
import mxnet as mx
session_output_dir = 'session'
agent_name='cartpole_master_dqn'
session_param_output = os.path.join(session_output_dir, agent_name)
def resume_session():
session_param_output = os.path.join(session_output_dir, agent_name)
@@ -32,60 +30,73 @@ def resume_session():
break
return resume_session, resume_directory
if __name__ == "__main__":
agent_name='cartpole_master_dqn'
# Prepare output directory and logger
output_directory = 'model_output'\
+ '/' + agent_name\
+ '/' + time.strftime(
'%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
ArchLogger.set_output_directory(output_directory)
ArchLogger.set_logger_name(agent_name)
ArchLogger.set_output_level(ArchLogger.INFO)
env = reinforcement_learning.environment.GymEnvironment('CartPole-v0')
context = mx.cpu()
net_creator = CNNCreator_cartpole_master_dqn.CNNCreator_cartpole_master_dqn()
net_creator.construct(context)
replay_memory_params = {
'method':'buffer',
'memory_size':10000,
'sample_size':32,
'state_dtype':'float32',
'action_dtype':'uint8',
'rewards_dtype':'float32'
}
context = mx.cpu()
qnet_creator = CNNCreator_cartpole_master_dqn.CNNCreator_cartpole_master_dqn()
qnet_creator.construct(context)
policy_params = {
'method':'epsgreedy',
'epsilon': 1,
'min_epsilon': 0.01,
'epsilon_decay_method': 'linear',
'epsilon_decay': 0.01,
agent_params = {
'environment': env,
'replay_memory_params': {
'method':'buffer',
'memory_size':10000,
'sample_size':32,
'state_dtype':'float32',
'action_dtype':'float32',
'rewards_dtype':'float32'
},
'strategy_params': {
'method':'epsgreedy',
'epsilon': 1,
'min_epsilon': 0.01,
'epsilon_decay_method': 'linear',
'epsilon_decay': 0.01,
},
'agent_name': agent_name,
'verbose': True,
'state_dim': (4,),
'action_dim': (2,),
'ctx': 'cpu',
'discount_factor': 0.999,
'training_episodes': 160,
'train_interval': 1,
'snapshot_interval': 20,
'max_episode_step': 250,
'target_score': 185.5,
'qnet':qnet_creator.net,
'use_fix_target': True,
'target_update_interval': 200,
'loss_function': 'euclidean',
'optimizer': 'rmsprop',
'optimizer_params': {
'learning_rate': 0.001 },
'double_dqn': False,
}
resume_session, resume_directory = resume_session()
resume, resume_directory = resume_session()
if resume_session:
agent = DqnAgent.resume_from_session(resume_directory, net_creator.net, env)
if resume:
resume_agent_params = {
'session_dir': resume_directory,
'environment': env,
'net': qnet_creator.net,
}
agent = DqnAgent.resume_from_session(**resume_agent_params)
else:
agent = DqnAgent(
network = net_creator.net,
environment=env,
replay_memory_params=replay_memory_params,
policy_params=policy_params,
state_dim=net_creator.get_input_shapes()[0],
ctx='cpu',
discount_factor=0.999,
loss_function='euclidean',
optimizer='rmsprop',
optimizer_params={
'learning_rate': 0.001 },
training_episodes=160,
train_interval=1,
use_fix_target=True,
target_update_interval=200,
double_dqn = False,
snapshot_interval=20,
agent_name=agent_name,
max_episode_step=250,
output_directory=session_output_dir,
verbose=True,
live_plot = True,
make_logfile=True,
target_score=185.5
)
agent = DqnAgent(**agent_params)
signal_handler = AgentSignalHandler()
signal_handler.register_agent(agent)
@@ -93,4 +104,4 @@ if __name__ == "__main__":
train_successful = agent.train()
if train_successful:
agent.save_best_network(net_creator._model_dir_ + net_creator._model_prefix_ + '_newest', epoch=0)
\ No newline at end of file
agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_newest', epoch=0)
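The trainer refactoring above mirrors the renamed configuration entries shown earlier in this commit: replay memory, exploration strategy, and optimizer settings are now collected in a single agent_params dictionary and expanded via DqnAgent(**agent_params) instead of being passed as individual constructor arguments. A hedged sketch of the correspondence for CartPoleDQN, with values copied from the trainer above:

# configuration CartPoleDQN entry  ->  generated agent_params entry
agent_params_sketch = {
    'replay_memory_params': {'method': 'buffer', 'memory_size': 10000, 'sample_size': 32},  # replay_memory : buffer { ... }
    'strategy_params': {'method': 'epsgreedy', 'epsilon': 1, 'min_epsilon': 0.01,
                        'epsilon_decay_method': 'linear', 'epsilon_decay': 0.01},           # strategy : epsgreedy { ... }
    'optimizer': 'rmsprop',                                                                  # optimizer : rmsprop { ... }
    'optimizer_params': {'learning_rate': 0.001},
}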
import numpy as np
class ActionPolicyBuilder(object):
def __init__(self):
pass
def build_by_params(self,
method='epsgreedy',
epsilon=0.5,
min_epsilon=0.05,
epsilon_decay_method='no',
epsilon_decay=0.0,
action_dim=None):
if epsilon_decay_method == 'linear':
decay = LinearDecay(eps_decay=epsilon_decay, min_eps=min_epsilon)
else:
decay = NoDecay()
if method == 'epsgreedy':
assert action_dim is not None
assert len(action_dim) == 1
return EpsilonGreedyActionPolicy(eps=epsilon,
number_of_actions=action_dim[0], decay=decay)
else:
assert action_dim is not None
assert len(action_dim) == 1
return GreedyActionPolicy()
class EpsilonGreedyActionPolicy(object):
def __init__(self, eps, number_of_actions, decay):
self.eps = eps
self.cur_eps = eps
self.__number_of_actions = number_of_actions
self.__decay_method = decay
def select_action(self, values):
do_exploration = (np.random.rand() < self.cur_eps)
if do_exploration:
action = np.random.randint(low=0, high=self.__number_of_actions)
else:
action = values.asnumpy().argmax()
return action
def decay(self):
self.cur_eps = self.__decay_method.decay(self.cur_eps)
class GreedyActionPolicy(object):
def __init__(self):
pass
def select_action(self, values):
return values.asnumpy().argmax()
def decay(self):
pass
class NoDecay(object):
def __init__(self):
pass
def decay(self, cur_eps):
return cur_eps
class LinearDecay(object):
def __init__(self, eps_decay, min_eps=0):
self.eps_decay = eps_decay
self.min_eps = min_eps
def decay(self, cur_eps):
return max(cur_eps - self.eps_decay, self.min_eps)
\ No newline at end of file
import logging
import sys
import os
import util
class ArchLogger(object):
_logger = None
__output_level = logging.INFO
__logger_name = 'agent'
__output_directory = '.'
__append = True
__logformat = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
__dateformat = '%d-%b-%y %H:%M:%S'
INFO = logging.INFO
DEBUG = logging.DEBUG
@staticmethod
def set_output_level(output_level):
assert output_level is not None
ArchLogger.__output_level = output_level
@staticmethod
def set_logger_name(logger_name):
assert logger_name is not None
ArchLogger.__logger_name = logger_name
@staticmethod
def set_output_directory(output_directory):
assert output_directory is not None
ArchLogger.__output_directory = output_directory
@staticmethod
def set_append(append):
assert append is not None
ArchLogger.__append = append
@staticmethod
def set_log_format(logformat, dateformat):
assert logformat is not None
assert dateformat is not None
ArchLogger.__logformat = logformat
ArchLogger.__dateformat = dateformat
@staticmethod
def init_logger(make_log_file=True):
assert ArchLogger._logger is None, 'Logger init already called'
filemode = 'a' if ArchLogger.__append else 'w'
formatter = logging.Formatter(
fmt=ArchLogger.__logformat, datefmt=ArchLogger.__dateformat)
logger = logging.getLogger(ArchLogger.__logger_name)
logger.setLevel(ArchLogger.__output_level)
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(ArchLogger.__output_level)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
if make_log_file:
util.make_directory_if_not_exist(ArchLogger.__output_directory)
log_file = os.path.join(
ArchLogger.__output_directory,
ArchLogger.__logger_name + '.log')
file_handler = logging.FileHandler(log_file, mode=filemode)
file_handler.setLevel(ArchLogger.__output_level)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
ArchLogger._logger = logger
@staticmethod
def get_logger():
if ArchLogger._logger is None:
ArchLogger.init_logger()
assert ArchLogger._logger is not None
return ArchLogger._logger
if __name__ == "__main__":
print('=== Test logger ===')
ArchLogger.set_logger_name('TestLogger')
ArchLogger.set_output_directory('test_log')
ArchLogger.init_logger()
logger = ArchLogger.get_logger()
logger.warning('This is a warning')
logger.debug('This is a debug message, which you should not see')
logger.info('This is an info message')
assert os.path.exists('test_log')\
and os.path.isfile(os.path.join('test_log', 'TestLogger.log')),\
'Test failed: No logfile exists'
import shutil
shutil.rmtree('test_log')
\ No newline at end of file
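The generated trainer configures this logger before training; a minimal sketch of the expected call order, mirroring CNNTrainer_cartpole_master_dqn.py above (the output directory below is a hypothetical example, and the agent presumably retrieves the logger via get_logger()):

ArchLogger.set_output_directory('model_output/cartpole_master_dqn')  # hypothetical path
ArchLogger.set_logger_name('cartpole_master_dqn')
ArchLogger.set_output_level(ArchLogger.INFO)
logger = ArchLogger.get_logger()  # lazily calls init_logger(), attaching stdout and file handlers
logger.info('Training started')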
@@ -33,13 +33,6 @@ class GymEnvironment(Environment):
def state_dim(self):
return self.__env.observation_space.shape
@property
def state_dtype(self):
return 'float32'
@property
def action_dtype(self):
return 'uint8'
@property
def number_of_actions(self):
......
import numpy as np
class StrategyBuilder(object):
def __init__(self):
pass
def build_by_params(
self,
method='epsgreedy',
epsilon=0.5,
min_epsilon=0.05,
epsilon_decay_method='no',
epsilon_decay=0.0,
epsilon_decay_start=0,
action_dim=None,
action_low=None,
action_high=None,
mu=0.0,
theta=0.5,
sigma=0.3
):
if epsilon_decay_method == 'linear':
decay = LinearDecay(
eps_decay=epsilon_decay, min_eps=min_epsilon,
decay_start=epsilon_decay_start)
else:
decay = NoDecay()
if method == 'epsgreedy':
assert action_dim is not None
assert len(action_dim) == 1
return EpsilonGreedyStrategy(
eps=epsilon, number_of_actions=action_dim[0],
decay_method=decay)
elif method == 'uhlenbeck_ornstein':
assert action_dim is not None
assert action_low is not None
assert action_high is not None
assert mu is not None
assert theta is not None
assert sigma is not None
return UhlenbeckOrnsteinStrategy(
action_dim, action_low, action_high, epsilon, mu, theta,
sigma, decay)
else:
assert action_dim is not None
assert len(action_dim) == 1
return GreedyStrategy()
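# Hypothetical usage sketch (values mirror the CartPoleDQN configuration above;
# assumes the concrete strategy classes keep the select_action/decay interface
# of the former action_policy module):
#
#   strategy = StrategyBuilder().build_by_params(
#       method='epsgreedy', epsilon=1.0, min_epsilon=0.01,
#       epsilon_decay_method='linear', epsilon_decay=0.01, action_dim=(2,))
#   action = strategy.select_action(q_values)  # q_values: mx.nd.NDArray of Q-values
#   strategy.decay(episode)                    # anneal epsilon once per episode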
class BaseDecay(object):
def __init__(self):
pass
def decay(self, *args):
raise NotImplementedError
def __call__(self, *args):
return self.decay(*args)
class NoDecay(BaseDecay):
def __init__(self):
super(NoDecay, self).__init__()
def decay(self, cur_eps, episode):
return cur_eps
class LinearDecay(BaseDecay):
def __init__(self, eps_decay, min_eps=0, decay_start=0):
super(LinearDecay, self).__init__()
self.eps_decay = eps_decay
self.min_eps = min_eps
self.decay_start = decay_start
def decay(self, cur_eps, episode):
if episode < self.decay_start:
return cur_eps
else:
return max(cur_eps - self.eps_decay, self.min_eps)
class BaseStrategy(object):
def __init__(self, decay_method):