Commit a51b42b6 authored by Nicola Gatto

Adjust tests

parent cc31bd8e
@@ -214,11 +214,12 @@ public class GenerationTest extends AbstractSymtabTest {
                         "HelperA.h",
                         "start_training.sh",
                         "reinforcement_learning/__init__.py",
-                        "reinforcement_learning/action_policy.py",
+                        "reinforcement_learning/strategy.py",
                         "reinforcement_learning/agent.py",
                         "reinforcement_learning/environment.py",
                         "reinforcement_learning/replay_memory.py",
-                        "reinforcement_learning/util.py"
+                        "reinforcement_learning/util.py",
+                        "reinforcement_learning/cnnarch_logger.py"
                 )
         );
     }
@@ -262,11 +263,12 @@ public class GenerationTest extends AbstractSymtabTest {
                         "reward/pylib/armanpy/armanpy_3d.i",
                         "reward/pylib/armanpy/numpy.i",
                         "reinforcement_learning/__init__.py",
-                        "reinforcement_learning/action_policy.py",
+                        "reinforcement_learning/strategy.py",
                         "reinforcement_learning/agent.py",
                         "reinforcement_learning/environment.py",
                         "reinforcement_learning/replay_memory.py",
                         "reinforcement_learning/util.py",
+                        "reinforcement_learning/cnnarch_logger.py",
                         "reinforcement_learning/torcs_agent_dqn_reward_executor.py"
                 )
         );
@@ -292,5 +294,33 @@ public class GenerationTest extends AbstractSymtabTest {
         String[] args = {"-m", "src/test/resources/models/reinforcementModel", "-r", "mountaincar.Master", "-b", "GLUON", "-f", "n", "-c", "n"};
         EMADLGeneratorCli.main(args);
         assertEquals(0, Log.getFindings().stream().filter(Finding::isError).count());
+        checkFilesAreEqual(
+                Paths.get("./target/generated-sources-emadl"),
+                Paths.get("./src/test/resources/target_code/gluon/reinforcementModel/mountaincar"),
+                Arrays.asList(
+                        "mountaincar_master.cpp",
+                        "mountaincar_master.h",
+                        "mountaincar_master_actor.h",
+                        "CMakeLists.txt",
+                        "CNNBufferFile.h",
+                        "CNNCreator_mountaincar_master_actor.py",
+                        "CNNNet_mountaincar_master_actor.py",
+                        "CNNPredictor_mountaincar_master_actor.h",
+                        "CNNTrainer_mountaincar_master_actor.py",
+                        "CNNTranslator.h",
+                        "HelperA.h",
+                        "start_training.sh",
+                        "reinforcement_learning/__init__.py",
+                        "reinforcement_learning/CNNCreator_MountaincarCritic.py",
+                        "reinforcement_learning/CNNNet_MountaincarCritic.py",
+                        "reinforcement_learning/strategy.py",
+                        "reinforcement_learning/agent.py",
+                        "reinforcement_learning/environment.py",
+                        "reinforcement_learning/replay_memory.py",
+                        "reinforcement_learning/util.py",
+                        "reinforcement_learning/cnnarch_logger.py"
+                )
+        );
     }
 }
@@ -24,7 +24,7 @@ configuration CartPoleDQN {
         sample_size : 32
     }
-    action_selection : epsgreedy{
+    strategy : epsgreedy{
         epsilon : 1.0
         min_epsilon : 0.01
         epsilon_decay_method: linear
...
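For orientation: the epsgreedy block above starts fully exploratory (epsilon 1.0) and shrinks epsilon linearly by 0.01 per decay step down to min_epsilon 0.01, so the agent acts almost purely greedily after roughly 100 decay steps. A minimal illustrative sketch of that schedule (not part of the generated code; it assumes the decay is applied once per training episode):

# Illustration only: linear epsilon decay with the CartPoleDQN values above.
epsilon, min_epsilon, epsilon_decay = 1.0, 0.01, 0.01

def decayed_epsilon(step):
    # subtract epsilon_decay once per step, floored at min_epsilon
    return max(epsilon - step * epsilon_decay, min_epsilon)

print(decayed_epsilon(0))    # 1.0  -> purely random actions at the start
print(decayed_epsilon(50))   # 0.5
print(decayed_epsilon(120))  # 0.01 -> almost always greedy from here on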
@@ -14,21 +14,26 @@ configuration MountaincarActor {
     snapshot_interval : 20
-    loss : euclidean
     replay_memory : buffer{
-        memory_size : 10000
-        sample_size : 32
+        memory_size : 1000000
+        sample_size : 64
     }
-    action_selection : epsgreedy{
+    strategy : ornstein_uhlenbeck{
         epsilon : 1.0
         min_epsilon : 0.01
         epsilon_decay_method: linear
         epsilon_decay : 0.01
+        mu: (0.0)
+        theta: (0.15)
+        sigma: (0.3)
     }
-    optimizer : rmsprop{
+    actor_optimizer : adam {
+        learning_rate : 0.0001
+    }
+    critic_optimizer : adam {
         learning_rate : 0.001
     }
 }
\ No newline at end of file
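The ornstein_uhlenbeck strategy adds temporally correlated noise to the actor's continuous action: mu is the long-run mean, theta the strength of the pull back towards that mean, and sigma the scale of the random perturbation. The generated strategy.py is not part of this diff, so the following is only a sketch of the underlying process with the values from the configuration above, not the generator's implementation:

import numpy as np

# Illustrative Ornstein-Uhlenbeck noise process (mu=0.0, theta=0.15, sigma=0.3).
class OrnsteinUhlenbeckNoise(object):
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.3):
        self.mu = mu * np.ones(action_dim)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # restart at the mean, e.g. at the beginning of an episode
        self.state = np.copy(self.mu)

    def sample(self):
        # mean-reverting drift towards mu plus Gaussian diffusion
        dx = self.theta * (self.mu - self.state) \
            + self.sigma * np.random.randn(*self.state.shape)
        self.state = self.state + dx
        return self.state

noise = OrnsteinUhlenbeckNoise(action_dim=(1,))
# exploratory_action = actor_output + noise.sample()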
@@ -8,8 +8,5 @@ implementation Critic(state, action) {
         FullyConnected(units=300)
     ) ->
     Add() ->
-    Relu() ->
-    FullyConnected(units=1) ->
-    Tanh() ->
-    critic
+    Relu()
 }
\ No newline at end of file
@@ -30,7 +30,7 @@ configuration TorcsDQN {
         sample_size : 32
     }
-    action_selection : epsgreedy{
+    strategy : epsgreedy{
         epsilon : 1.0
         min_epsilon : 0.01
         epsilon_decay_method: linear
...
import os
import h5py
import mxnet as mx
import logging
import sys

class cartpole_master_dqnDataLoader:
    _input_names_ = ['state']
    _output_names_ = ['qvalues_label']

    def __init__(self):
        self._data_dir = "data/"

    def load_data(self, batch_size):
        train_h5, test_h5 = self.load_h5_files()

        data_mean = train_h5[self._input_names_[0]][:].mean(axis=0)
        data_std = train_h5[self._input_names_[0]][:].std(axis=0) + 1e-5

        train_iter = mx.io.NDArrayIter(train_h5[self._input_names_[0]],
                                       train_h5[self._output_names_[0]],
                                       batch_size=batch_size,
                                       data_name=self._input_names_[0],
                                       label_name=self._output_names_[0])
        test_iter = None
        if test_h5 != None:
            test_iter = mx.io.NDArrayIter(test_h5[self._input_names_[0]],
                                          test_h5[self._output_names_[0]],
                                          batch_size=batch_size,
                                          data_name=self._input_names_[0],
                                          label_name=self._output_names_[0])
        return train_iter, test_iter, data_mean, data_std

    def load_h5_files(self):
        train_h5 = None
        test_h5 = None
        train_path = self._data_dir + "train.h5"
        test_path = self._data_dir + "test.h5"
        if os.path.isfile(train_path):
            train_h5 = h5py.File(train_path, 'r')
            if not (self._input_names_[0] in train_h5 and self._output_names_[0] in train_h5):
                logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the datasets: "
                              + "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
                sys.exit(1)
            test_iter = None
            if os.path.isfile(test_path):
                test_h5 = h5py.File(test_path, 'r')
                if not (self._input_names_[0] in test_h5 and self._output_names_[0] in test_h5):
                    logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the datasets: "
                                  + "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
                    sys.exit(1)
            else:
                logging.warning("Couldn't load test set. File '" + os.path.abspath(test_path) + "' does not exist.")
            return train_h5, test_h5
        else:
            logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.")
            sys.exit(1)
\ No newline at end of file
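A minimal usage sketch for the loader above (illustrative only; it assumes data/train.h5 exists and contains the 'state' and 'qvalues_label' datasets named in the code):

loader = cartpole_master_dqnDataLoader()
train_iter, test_iter, data_mean, data_std = loader.load_data(batch_size=32)

for batch in train_iter:
    states = batch.data[0]    # mini-batch of 'state' inputs
    labels = batch.label[0]   # matching 'qvalues_label' targets
    break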
 from reinforcement_learning.agent import DqnAgent
 from reinforcement_learning.util import AgentSignalHandler
+from reinforcement_learning.cnnarch_logger import ArchLogger
 import reinforcement_learning.environment
 import CNNCreator_cartpole_master_dqn
@@ -9,9 +10,6 @@ import re
 import logging
 import mxnet as mx
-session_output_dir = 'session'
-agent_name='cartpole_master_dqn'
-session_param_output = os.path.join(session_output_dir, agent_name)
 def resume_session():
     session_param_output = os.path.join(session_output_dir, agent_name)
@@ -32,60 +30,73 @@ def resume_session():
             break
     return resume_session, resume_directory
 if __name__ == "__main__":
+    agent_name = 'cartpole_master_dqn'
+    # Prepare output directory and logger
+    output_directory = 'model_output'\
+        + '/' + agent_name\
+        + '/' + time.strftime(
+            '%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
+    ArchLogger.set_output_directory(output_directory)
+    ArchLogger.set_logger_name(agent_name)
+    ArchLogger.set_output_level(ArchLogger.INFO)
     env = reinforcement_learning.environment.GymEnvironment('CartPole-v0')
-    context = mx.cpu()
-    net_creator = CNNCreator_cartpole_master_dqn.CNNCreator_cartpole_master_dqn()
-    net_creator.construct(context)
-    replay_memory_params = {
-        'method':'buffer',
-        'memory_size':10000,
-        'sample_size':32,
-        'state_dtype':'float32',
-        'action_dtype':'uint8',
-        'rewards_dtype':'float32'
-    }
-    policy_params = {
-        'method':'epsgreedy',
-        'epsilon': 1,
-        'min_epsilon': 0.01,
-        'epsilon_decay_method': 'linear',
-        'epsilon_decay': 0.01,
-    }
+    context = mx.cpu()
+    qnet_creator = CNNCreator_cartpole_master_dqn.CNNCreator_cartpole_master_dqn()
+    qnet_creator.construct(context)
+    agent_params = {
+        'environment': env,
+        'replay_memory_params': {
+            'method':'buffer',
+            'memory_size':10000,
+            'sample_size':32,
+            'state_dtype':'float32',
+            'action_dtype':'float32',
+            'rewards_dtype':'float32'
+        },
+        'strategy_params': {
+            'method':'epsgreedy',
+            'epsilon': 1,
+            'min_epsilon': 0.01,
+            'epsilon_decay_method': 'linear',
+            'epsilon_decay': 0.01,
+        },
+        'agent_name': agent_name,
+        'verbose': True,
+        'state_dim': (4,),
+        'action_dim': (2,),
+        'ctx': 'cpu',
+        'discount_factor': 0.999,
+        'training_episodes': 160,
+        'train_interval': 1,
+        'snapshot_interval': 20,
+        'max_episode_step': 250,
+        'target_score': 185.5,
+        'qnet':qnet_creator.net,
+        'use_fix_target': True,
+        'target_update_interval': 200,
+        'loss_function': 'euclidean',
+        'optimizer': 'rmsprop',
+        'optimizer_params': {
+            'learning_rate': 0.001 },
+        'double_dqn': False,
+    }
-    resume_session, resume_directory = resume_session()
-    if resume_session:
-        agent = DqnAgent.resume_from_session(resume_directory, net_creator.net, env)
+    resume, resume_directory = resume_session()
+    if resume:
+        resume_agent_params = {
+            'session_dir': resume_directory,
+            'environment': env,
+            'net': qnet_creator.net,
+        }
+        agent = DqnAgent.resume_from_session(**resume_agent_params)
     else:
-        agent = DqnAgent(
-            network = net_creator.net,
-            environment=env,
-            replay_memory_params=replay_memory_params,
-            policy_params=policy_params,
-            state_dim=net_creator.get_input_shapes()[0],
-            ctx='cpu',
-            discount_factor=0.999,
-            loss_function='euclidean',
-            optimizer='rmsprop',
-            optimizer_params={
-                'learning_rate': 0.001 },
-            training_episodes=160,
-            train_interval=1,
-            use_fix_target=True,
-            target_update_interval=200,
-            double_dqn = False,
-            snapshot_interval=20,
-            agent_name=agent_name,
-            max_episode_step=250,
-            output_directory=session_output_dir,
-            verbose=True,
-            live_plot = True,
-            make_logfile=True,
-            target_score=185.5
-        )
+        agent = DqnAgent(**agent_params)
     signal_handler = AgentSignalHandler()
     signal_handler.register_agent(agent)
@@ -93,4 +104,4 @@ if __name__ == "__main__":
     train_successful = agent.train()
     if train_successful:
-        agent.save_best_network(net_creator._model_dir_ + net_creator._model_prefix_ + '_newest', epoch=0)
+        agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_newest', epoch=0)
\ No newline at end of file
import numpy as np

class ActionPolicyBuilder(object):
    def __init__(self):
        pass

    def build_by_params(self,
        method='epsgreedy',
        epsilon=0.5,
        min_epsilon=0.05,
        epsilon_decay_method='no',
        epsilon_decay=0.0,
        action_dim=None):

        if epsilon_decay_method == 'linear':
            decay = LinearDecay(eps_decay=epsilon_decay, min_eps=min_epsilon)
        else:
            decay = NoDecay()

        if method == 'epsgreedy':
            assert action_dim is not None
            assert len(action_dim) == 1
            return EpsilonGreedyActionPolicy(eps=epsilon,
                number_of_actions=action_dim[0], decay=decay)
        else:
            assert action_dim is not None
            assert len(action_dim) == 1
            return GreedyActionPolicy()

class EpsilonGreedyActionPolicy(object):
    def __init__(self, eps, number_of_actions, decay):
        self.eps = eps
        self.cur_eps = eps
        self.__number_of_actions = number_of_actions
        self.__decay_method = decay

    def select_action(self, values):
        do_exploration = (np.random.rand() < self.cur_eps)
        if do_exploration:
            action = np.random.randint(low=0, high=self.__number_of_actions)
        else:
            action = values.asnumpy().argmax()
        return action

    def decay(self):
        self.cur_eps = self.__decay_method.decay(self.cur_eps)

class GreedyActionPolicy(object):
    def __init__(self):
        pass

    def select_action(self, values):
        return values.asnumpy().argmax()

    def decay(self):
        pass

class NoDecay(object):
    def __init__(self):
        pass

    def decay(self, cur_eps):
        return cur_eps

class LinearDecay(object):
    def __init__(self, eps_decay, min_eps=0):
        self.eps_decay = eps_decay
        self.min_eps = min_eps

    def decay(self, cur_eps):
        return max(cur_eps - self.eps_decay, self.min_eps)
\ No newline at end of file
import logging
import sys
import os
import util

class ArchLogger(object):
    _logger = None

    __output_level = logging.INFO
    __logger_name = 'agent'
    __output_directory = '.'
    __append = True
    __logformat = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    __dateformat = '%d-%b-%y %H:%M:%S'

    INFO = logging.INFO
    DEBUG = logging.DEBUG

    @staticmethod
    def set_output_level(output_level):
        assert output_level is not None
        ArchLogger.__output_level = output_level

    @staticmethod
    def set_logger_name(logger_name):
        assert logger_name is not None
        ArchLogger.__logger_name = logger_name

    @staticmethod
    def set_output_directory(output_directory):
        assert output_directory is not None
        ArchLogger.__output_directory = output_directory

    @staticmethod
    def set_append(append):
        assert append is not None
        ArchLogger.__append = append

    @staticmethod
    def set_log_format(logformat, dateformat):
        assert logformat is not None
        assert dateformat is not None
        ArchLogger.__logformat = logformat
        ArchLogger.__dateformat = dateformat

    @staticmethod
    def init_logger(make_log_file=True):
        assert ArchLogger._logger is None, 'Logger init already called'
        filemode = 'a' if ArchLogger.__append else 'w'
        formatter = logging.Formatter(
            fmt=ArchLogger.__logformat, datefmt=ArchLogger.__dateformat)

        logger = logging.getLogger(ArchLogger.__logger_name)
        logger.setLevel(ArchLogger.__output_level)

        stream_handler = logging.StreamHandler(sys.stdout)
        stream_handler.setLevel(ArchLogger.__output_level)
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)

        if make_log_file:
            util.make_directory_if_not_exist(ArchLogger.__output_directory)
            log_file = os.path.join(
                ArchLogger.__output_directory,
                ArchLogger.__logger_name + '.log')
            file_handler = logging.FileHandler(log_file, mode=filemode)
            file_handler.setLevel(ArchLogger.__output_level)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)
        ArchLogger._logger = logger

    @staticmethod
    def get_logger():
        if ArchLogger._logger is None:
            ArchLogger.init_logger()
        assert ArchLogger._logger is not None
        return ArchLogger._logger

if __name__ == "__main__":
    print('=== Test logger ===')
    ArchLogger.set_logger_name('TestLogger')
    ArchLogger.set_output_directory('test_log')
    ArchLogger.init_logger()
    logger = ArchLogger.get_logger()
    logger.warning('This is a warning')
    logger.debug('This is a debug information, which you should not see')
    logger.info('This is a normal information')
    assert os.path.exists('test_log')\
        and os.path.isfile(os.path.join('test_log', 'TestLogger.log')),\
        'Test failed: No logfile exists'
    import shutil
    shutil.rmtree('test_log')
\ No newline at end of file
@@ -33,13 +33,6 @@ class GymEnvironment(Environment):
     def state_dim(self):
         return self.__env.observation_space.shape
-    @property
-    def state_dtype(self):
-        return 'float32'