From a51b42b620a9e517c1c328b233d421ee15e5f3be Mon Sep 17 00:00:00 2001 From: Nicola Gatto Date: Thu, 30 May 2019 23:49:35 +0200 Subject: [PATCH] Adjust tests --- .../lang/monticar/emadl/GenerationTest.java | 36 +- .../cartpole/agent/CartPoleDQN.cnnt | 2 +- .../mountaincar/agent/MountaincarActor.cnnt | 17 +- .../mountaincar/agent/MountaincarCritic.cnna | 5 +- .../torcs/agent/dqn/TorcsDQN.cnnt | 2 +- .../CNNDataLoader_cartpole_master_dqn.py | 57 + .../CNNTrainer_cartpole_master_dqn.py | 111 +- .../reinforcement_learning/action_policy.py | 73 -- .../cartpole/reinforcement_learning/agent.py | 1144 +++++++++++------ .../reinforcement_learning/cnnarch_logger.py | 93 ++ .../reinforcement_learning/environment.py | 7 - .../reinforcement_learning/replay_memory.py | 137 +- .../reinforcement_learning/strategy.py | 172 +++ .../cartpole/reinforcement_learning/util.py | 238 +++- .../mountaincar/CMakeLists.txt | 27 + .../mountaincar/CNNBufferFile.h | 51 + .../CNNCreator_mountaincar_master_actor.py | 56 + .../CNNDataLoader_mountaincar_master_actor.py | 57 + .../CNNNet_mountaincar_master_actor.py | 105 ++ .../CNNPredictor_mountaincar_master_actor.h | 104 ++ .../CNNTrainer_mountaincar_master_actor.py | 115 ++ .../mountaincar/CNNTranslator.h | 127 ++ .../reinforcementModel/mountaincar/HelperA.h | 141 ++ .../mountaincar/cmake/FindArmadillo.cmake | 38 + .../mountaincar/mountaincar_master.cpp | 1 + .../mountaincar/mountaincar_master.h | 28 + .../mountaincar/mountaincar_master_actor.h | 31 + .../CNNCreator_MountaincarCritic.py | 56 + .../CNNNet_MountaincarCritic.py | 115 ++ .../reinforcement_learning}/__init__.py | 0 .../reinforcement_learning/agent.py | 900 +++++++++++++ .../reinforcement_learning/cnnarch_logger.py | 93 ++ .../reinforcement_learning/environment.py | 64 + .../reinforcement_learning/replay_memory.py | 208 +++ .../reinforcement_learning/strategy.py | 172 +++ .../reinforcement_learning/util.py | 276 ++++ .../mountaincar/start_training.sh | 2 + ...NNDataLoader_torcs_agent_torcsAgent_dqn.py | 57 + .../CNNTrainer_torcs_agent_torcsAgent_dqn.py | 110 +- .../_torcs_agent_dqn_reward_executor.so | Bin 174216 -> 0 bytes .../reinforcement_learning/action_policy.py | 73 -- .../torcs/reinforcement_learning/agent.py | 1144 +++++++++++------ .../reinforcement_learning/cnnarch_logger.py | 93 ++ .../reinforcement_learning/environment.py | 2 +- .../reinforcement_learning/replay_memory.py | 137 +- .../torcs/reinforcement_learning/strategy.py | 172 +++ .../torcs/reinforcement_learning/util.py | 238 +++- .../torcs/reward/pylib/__init__.py | 0 48 files changed, 5682 insertions(+), 1205 deletions(-) create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNDataLoader_cartpole_master_dqn.py delete mode 100644 src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/action_policy.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/cnnarch_logger.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/strategy.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CMakeLists.txt create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNBufferFile.h create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNCreator_mountaincar_master_actor.py create mode 100644 
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNDataLoader_mountaincar_master_actor.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNNet_mountaincar_master_actor.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNPredictor_mountaincar_master_actor.h create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNTrainer_mountaincar_master_actor.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNTranslator.h create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/HelperA.h create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/cmake/FindArmadillo.cmake create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/mountaincar_master.cpp create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/mountaincar_master.h create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/mountaincar_master_actor.h create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/CNNCreator_MountaincarCritic.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/CNNNet_MountaincarCritic.py rename src/test/resources/target_code/gluon/reinforcementModel/{torcs/reward => mountaincar/reinforcement_learning}/__init__.py (100%) create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/agent.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/cnnarch_logger.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/environment.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/replay_memory.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/strategy.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/util.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/mountaincar/start_training.sh create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNDataLoader_torcs_agent_torcsAgent_dqn.py delete mode 100755 src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/_torcs_agent_dqn_reward_executor.so delete mode 100644 src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/action_policy.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/cnnarch_logger.py create mode 100644 src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/strategy.py delete mode 100644 src/test/resources/target_code/gluon/reinforcementModel/torcs/reward/pylib/__init__.py diff --git a/src/test/java/de/monticore/lang/monticar/emadl/GenerationTest.java b/src/test/java/de/monticore/lang/monticar/emadl/GenerationTest.java index ed5d04a..d3146ca 100644 --- a/src/test/java/de/monticore/lang/monticar/emadl/GenerationTest.java +++ b/src/test/java/de/monticore/lang/monticar/emadl/GenerationTest.java @@ -214,11 +214,12 @@ public class GenerationTest extends AbstractSymtabTest { "HelperA.h", 
"start_training.sh", "reinforcement_learning/__init__.py", - "reinforcement_learning/action_policy.py", + "reinforcement_learning/strategy.py", "reinforcement_learning/agent.py", "reinforcement_learning/environment.py", "reinforcement_learning/replay_memory.py", - "reinforcement_learning/util.py" + "reinforcement_learning/util.py", + "reinforcement_learning/cnnarch_logger.py" ) ); } @@ -262,11 +263,12 @@ public class GenerationTest extends AbstractSymtabTest { "reward/pylib/armanpy/armanpy_3d.i", "reward/pylib/armanpy/numpy.i", "reinforcement_learning/__init__.py", - "reinforcement_learning/action_policy.py", + "reinforcement_learning/strategy.py", "reinforcement_learning/agent.py", "reinforcement_learning/environment.py", "reinforcement_learning/replay_memory.py", "reinforcement_learning/util.py", + "reinforcement_learning/cnnarch_logger.py", "reinforcement_learning/torcs_agent_dqn_reward_executor.py" ) ); @@ -292,5 +294,33 @@ public class GenerationTest extends AbstractSymtabTest { String[] args = {"-m", "src/test/resources/models/reinforcementModel", "-r", "mountaincar.Master", "-b", "GLUON", "-f", "n", "-c", "n"}; EMADLGeneratorCli.main(args); assertEquals(0, Log.getFindings().stream().filter(Finding::isError).count()); + + checkFilesAreEqual( + Paths.get("./target/generated-sources-emadl"), + Paths.get("./src/test/resources/target_code/gluon/reinforcementModel/mountaincar"), + Arrays.asList( + "mountaincar_master.cpp", + "mountaincar_master.h", + "mountaincar_master_actor.h", + "CMakeLists.txt", + "CNNBufferFile.h", + "CNNCreator_mountaincar_master_actor.py", + "CNNNet_mountaincar_master_actor.py", + "CNNPredictor_mountaincar_master_actor.h", + "CNNTrainer_mountaincar_master_actor.py", + "CNNTranslator.h", + "HelperA.h", + "start_training.sh", + "reinforcement_learning/__init__.py", + "reinforcement_learning/CNNCreator_MountaincarCritic.py", + "reinforcement_learning/CNNNet_MountaincarCritic.py", + "reinforcement_learning/strategy.py", + "reinforcement_learning/agent.py", + "reinforcement_learning/environment.py", + "reinforcement_learning/replay_memory.py", + "reinforcement_learning/util.py", + "reinforcement_learning/cnnarch_logger.py" + ) + ); } } diff --git a/src/test/resources/models/reinforcementModel/cartpole/agent/CartPoleDQN.cnnt b/src/test/resources/models/reinforcementModel/cartpole/agent/CartPoleDQN.cnnt index 20ba0f8..ba3afd5 100644 --- a/src/test/resources/models/reinforcementModel/cartpole/agent/CartPoleDQN.cnnt +++ b/src/test/resources/models/reinforcementModel/cartpole/agent/CartPoleDQN.cnnt @@ -24,7 +24,7 @@ configuration CartPoleDQN { sample_size : 32 } - action_selection : epsgreedy{ + strategy : epsgreedy{ epsilon : 1.0 min_epsilon : 0.01 epsilon_decay_method: linear diff --git a/src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarActor.cnnt b/src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarActor.cnnt index 5d25017..bda58d3 100644 --- a/src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarActor.cnnt +++ b/src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarActor.cnnt @@ -14,21 +14,26 @@ configuration MountaincarActor { snapshot_interval : 20 - loss : euclidean - replay_memory : buffer{ - memory_size : 10000 - sample_size : 32 + memory_size : 1000000 + sample_size : 64 } - action_selection : epsgreedy{ + strategy : ornstein_uhlenbeck{ epsilon : 1.0 min_epsilon : 0.01 epsilon_decay_method: linear epsilon_decay : 0.01 + mu: (0.0) + theta: (0.15) + sigma: (0.3) + } + + 
actor_optimizer : adam { + learning_rate : 0.0001 } - optimizer : rmsprop{ + critic_optimizer : adam { learning_rate : 0.001 } } \ No newline at end of file diff --git a/src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarCritic.cnna b/src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarCritic.cnna index 8ebe3c2..ef5e24f 100644 --- a/src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarCritic.cnna +++ b/src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarCritic.cnna @@ -8,8 +8,5 @@ implementation Critic(state, action) { FullyConnected(units=300) ) -> Add() -> - Relu() -> - FullyConnected(units=1) -> - Tanh() -> - critic + Relu() } \ No newline at end of file diff --git a/src/test/resources/models/reinforcementModel/torcs/agent/dqn/TorcsDQN.cnnt b/src/test/resources/models/reinforcementModel/torcs/agent/dqn/TorcsDQN.cnnt index 1352e72..e917c34 100644 --- a/src/test/resources/models/reinforcementModel/torcs/agent/dqn/TorcsDQN.cnnt +++ b/src/test/resources/models/reinforcementModel/torcs/agent/dqn/TorcsDQN.cnnt @@ -30,7 +30,7 @@ configuration TorcsDQN { sample_size : 32 } - action_selection : epsgreedy{ + strategy : epsgreedy{ epsilon : 1.0 min_epsilon : 0.01 epsilon_decay_method: linear diff --git a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNDataLoader_cartpole_master_dqn.py b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNDataLoader_cartpole_master_dqn.py new file mode 100644 index 0000000..a884473 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNDataLoader_cartpole_master_dqn.py @@ -0,0 +1,57 @@ +import os +import h5py +import mxnet as mx +import logging +import sys + +class cartpole_master_dqnDataLoader: + _input_names_ = ['state'] + _output_names_ = ['qvalues_label'] + + def __init__(self): + self._data_dir = "data/" + + def load_data(self, batch_size): + train_h5, test_h5 = self.load_h5_files() + + data_mean = train_h5[self._input_names_[0]][:].mean(axis=0) + data_std = train_h5[self._input_names_[0]][:].std(axis=0) + 1e-5 + + train_iter = mx.io.NDArrayIter(train_h5[self._input_names_[0]], + train_h5[self._output_names_[0]], + batch_size=batch_size, + data_name=self._input_names_[0], + label_name=self._output_names_[0]) + test_iter = None + if test_h5 != None: + test_iter = mx.io.NDArrayIter(test_h5[self._input_names_[0]], + test_h5[self._output_names_[0]], + batch_size=batch_size, + data_name=self._input_names_[0], + label_name=self._output_names_[0]) + return train_iter, test_iter, data_mean, data_std + + def load_h5_files(self): + train_h5 = None + test_h5 = None + train_path = self._data_dir + "train.h5" + test_path = self._data_dir + "test.h5" + if os.path.isfile(train_path): + train_h5 = h5py.File(train_path, 'r') + if not (self._input_names_[0] in train_h5 and self._output_names_[0] in train_h5): + logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the datasets: " + + "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'") + sys.exit(1) + test_iter = None + if os.path.isfile(test_path): + test_h5 = h5py.File(test_path, 'r') + if not (self._input_names_[0] in test_h5 and self._output_names_[0] in test_h5): + logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the datasets: " + + "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'") + sys.exit(1) + else: + logging.warning("Couldn't load test set. 
File '" + os.path.abspath(test_path) + "' does not exist.") + return train_h5, test_h5 + else: + logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.") + sys.exit(1) \ No newline at end of file diff --git a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNTrainer_cartpole_master_dqn.py b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNTrainer_cartpole_master_dqn.py index 5d7b3c3..3491708 100644 --- a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNTrainer_cartpole_master_dqn.py +++ b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNTrainer_cartpole_master_dqn.py @@ -1,5 +1,6 @@ from reinforcement_learning.agent import DqnAgent from reinforcement_learning.util import AgentSignalHandler +from reinforcement_learning.cnnarch_logger import ArchLogger import reinforcement_learning.environment import CNNCreator_cartpole_master_dqn @@ -9,9 +10,6 @@ import re import logging import mxnet as mx -session_output_dir = 'session' -agent_name='cartpole_master_dqn' -session_param_output = os.path.join(session_output_dir, agent_name) def resume_session(): session_param_output = os.path.join(session_output_dir, agent_name) @@ -32,60 +30,73 @@ def resume_session(): break return resume_session, resume_directory + if __name__ == "__main__": + agent_name='cartpole_master_dqn' + # Prepare output directory and logger + output_directory = 'model_output'\ + + '/' + agent_name\ + + '/' + time.strftime( + '%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + ArchLogger.set_output_directory(output_directory) + ArchLogger.set_logger_name(agent_name) + ArchLogger.set_output_level(ArchLogger.INFO) + env = reinforcement_learning.environment.GymEnvironment('CartPole-v0') - context = mx.cpu() - net_creator = CNNCreator_cartpole_master_dqn.CNNCreator_cartpole_master_dqn() - net_creator.construct(context) - replay_memory_params = { - 'method':'buffer', - 'memory_size':10000, - 'sample_size':32, - 'state_dtype':'float32', - 'action_dtype':'uint8', - 'rewards_dtype':'float32' - } + context = mx.cpu() + qnet_creator = CNNCreator_cartpole_master_dqn.CNNCreator_cartpole_master_dqn() + qnet_creator.construct(context) - policy_params = { - 'method':'epsgreedy', - 'epsilon': 1, - 'min_epsilon': 0.01, - 'epsilon_decay_method': 'linear', - 'epsilon_decay': 0.01, + agent_params = { + 'environment': env, + 'replay_memory_params': { + 'method':'buffer', + 'memory_size':10000, + 'sample_size':32, + 'state_dtype':'float32', + 'action_dtype':'float32', + 'rewards_dtype':'float32' + }, + 'strategy_params': { + 'method':'epsgreedy', + 'epsilon': 1, + 'min_epsilon': 0.01, + 'epsilon_decay_method': 'linear', + 'epsilon_decay': 0.01, + }, + 'agent_name': agent_name, + 'verbose': True, + 'state_dim': (4,), + 'action_dim': (2,), + 'ctx': 'cpu', + 'discount_factor': 0.999, + 'training_episodes': 160, + 'train_interval': 1, + 'snapshot_interval': 20, + 'max_episode_step': 250, + 'target_score': 185.5, + 'qnet':qnet_creator.net, + 'use_fix_target': True, + 'target_update_interval': 200, + 'loss_function': 'euclidean', + 'optimizer': 'rmsprop', + 'optimizer_params': { + 'learning_rate': 0.001 }, + 'double_dqn': False, } - resume_session, resume_directory = resume_session() + resume, resume_directory = resume_session() - if resume_session: - agent = DqnAgent.resume_from_session(resume_directory, net_creator.net, env) + if resume: + resume_agent_params = { + 'session_dir': resume_directory, + 'environment': env, + 'net': qnet_creator.net, 
+ } + agent = DqnAgent.resume_from_session(**resume_agent_params) else: - agent = DqnAgent( - network = net_creator.net, - environment=env, - replay_memory_params=replay_memory_params, - policy_params=policy_params, - state_dim=net_creator.get_input_shapes()[0], - ctx='cpu', - discount_factor=0.999, - loss_function='euclidean', - optimizer='rmsprop', - optimizer_params={ - 'learning_rate': 0.001 }, - training_episodes=160, - train_interval=1, - use_fix_target=True, - target_update_interval=200, - double_dqn = False, - snapshot_interval=20, - agent_name=agent_name, - max_episode_step=250, - output_directory=session_output_dir, - verbose=True, - live_plot = True, - make_logfile=True, - target_score=185.5 - ) + agent = DqnAgent(**agent_params) signal_handler = AgentSignalHandler() signal_handler.register_agent(agent) @@ -93,4 +104,4 @@ if __name__ == "__main__": train_successful = agent.train() if train_successful: - agent.save_best_network(net_creator._model_dir_ + net_creator._model_prefix_ + '_newest', epoch=0) \ No newline at end of file + agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_newest', epoch=0) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/action_policy.py b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/action_policy.py deleted file mode 100644 index f43a211..0000000 --- a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/action_policy.py +++ /dev/null @@ -1,73 +0,0 @@ -import numpy as np - -class ActionPolicyBuilder(object): - def __init__(self): - pass - - def build_by_params(self, - method='epsgreedy', - epsilon=0.5, - min_epsilon=0.05, - epsilon_decay_method='no', - epsilon_decay=0.0, - action_dim=None): - - if epsilon_decay_method == 'linear': - decay = LinearDecay(eps_decay=epsilon_decay, min_eps=min_epsilon) - else: - decay = NoDecay() - - if method == 'epsgreedy': - assert action_dim is not None - assert len(action_dim) == 1 - return EpsilonGreedyActionPolicy(eps=epsilon, - number_of_actions=action_dim[0], decay=decay) - else: - assert action_dim is not None - assert len(action_dim) == 1 - return GreedyActionPolicy() - -class EpsilonGreedyActionPolicy(object): - def __init__(self, eps, number_of_actions, decay): - self.eps = eps - self.cur_eps = eps - self.__number_of_actions = number_of_actions - self.__decay_method = decay - - def select_action(self, values): - do_exploration = (np.random.rand() < self.cur_eps) - if do_exploration: - action = np.random.randint(low=0, high=self.__number_of_actions) - else: - action = values.asnumpy().argmax() - return action - - def decay(self): - self.cur_eps = self.__decay_method.decay(self.cur_eps) - - -class GreedyActionPolicy(object): - def __init__(self): - pass - - def select_action(self, values): - return values.asnumpy().argmax() - - def decay(self): - pass - - -class NoDecay(object): - def __init__(self): - pass - - def decay(self, cur_eps): - return cur_eps - -class LinearDecay(object): - def __init__(self, eps_decay, min_eps=0): - self.eps_decay = eps_decay - self.min_eps = min_eps - - def decay(self, cur_eps): - return max(cur_eps - self.eps_decay, self.min_eps) \ No newline at end of file diff --git a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/agent.py b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/agent.py index acbc974..c06b32a 100644 --- 
a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/agent.py +++ b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/agent.py @@ -2,118 +2,382 @@ import mxnet as mx import numpy as np import time import os -import logging import sys import util import matplotlib.pyplot as plt +import pyprind +from cnnarch_logger import ArchLogger from replay_memory import ReplayMemoryBuilder -from action_policy import ActionPolicyBuilder -from util import copy_net, get_loss_function +from strategy import StrategyBuilder +from util import copy_net, get_loss_function,\ + copy_net_with_two_inputs, DdpgTrainingStats, DqnTrainingStats,\ + make_directory_if_not_exist from mxnet import nd, gluon, autograd -class DqnAgent(object): - def __init__(self, - network, + +class Agent(object): + def __init__( + self, environment, replay_memory_params, - policy_params, + strategy_params, state_dim, + action_dim, ctx=None, discount_factor=.9, - loss_function='euclidean', - optimizer='rmsprop', - optimizer_params = {'learning_rate':0.09}, training_episodes=50, train_interval=1, - use_fix_target=False, - double_dqn = False, - target_update_interval=10, + start_training=0, snapshot_interval=200, - agent_name='Dqn_agent', + agent_name='Agent', max_episode_step=99999, + evaluation_samples=1000, output_directory='model_parameters', verbose=True, - live_plot = True, - make_logfile=True, - target_score=None): - assert 0 < discount_factor <= 1 - assert train_interval > 0 - assert target_update_interval > 0 - assert snapshot_interval > 0 - assert max_episode_step > 0 - assert training_episodes > 0 - assert replay_memory_params is not None - assert type(state_dim) is tuple - - self.__ctx = mx.gpu() if ctx == 'gpu' else mx.cpu() - self.__qnet = network - - self.__environment = environment - self.__discount_factor = discount_factor - self.__training_episodes = training_episodes - self.__train_interval = train_interval - self.__verbose = verbose - self.__state_dim = state_dim - self.__action_dim = self.__qnet(nd.random_normal(shape=((1,) + self.__state_dim), ctx=self.__ctx)).shape[1:] + target_score=None + ): + assert 0 < discount_factor <= 1,\ + 'Discount factor must be between 0 and 1' + assert train_interval > 0, 'Train interval must be greater 0' + assert snapshot_interval > 0, 'Snapshot interval must be greater 0' + assert max_episode_step > 0,\ + 'Maximal steps per episode must be greater 0' + assert training_episodes > 0, 'Trainings episode must be greater 0' + assert replay_memory_params is not None,\ + 'Replay memory parameter not set' + assert type(state_dim) is tuple, 'State dimension is not a tuple' + assert type(action_dim) is tuple, 'Action dimension is not a tuple' + + self._logger = ArchLogger.get_logger() + self._ctx = mx.gpu() if ctx == 'gpu' else mx.cpu() + self._environment = environment + self._discount_factor = discount_factor + self._training_episodes = training_episodes + self._train_interval = train_interval + self._verbose = verbose + self._state_dim = state_dim replay_memory_params['state_dim'] = state_dim - self.__replay_memory_params = replay_memory_params + replay_memory_params['action_dim'] = action_dim + self._replay_memory_params = replay_memory_params rm_builder = ReplayMemoryBuilder() - self.__memory = rm_builder.build_by_params(**replay_memory_params) - self.__minibatch_size = self.__memory.sample_size - - policy_params['action_dim'] = self.__action_dim - self.__policy_params = policy_params - p_builder = ActionPolicyBuilder() 
- self.__policy = p_builder.build_by_params(**policy_params) - - self.__target_update_interval = target_update_interval - self.__target_qnet = copy_net(self.__qnet, self.__state_dim, ctx=self.__ctx) - self.__loss_function_str = loss_function - self.__loss_function = get_loss_function(loss_function) - self.__agent_name = agent_name - self.__snapshot_interval = snapshot_interval - self.__creation_time = time.time() - self.__max_episode_step = max_episode_step - self.__optimizer = optimizer - self.__optimizer_params = optimizer_params - self.__make_logfile = make_logfile - self.__double_dqn = double_dqn - self.__use_fix_target = use_fix_target - self.__live_plot = live_plot - self.__user_given_directory = output_directory - self.__target_score = target_score - - self.__interrupt_flag = False + self._memory = rm_builder.build_by_params(**replay_memory_params) + self._minibatch_size = self._memory.sample_size + self._action_dim = action_dim + + strategy_params['action_dim'] = self._action_dim + self._strategy_params = strategy_params + strategy_builder = StrategyBuilder() + self._strategy = strategy_builder.build_by_params(**strategy_params) + self._agent_name = agent_name + self._snapshot_interval = snapshot_interval + self._creation_time = time.time() + self._max_episode_step = max_episode_step + self._start_training = start_training + self._output_directory = output_directory + self._target_score = target_score + + self._evaluation_samples = evaluation_samples + self._best_avg_score = -np.infty + self._best_net = None + + self._interrupt_flag = False + self._training_stats = None # Training Context - self.__current_episode = 0 - self.__total_steps = 0 - - # Initialize best network - self.__best_net = copy_net(self.__qnet, self.__state_dim, self.__ctx) - self.__best_avg_score = None + self._current_episode = 0 + self._total_steps = 0 - # Gluon Trainer definition - self.__training_stats = None + @property + def current_episode(self): + return self._current_episode - # Prepare output directory and logger - self.__output_directory = output_directory\ - + '/' + self.__agent_name\ - + '/' + time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(self.__creation_time)) - self.__logger = self.__setup_logging() - self.__logger.info('Agent created with following parameters: {}'.format(self.__make_config_dict())) + @property + def environment(self): + return self._environment - @classmethod - def from_config_file(cls, network, environment, config_file_path, ctx=None): + def save_config_file(self): import json - # Load config - with open(config_file_path, 'r') as config_file: - config_dict = json.load(config_file) - return cls(network, environment, ctx=ctx, **config_dict) + make_directory_if_not_exist(self._output_directory) + filename = os.path.join(self._output_directory, 'config.json') + config = self._make_config_dict() + with open(filename, mode='w') as fp: + json.dump(config, fp, indent=4) + + def set_interrupt_flag(self, interrupt): + self._interrupt_flag = interrupt + + def _interrupt_training(self): + import pickle + self._logger.info('Training interrupted; Store state for resuming') + session_dir = self._get_session_dir() + agent_session_file = os.path.join(session_dir, 'agent.p') + logger = self._logger + + self._make_pickle_ready(session_dir) + + with open(agent_session_file, 'wb') as f: + pickle.dump(self, f, protocol=2) + logger.info('State successfully stored') + + def _make_pickle_ready(self, session_dir): + del self._training_stats.logger + self._logger = None + self._environment.close() + 
self._environment = None + self._save_net(self._best_net, 'best_net', session_dir) + self._best_net = None + + def _make_config_dict(self): + config = dict() + config['state_dim'] = self._state_dim + config['action_dim'] = self._action_dim + config['ctx'] = str(self._ctx) + config['discount_factor'] = self._discount_factor + config['strategy_params'] = self._strategy_params + config['replay_memory_params'] = self._replay_memory_params + config['training_episodes'] = self._training_episodes + config['start_training'] = self._start_training + config['evaluation_samples'] = self._evaluation_samples + config['train_interval'] = self._train_interval + config['snapshot_interval'] = self._snapshot_interval + config['agent_name'] = self._agent_name + config['max_episode_step'] = self._max_episode_step + config['output_directory'] = self._output_directory + config['verbose'] = self._verbose + config['target_score'] = self._target_score + return config + + def _adjust_optimizer_params(self, optimizer_params): + if 'weight_decay' in optimizer_params: + optimizer_params['wd'] = optimizer_params['weight_decay'] + del optimizer_params['weight_decay'] + if 'learning_rate_decay' in optimizer_params: + min_learning_rate = 1e-8 + if 'learning_rate_minimum' in optimizer_params: + min_learning_rate = optimizer_params['learning_rate_minimum'] + del optimizer_params['learning_rate_minimum'] + optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler( + optimizer_params['step_size'], + factor=optimizer_params['learning_rate_decay'], + stop_factor_lr=min_learning_rate) + del optimizer_params['step_size'] + del optimizer_params['learning_rate_decay'] + + return optimizer_params + + def _sample_from_memory(self): + states, actions, rewards, next_states, terminals\ + = self._memory.sample(batch_size=self._minibatch_size) + states = nd.array(states, ctx=self._ctx) + actions = nd.array(actions, ctx=self._ctx) + rewards = nd.array(rewards, ctx=self._ctx) + next_states = nd.array(next_states, ctx=self._ctx) + terminals = nd.array(terminals, ctx=self._ctx) + return states, actions, rewards, next_states, terminals + + def evaluate(self, target=None, sample_games=100, verbose=True): + target = self._target_score if target is None else target + if target: + target_achieved = 0 + total_reward = 0 + + self._logger.info('Sampling from {} games...'.format(sample_games)) + for g in pyprind.prog_bar(range(sample_games)): + state = self._environment.reset() + step = 0 + game_reward = 0 + terminal = False + while not terminal and (step < self._max_episode_step): + action = self.get_next_action(state) + state, reward, terminal, _ = self._environment.step(action) + game_reward += reward + step += 1 + + if verbose: + info = 'Game %d: Reward %f' % (g, game_reward) + self._logger.debug(info) + if target: + if game_reward >= target: + target_achieved += 1 + total_reward += game_reward + + avg_reward = float(total_reward)/float(sample_games) + info = 'Avg. 
Reward: %f' % avg_reward + if target: + target_achieved_ratio = int( + (float(target_achieved)/float(sample_games))*100) + info += '; Target Achieved in %d%% of games'\ + % (target_achieved_ratio) + + if verbose: + self._logger.info(info) + return avg_reward + + def _do_snapshot_if_in_interval(self, episode): + do_snapshot =\ + (episode != 0 and (episode % self._snapshot_interval == 0)) + if do_snapshot: + self.save_parameters(episode=episode) + self._evaluate() + + def _evaluate(self, verbose=True): + avg_reward = self.evaluate( + sample_games=self._evaluation_samples, verbose=False) + info = 'Evaluation -> Average Reward in {} games: {}'.format( + self._evaluation_samples, avg_reward) + + if self._best_avg_score is None or self._best_avg_score <= avg_reward: + self._save_current_as_best_net() + self._best_avg_score = avg_reward + if verbose: + self._logger.info(info) + + def _is_target_reached(self, avg_reward): + return self._target_score is not None\ + and avg_reward > self._target_score + + def _do_training(self): + return (self._total_steps % self._train_interval == 0) and\ + (self._memory.is_sample_possible(self._minibatch_size)) and\ + (self._current_episode >= self._start_training) + + def _check_interrupt_routine(self): + if self._interrupt_flag: + self._interrupt_flag = False + self._interrupt_training() + return True + return False + + def _is_target_reached(self, avg_reward): + return self._target_score is not None\ + and avg_reward > self._target_score + + def _save_parameters(self, net, episode=None, filename='dqn-agent-params'): + assert self._output_directory + assert isinstance(net, gluon.HybridBlock) + make_directory_if_not_exist(self._output_directory) + + if(episode is not None): + self._logger.info( + 'Saving model parameters after episode %d' % episode) + filename = filename + '-ep{}'.format(episode) + else: + self._logger.info('Saving model parameters') + self._save_net(net, filename) + + def _save_net(self, net, filename, filedir=None): + filedir = self._output_directory if filedir is None else filedir + filename = os.path.join(filedir, filename + '.params') + net.save_parameters(filename) + + def save_best_network(self, path, epoch=0): + self._logger.info( + 'Saving best network with average reward of {}'.format( + self._best_avg_score)) + self._best_net.export(path, epoch=epoch) + + def _get_session_dir(self): + session_dir = os.path.join( + self._output_directory, '.interrupted_session') + make_directory_if_not_exist(session_dir) + return session_dir + + def _save_current_as_best_net(self): + raise NotImplementedError + + def get_next_action(self, state): + raise NotImplementedError + + def save_parameters(self, episode): + raise NotImplementedError + + def train(self, episodes=None): + raise NotImplementedError + + +class DdpgAgent(Agent): + def __init__( + self, + actor, + critic, + environment, + replay_memory_params, + strategy_params, + state_dim, + action_dim, + soft_target_update_rate=.001, + actor_optimizer='adam', + actor_optimizer_params={'learning_rate': 0.0001}, + critic_optimizer='adam', + critic_optimizer_params={'learning_rate': 0.001}, + ctx=None, + discount_factor=.9, + training_episodes=50, + start_training=20, + train_interval=1, + snapshot_interval=200, + agent_name='DdpgAgent', + max_episode_step=9999, + evaluation_samples=100, + output_directory='model_parameters', + verbose=True, + target_score=None + ): + super(DdpgAgent, self).__init__( + environment=environment, replay_memory_params=replay_memory_params, + 
strategy_params=strategy_params, state_dim=state_dim, + action_dim=action_dim, ctx=ctx, discount_factor=discount_factor, + training_episodes=training_episodes, start_training=start_training, + train_interval=train_interval, + snapshot_interval=snapshot_interval, agent_name=agent_name, + max_episode_step=max_episode_step, + output_directory=output_directory, verbose=verbose, + target_score=target_score, evaluation_samples=evaluation_samples) + assert critic is not None, 'Critic not set' + assert actor is not None, 'Actor is not set' + assert soft_target_update_rate > 0,\ + 'Target update must be greater zero' + assert actor_optimizer is not None, 'No actor optimizer set' + assert critic_optimizer is not None, 'No critic optimizer set' + + self._actor = actor + self._critic = critic + + self._actor_target = self._copy_actor() + self._critic_target = self._copy_critic() + + self._actor_optimizer = actor_optimizer + self._actor_optimizer_params = self._adjust_optimizer_params( + actor_optimizer_params) + + self._critic_optimizer = critic_optimizer + self._critic_optimizer_params = self._adjust_optimizer_params( + critic_optimizer_params) + + self._soft_target_update_rate = soft_target_update_rate + + self._logger.info( + 'Agent created with following parameters: {}'.format( + self._make_config_dict())) + + self._best_net = self._copy_actor() + + self._training_stats = DdpgTrainingStats(self._training_episodes) + + def _make_pickle_ready(self, session_dir): + super(DdpgAgent, self)._make_pickle_ready(session_dir) + self._save_net(self._actor, 'actor', session_dir) + self._actor = None + self._save_net(self._critic, 'critic', session_dir) + self._critic = None + self._save_net(self._actor_target, 'actor_target', session_dir) + self._actor_target = None + self._save_net(self._critic_target, 'critic_target', session_dir) + self._critic_target = None @classmethod - def resume_from_session(cls, session_dir, net, environment): + def resume_from_session(cls, session_dir, actor, critic, environment): import pickle if not os.path.exists(session_dir): raise ValueError('Session directory does not exist') @@ -121,386 +385,516 @@ class DqnAgent(object): files = dict() files['agent'] = os.path.join(session_dir, 'agent.p') files['best_net_params'] = os.path.join(session_dir, 'best_net.params') - files['q_net_params'] = os.path.join(session_dir, 'qnet.params') - files['target_net_params'] = os.path.join(session_dir, 'target_net.params') + files['actor_net_params'] = os.path.join(session_dir, 'actor.params') + files['actor_target_net_params'] = os.path.join( + session_dir, 'actor_target.params') + files['critic_net_params'] = os.path.join(session_dir, 'critic.params') + files['critic_target_net_params'] = os.path.join( + session_dir, 'critic_target.params') for file in files.values(): if not os.path.exists(file): - raise ValueError('Session directory is not complete: {} is missing'.format(file)) + raise ValueError( + 'Session directory is not complete: {} is missing' + .format(file)) with open(files['agent'], 'rb') as f: agent = pickle.load(f) - agent.__environment = environment - agent.__qnet = net - agent.__qnet.load_parameters(files['q_net_params'], agent.__ctx) - agent.__qnet.hybridize() - agent.__qnet(nd.random_normal(shape=((1,) + agent.__state_dim), ctx=agent.__ctx)) - agent.__best_net = copy_net(agent.__qnet, agent.__state_dim, agent.__ctx) - agent.__best_net.load_parameters(files['best_net_params'], agent.__ctx) - agent.__target_qnet = copy_net(agent.__qnet, agent.__state_dim, agent.__ctx) - 
agent.__target_qnet.load_parameters(files['target_net_params'], agent.__ctx) + agent._environment = environment + + agent._actor = actor + agent._actor.load_parameters(files['actor_net_params'], agent._ctx) + agent._actor.hybridize() + agent._actor(nd.random_normal( + shape=((1,) + agent._state_dim), ctx=agent._ctx)) + + agent._best_net = copy_net(agent._actor, agent._state_dim, agent._ctx) + agent._best_net.load_parameters(files['best_net_params'], agent._ctx) + + agent._actor_target = copy_net( + agent._actor, agent._state_dim, agent._ctx) + agent._actor_target.load_parameters(files['actor_target_net_params']) - agent.__logger = agent.__setup_logging(append=True) - agent.__training_stats.logger = agent.__logger - agent.__logger.info('Agent was retrieved; Training can be continued') + agent._critic = critic + agent._critic.load_parameters(files['critic_net_params'], agent._ctx) + agent._critic.hybridize() + agent._critic( + nd.random_normal(shape=((1,) + agent._state_dim), ctx=agent._ctx), + nd.random_normal(shape=((1,) + agent._action_dim), ctx=agent._ctx)) + + agent._critic_target = copy_net_with_two_inputs( + agent._critic, agent._state_dim, agent._action_dim, agent._ctx) + agent._critic_target.load_parameters(files['critic_target_net_params']) + + agent._logger = ArchLogger.get_logger() + agent._training_stats.logger = ArchLogger.get_logger() + agent._logger.info('Agent was retrieved; Training can be continued') return agent - def __interrupt_training(self): - import pickle - self.__logger.info('Training interrupted; Store state for resuming') - session_dir = os.path.join(self.__output_directory, '.interrupted_session') - if not os.path.exists(session_dir): - os.mkdir(session_dir) + def _save_current_as_best_net(self): + self._best_net = self._copy_actor() - del self.__training_stats.logger - logger = self.__logger - self.__logger = None - self.__environment.close() - self.__environment = None + def get_next_action(self, state): + action = self._actor(nd.array([state], ctx=self._ctx)) + return action[0].asnumpy() - self.__save_net(self.__qnet, 'qnet', session_dir) - self.__qnet = None - self.__save_net(self.__best_net, 'best_net', session_dir) - self.__best_net = None - self.__save_net(self.__target_qnet, 'target_net', session_dir) - self.__target_qnet = None + def save_parameters(self, episode): + self._save_parameters(self._actor, episode=episode) - agent_session_file = os.path.join(session_dir, 'agent.p') + def train(self, episodes=None): + self.save_config_file() + self._logger.info("--- Start DDPG training ---") + episodes = \ + episodes if episodes is not None else self._training_episodes - with open(agent_session_file, 'wb') as f: - pickle.dump(self, f) - self.__logger = logger - logger.info('State successfully stored') + resume = (self._current_episode > 0) + if resume: + self._logger.info("Training session resumed") + self._logger.info( + "Starting from episode {}".format(self._current_episode)) + else: + self._training_stats = DdpgTrainingStats(episodes) - @property - def current_episode(self): - return self.__current_episode + # Initialize target Q' and mu' + self._actor_target = self._copy_actor() + self._critic_target = self._copy_critic() - @property - def environment(self): - return self.__environment + # Initialize l2 loss for critic network + l2_loss = gluon.loss.L2Loss() - def __adjust_optimizer_params(self, optimizer_params): - if 'weight_decay' in optimizer_params: - optimizer_params['wd'] = optimizer_params['weight_decay'] - del optimizer_params['weight_decay'] - 
if 'learning_rate_decay' in optimizer_params: - min_learning_rate = 1e-8 - if 'learning_rate_minimum' in optimizer_params: - min_learning_rate = optimizer_params['learning_rate_minimum'] - del optimizer_params['learning_rate_minimum'] - optimizer_params['lr_scheduler'] = mx.lr.scheduler.FactorScheduler( - optimizer_params['step_size'], - factor=optimizer_params['learning_rate_decay'], - stop_factor_lr=min_learning_rate) - del optimizer_params['step_size'] - del optimizer_params['learning_rate_decay'] + # Initialize critic and actor trainer + trainer_actor = gluon.Trainer( + self._actor.collect_params(), self._actor_optimizer, + self._actor_optimizer_params) + trainer_critic = gluon.Trainer( + self._critic.collect_params(), self._critic_optimizer, + self._critic_optimizer_params) - return optimizer_params + # For episode=1..n + while self._current_episode < episodes: + # Check interrupt flag + if self._check_interrupt_routine(): + return False - def set_interrupt_flag(self, interrupt): - self.__interrupt_flag = interrupt + # Initialize new episode + step = 0 + episode_reward = 0 + start = time.time() + episode_critic_loss = 0 + episode_actor_loss = 0 + episode_avg_q_value = 0 + training_steps = 0 + # Get initialial observation state s + state = self._environment.reset() + + # For step=1..T + while step < self._max_episode_step: + # Select an action a = mu(s) + N(step) according to current + # actor and exploration noise N according to strategy + action = self._strategy.select_action( + self.get_next_action(state)) + + # Execute action a and observe reward r and next state ns + next_state, reward, terminal, _ = \ + self._environment.step(action) + + self._logger.debug( + 'Applied action {} with reward {}'.format(action, reward)) + + # Store transition (s,a,r,ns) in replay buffer + self._memory.append( + state, action, reward, next_state, terminal) + + if self._do_training(): + # Sample random minibatch of b transitions + # (s_i, a_i, r_i, s_(i+1)) from replay buffer + states, actions, rewards, next_states, terminals =\ + self._sample_from_memory() + + actor_target_actions = self._actor_target(next_states) + critic_target_qvalues = self._critic_target( + next_states, actor_target_actions) + + rewards = rewards.reshape(self._minibatch_size, 1) + terminals = terminals.reshape(self._minibatch_size, 1) + + # y = r_i + discount * Q'(s_(i+1), mu'(s_(i+1))) + y = rewards + (1.0 - terminals) * self._discount_factor\ + * critic_target_qvalues + + # Train the critic network + with autograd.record(): + qvalues = self._critic(states, actions) + critic_loss = l2_loss(qvalues, y) + critic_loss.backward() + trainer_critic.step(self._minibatch_size) + + # Train the actor network + # Temporary critic so that gluon trainer does not mess + # with critic parameters + tmp_critic = self._copy_critic() + with autograd.record(): + actor_qvalues = tmp_critic(states, self._actor(states)) + # For maximizing qvalues we have to multiply with -1 + # as we use a minimizer + actor_loss = -1 * actor_qvalues + actor_loss.backward() + trainer_actor.step(self._minibatch_size) + + # Update target networks: + self._actor_target = self._soft_update( + self._actor, self._actor_target, + self._soft_target_update_rate) + self._critic_target = self._soft_update( + self._critic, self._critic_target, + self._soft_target_update_rate) + + # Update statistics + episode_critic_loss +=\ + np.sum(critic_loss.asnumpy()) / self._minibatch_size + episode_actor_loss +=\ + np.sum(actor_loss.asnumpy()) / self._minibatch_size + episode_avg_q_value 
+=\ + np.sum(actor_qvalues.asnumpy()) / self._minibatch_size - def __make_output_directory_if_not_exist(self): - assert self.__output_directory - if not os.path.exists(self.__output_directory): - os.makedirs(self.__output_directory) + training_steps += 1 - def __setup_logging(self, append=False): - assert self.__output_directory - assert self.__agent_name + episode_reward += reward + step += 1 + self._total_steps += 1 + state = next_state - output_level = logging.DEBUG if self.__verbose else logging.WARNING - filemode = 'a' if append else 'w' + if terminal: + # Reset the strategy + self._strategy.reset() + break + + # Log the episode results + episode_actor_loss = 0 if training_steps == 0\ + else (episode_actor_loss / training_steps) + episode_critic_loss = 0 if training_steps == 0\ + else (episode_critic_loss / training_steps) + episode_avg_q_value = 0 if training_steps == 0\ + else (episode_avg_q_value / training_steps) + + avg_reward = self._training_stats.log_episode( + self._current_episode, start, training_steps, + episode_actor_loss, episode_critic_loss, episode_avg_q_value, + self._strategy.cur_eps, episode_reward) + + self._do_snapshot_if_in_interval(self._current_episode) + self._strategy.decay(self._current_episode) + + if self._is_target_reached(avg_reward): + self._logger.info( + 'Target score is reached in average; Training is stopped') + break + + self._current_episode += 1 + + self._evaluate() + self.save_parameters(episode=self._current_episode) + self.save_best_network(os.path.join(self._output_directory, 'best')) + self._training_stats.save_stats(self._output_directory) + self._logger.info('--------- Training finished ---------') + return True + + def _make_config_dict(self): + config = super(DdpgAgent, self)._make_config_dict() + config['soft_target_update_rate'] = self._soft_target_update_rate + config['actor_optimizer'] = self._actor_optimizer + config['actor_optimizer_params'] = self._actor_optimizer_params + config['critic_optimizer'] = self._critic_optimizer + config['critic_optimizer_params'] = self._critic_optimizer_params + return config - logformat = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' - dateformat = '%d-%b-%y %H:%M:%S' - formatter = logging.Formatter(fmt=logformat, datefmt=dateformat) + def _soft_update(self, net, target, tau): + net_params = [p.data() for _, p in net.collect_params().items()] + for i, (_, p) in enumerate(target.collect_params().items()): + target_params = p.data() + p.set_data((1.0 - tau) * target_params + tau * net_params[i]) + return target + + def _copy_actor(self): + assert self._actor is not None + assert self._ctx is not None + assert type(self._state_dim) is tuple + return copy_net(self._actor, self._state_dim, ctx=self._ctx) + + def _copy_critic(self): + assert self._critic is not None + assert self._ctx is not None + assert type(self._state_dim) is tuple + assert type(self._action_dim) is tuple + return copy_net_with_two_inputs( + self._critic, self._state_dim, self._action_dim, ctx=self._ctx) + + +class DqnAgent(Agent): + def __init__( + self, + qnet, + environment, + replay_memory_params, + strategy_params, + state_dim, + action_dim, + ctx=None, + discount_factor=.9, + loss_function='euclidean', + optimizer='rmsprop', + optimizer_params={'learning_rate': 0.09}, + training_episodes=50, + start_training=0, + train_interval=1, + use_fix_target=False, + double_dqn=False, + target_update_interval=10, + snapshot_interval=200, + evaluation_samples=100, + agent_name='Dqn_agent', + max_episode_step=99999, + 
output_directory='model_parameters', + verbose=True, + target_score=None + ): + super(DqnAgent, self).__init__( + environment=environment, replay_memory_params=replay_memory_params, + strategy_params=strategy_params, state_dim=state_dim, + action_dim=action_dim, ctx=ctx, discount_factor=discount_factor, + training_episodes=training_episodes, start_training=start_training, + train_interval=train_interval, + snapshot_interval=snapshot_interval, agent_name=agent_name, + max_episode_step=max_episode_step, + output_directory=output_directory, verbose=verbose, + target_score=target_score, evaluation_samples=evaluation_samples) + + self._qnet = qnet + self._target_update_interval = target_update_interval + self._target_qnet = copy_net( + self._qnet, self._state_dim, ctx=self._ctx) + self._loss_function_str = loss_function + self._loss_function = get_loss_function(loss_function) + self._optimizer = optimizer + self._optimizer_params = optimizer_params + self._double_dqn = double_dqn + self._use_fix_target = use_fix_target - logger = logging.getLogger('DQNAgent') - logger.setLevel(output_level) + # Initialize best network + self._best_net = copy_net(self._qnet, self._state_dim, self._ctx) + self._best_avg_score = -np.infty - stream_handler = logging.StreamHandler(sys.stdout) - stream_handler.setLevel(output_level) - stream_handler.setFormatter(formatter) - logger.addHandler(stream_handler) + self._training_stats = None - if self.__make_logfile: - self.__make_output_directory_if_not_exist() - log_file = os.path.join(self.__output_directory, self.__agent_name + '.log') - file_handler = logging.FileHandler(log_file, mode=filemode) - file_handler.setLevel(output_level) - file_handler.setFormatter(formatter) - logger.addHandler(file_handler) + @classmethod + def resume_from_session(cls, session_dir, net, environment): + import pickle + if not os.path.exists(session_dir): + raise ValueError('Session directory does not exist') - return logger + files = dict() + files['agent'] = os.path.join(session_dir, 'agent.p') + files['best_net_params'] = os.path.join(session_dir, 'best_net.params') + files['q_net_params'] = os.path.join(session_dir, 'qnet.params') + files['target_net_params'] = os.path.join( + session_dir, 'target_net.params') - def __is_target_reached(self, avg_reward): - return self.__target_score is not None\ - and avg_reward > self.__target_score + for file in files.values(): + if not os.path.exists(file): + raise ValueError( + 'Session directory is not complete: {} is missing' + .format(file)) + with open(files['agent'], 'rb') as f: + agent = pickle.load(f) + + agent._environment = environment + agent._qnet = net + agent._qnet.load_parameters(files['q_net_params'], agent._ctx) + agent._qnet.hybridize() + agent._qnet(nd.random_normal( + shape=((1,) + agent._state_dim), ctx=agent._ctx)) + agent._best_net = copy_net(agent._qnet, agent._state_dim, agent._ctx) + agent._best_net.load_parameters(files['best_net_params'], agent._ctx) + agent._target_qnet = copy_net( + agent._qnet, agent._state_dim, agent._ctx) + agent._target_qnet.load_parameters( + files['target_net_params'], agent._ctx) + + agent._logger = ArchLogger.get_logger() + agent._training_stats.logger = agent._logger + agent._logger.info('Agent was retrieved; Training can be continued') + + return agent + + def _make_pickle_ready(self, session_dir): + super(DqnAgent, self)._make_pickle_ready(session_dir) + self._save_net(self._qnet, 'qnet', session_dir) + self._qnet = None + self._save_net(self._target_qnet, 'target_net', session_dir) + 
self._target_qnet = None def get_q_values(self, state, with_best=False): - return self.get_batch_q_values(nd.array([state], ctx=self.__ctx), with_best=with_best)[0] + return self.get_batch_q_values( + nd.array([state], ctx=self._ctx), with_best=with_best)[0] def get_batch_q_values(self, state_batch, with_best=False): - return self.__best_net(state_batch) if with_best else self.__qnet(state_batch) + return self._best_net(state_batch)\ + if with_best else self._qnet(state_batch) def get_next_action(self, state, with_best=False): q_values = self.get_q_values(state, with_best=with_best) action = q_values.asnumpy().argmax() - return q_values.asnumpy().argmax() + return action - def __sample_from_memory(self): - states, actions, rewards, next_states, terminals\ - = self.__memory.sample(batch_size=self.__minibatch_size) - states = nd.array(states, ctx=self.__ctx) - actions = nd.array(actions, ctx=self.__ctx) - rewards = nd.array(rewards, ctx=self.__ctx) - next_states = nd.array(next_states, ctx=self.__ctx) - terminals = nd.array(terminals, ctx=self.__ctx) - return states, actions, rewards, next_states, terminals - - def __determine_target_q_values(self, states, actions, rewards, next_states, terminals): - if self.__use_fix_target: - q_max_val = self.__target_qnet(next_states) + def __determine_target_q_values( + self, states, actions, rewards, next_states, terminals + ): + if self._use_fix_target: + q_max_val = self._target_qnet(next_states) else: - q_max_val = self.__qnet(next_states) + q_max_val = self._qnet(next_states) - if self.__double_dqn: - q_values_next_states = self.__qnet(next_states) - target_rewards = rewards + nd.choose_element_0index(q_max_val, nd.argmax_channel(q_values_next_states))\ - * (1.0 - terminals) * self.__discount_factor + if self._double_dqn: + q_values_next_states = self._qnet(next_states) + target_rewards = rewards + nd.choose_element_0index( + q_max_val, nd.argmax_channel(q_values_next_states))\ + * (1.0 - terminals) * self._discount_factor else: - target_rewards = rewards + nd.choose_element_0index(q_max_val, nd.argmax_channel(q_max_val))\ - * (1.0 - terminals) * self.__discount_factor + target_rewards = rewards + nd.choose_element_0index( + q_max_val, nd.argmax_channel(q_max_val))\ + * (1.0 - terminals) * self._discount_factor - target_qval = self.__qnet(states) + target_qval = self._qnet(states) for t in range(target_rewards.shape[0]): target_qval[t][actions[t]] = target_rewards[t] return target_qval def __train_q_net_step(self, trainer): - states, actions, rewards, next_states, terminals = self.__sample_from_memory() - target_qval = self.__determine_target_q_values(states, actions, rewards, next_states, terminals) + states, actions, rewards, next_states, terminals =\ + self._sample_from_memory() + target_qval = self.__determine_target_q_values( + states, actions, rewards, next_states, terminals) with autograd.record(): - q_values = self.__qnet(states) - loss = self.__loss_function(q_values, target_qval) + q_values = self._qnet(states) + loss = self._loss_function(q_values, target_qval) loss.backward() - trainer.step(self.__minibatch_size) + trainer.step(self._minibatch_size) return loss - def __do_snapshot_if_in_interval(self, episode): - do_snapshot = (episode != 0 and (episode % self.__snapshot_interval == 0)) - if do_snapshot: - self.save_parameters(episode=episode) - self.__evaluate() - def __do_target_update_if_in_interval(self, total_steps): - do_target_update = (self.__use_fix_target and total_steps % self.__target_update_interval == 0) + do_target_update 
= ( + self._use_fix_target and + (total_steps % self._target_update_interval == 0)) if do_target_update: - self.__logger.info('Target network is updated after {} steps'.format(total_steps)) - self.__target_qnet = copy_net(self.__qnet, self.__state_dim, self.__ctx) + self._logger.info( + 'Target network is updated after {} steps'.format(total_steps)) + self._target_qnet = copy_net( + self._qnet, self._state_dim, self._ctx) def train(self, episodes=None): - self.__logger.info("--- Start training ---") - trainer = gluon.Trainer(self.__qnet.collect_params(), self.__optimizer, self.__adjust_optimizer_params(self.__optimizer_params)) - episodes = episodes if episodes != None else self.__training_episodes - - resume = (self.__current_episode > 0) + self.save_config_file() + self._logger.info("--- Start training ---") + trainer = gluon.Trainer( + self._qnet.collect_params(), + self._optimizer, + self._adjust_optimizer_params(self._optimizer_params)) + episodes = episodes if episodes is not None\ + else self._training_episodes + + resume = (self._current_episode > 0) if resume: - self.__logger.info("Training session resumed") - self.__logger.info("Starting from episode {}".format(self.__current_episode)) + self._logger.info("Training session resumed") + self._logger.info("Starting from episode {}".format( + self._current_episode)) else: - self.__training_stats = util.TrainingStats(self.__logger, episodes, self.__live_plot) + self._training_stats = DqnTrainingStats(episodes) - # Implementation Deep Q Learning described by Mnih et. al. in Playing Atari with Deep Reinforcement Learning - while self.__current_episode < episodes: - # Check interrupt flag - if self.__interrupt_flag: - self.__interrupt_flag = False - self.__interrupt_training() + # Implementation Deep Q Learning described by + # Mnih et. al. in Playing Atari with Deep Reinforcement Learning + while self._current_episode < episodes: + if self._check_interrupt_routine(): return False step = 0 episode_reward = 0 start = time.time() - state = self.__environment.reset() + state = self._environment.reset() episode_loss = 0 training_steps = 0 - while step < self.__max_episode_step: - #1. Choose an action based on current game state and policy - q_values = self.__qnet(nd.array([state], ctx=self.__ctx)) - action = self.__policy.select_action(q_values[0]) + while step < self._max_episode_step: + # 1. Choose an action based on current game state and policy + q_values = self._qnet(nd.array([state], ctx=self._ctx)) + action = self._strategy.select_action(q_values[0]) - #2. Play the game for a single step - next_state, reward, terminal, _ = self.__environment.step(action) + # 2. Play the game for a single step + next_state, reward, terminal, _ =\ + self._environment.step(action) - #3. Store transition in replay memory - self.__memory.append(state, action, reward, next_state, terminal) + # 3. Store transition in replay memory + self._memory.append( + state, action, reward, next_state, terminal) - #4. Train the network if in interval - do_training = (self.__total_steps % self.__train_interval == 0\ - and self.__memory.is_sample_possible(self.__minibatch_size)) - if do_training: + # 4. 
Train the network if in interval + if self._do_training(): loss = self.__train_q_net_step(trainer) - loss_sum = sum(loss).asnumpy()[0] - episode_loss += float(loss_sum)/float(self.__minibatch_size) training_steps += 1 + episode_loss +=\ + np.sum(loss.asnumpy()) / self._minibatch_size # Update target network if in interval - self.__do_target_update_if_in_interval(self.__total_steps) + self.__do_target_update_if_in_interval(self._total_steps) step += 1 - self.__total_steps += 1 + self._total_steps += 1 episode_reward += reward state = next_state if terminal: - episode_loss = episode_loss if training_steps > 0 else None - _, _, avg_reward = self.__training_stats.log_episode(self.__current_episode, start, training_steps, - episode_loss, self.__policy.cur_eps, episode_reward) + self._strategy.reset() break - self.__do_snapshot_if_in_interval(self.__current_episode) - self.__policy.decay() - - if self.__is_target_reached(avg_reward): - self.__logger.info('Target score is reached in average; Training is stopped') - break - - self.__current_episode += 1 + self._do_snapshot_if_in_interval(self._current_episode) - self.__evaluate() - training_stats_file = os.path.join(self.__output_directory, 'training_stats.pdf') - self.__training_stats.save_stats(training_stats_file) - self.__logger.info('--------- Training finished ---------') - return True - - def __save_net(self, net, filename, filedir=None): - filedir = self.__output_directory if filedir is None else filedir - filename = os.path.join(filedir, filename + '.params') - net.save_parameters(filename) + episode_loss = (episode_loss / training_steps)\ + if training_steps > 0 else 0 + avg_reward = self._training_stats.log_episode( + self._current_episode, start, training_steps, + episode_loss, self._strategy.cur_eps, episode_reward) + self._strategy.decay(self._current_episode) - def save_parameters(self, episode=None, filename='dqn-agent-params'): - assert self.__output_directory - self.__make_output_directory_if_not_exist() - - if(episode != None): - self.__logger.info('Saving model parameters after episode %d' % episode) - filename = filename + '-ep{}'.format(episode) - else: - self.__logger.info('Saving model parameters') - self.__save_net(self.__qnet, filename) - - def evaluate(self, target=None, sample_games=100, verbose=True): - target = self.__target_score if target is None else target - if target: - target_achieved = 0 - total_reward = 0 - - for g in range(sample_games): - state = self.__environment.reset() - step = 0 - game_reward = 0 - while step < self.__max_episode_step: - action = self.get_next_action(state) - state, reward, terminal, _ = self.__environment.step(action) - game_reward += reward - - if terminal: - if verbose: - info = 'Game %d: Reward %f' % (g,game_reward) - self.__logger.debug(info) - if target: - if game_reward >= target: - target_achieved += 1 - total_reward += game_reward - break - - step += 1 - - avg_reward = float(total_reward)/float(sample_games) - info = 'Avg. 
Reward: %f' % avg_reward - if target: - target_achieved_ratio = int((float(target_achieved)/float(sample_games))*100) - info += '; Target Achieved in %d%% of games' % (target_achieved_ratio) - - if verbose: - self.__logger.info(info) - return avg_reward - - def __evaluate(self, verbose=True): - sample_games = 100 - avg_reward = self.evaluate(sample_games=sample_games, verbose=False) - info = 'Evaluation -> Average Reward in {} games: {}'.format(sample_games, avg_reward) - - if self.__best_avg_score is None or self.__best_avg_score <= avg_reward: - self.__best_net = copy_net(self.__qnet, self.__state_dim, self.__ctx) - self.__best_avg_score = avg_reward - info += ' (NEW BEST)' - - if verbose: - self.__logger.info(info) - - - - def play(self, update_frame=1, with_best=False): - step = 0 - state = self.__environment.reset() - total_reward = 0 - while step < self.__max_episode_step: - action = self.get_next_action(state, with_best=with_best) - state, reward, terminal, _ = self.__environment.step(action) - total_reward += reward - do_update_frame = (step % update_frame == 0) - if do_update_frame: - self.__environment.render() - time.sleep(.100) - - if terminal: + if self._is_target_reached(avg_reward): + self._logger.info( + 'Target score is reached in average; Training is stopped') break - step += 1 - return total_reward + self._current_episode += 1 - def save_best_network(self, path, epoch=0): - self.__logger.info('Saving best network with average reward of {}'.format(self.__best_avg_score)) - self.__best_net.export(path, epoch=epoch) + self._evaluate() + self.save_parameters(episode=self._current_episode) + self.save_best_network(os.path.join(self._output_directory, 'best')) + self._training_stats.save_stats(self._output_directory) + self._logger.info('--------- Training finished ---------') + return True - def __make_config_dict(self): - config = dict() - config['discount_factor'] = self.__discount_factor - config['optimizer'] = self.__optimizer - config['optimizer_params'] = self.__optimizer_params - config['policy_params'] = self.__policy_params - config['replay_memory_params'] = self.__replay_memory_params - config['loss_function'] = self.__loss_function_str - config['optimizer'] = self.__optimizer - config['training_episodes'] = self.__training_episodes - config['train_interval'] = self.__train_interval - config['use_fix_target'] = self.__use_fix_target - config['double_dqn'] = self.__double_dqn - config['target_update_interval'] = self.__target_update_interval - config['snapshot_interval']= self.__snapshot_interval - config['agent_name'] = self.__agent_name - config['max_episode_step'] = self.__max_episode_step - config['output_directory'] = self.__user_given_directory - config['verbose'] = self.__verbose - config['live_plot'] = self.__live_plot - config['make_logfile'] = self.__make_logfile - config['target_score'] = self.__target_score + def _make_config_dict(self): + config = super(DqnAgent, self)._make_config_dict() + config['optimizer'] = self._optimizer + config['optimizer_params'] = self._optimizer_params + config['loss_function'] = self._loss_function_str + config['use_fix_target'] = self._use_fix_target + config['double_dqn'] = self._double_dqn + config['target_update_interval'] = self._target_update_interval return config - def save_config_file(self): - import json - self.__make_output_directory_if_not_exist() - filename = os.path.join(self.__output_directory, 'config.json') - config = self.__make_config_dict() - with open(filename, mode='w') as fp: - json.dump(config, fp, 
indent=4) \ No newline at end of file + def save_parameters(self, episode): + self._save_parameters(self._qnet, episode=episode) + + def _save_current_as_best_net(self): + self._best_net = copy_net( + self._qnet, (1,) + self._state_dim, ctx=self._ctx) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/cnnarch_logger.py b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/cnnarch_logger.py new file mode 100644 index 0000000..a17f5b1 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/cnnarch_logger.py @@ -0,0 +1,93 @@ +import logging +import sys +import os +import util + + +class ArchLogger(object): + _logger = None + + __output_level = logging.INFO + __logger_name = 'agent' + __output_directory = '.' + __append = True + __logformat = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + __dateformat = '%d-%b-%y %H:%M:%S' + + INFO = logging.INFO + DEBUG = logging.DEBUG + + @staticmethod + def set_output_level(output_level): + assert output_level is not None + ArchLogger.__output_level = output_level + + @staticmethod + def set_logger_name(logger_name): + assert logger_name is not None + ArchLogger.__logger_name = logger_name + + @staticmethod + def set_output_directory(output_directory): + assert output_directory is not None + ArchLogger.__output_directory = output_directory + + @staticmethod + def set_append(append): + assert append is not None + ArchLogger.__append = append + + @staticmethod + def set_log_format(logformat, dateformat): + assert logformat is not None + assert dateformat is not None + ArchLogger.__logformat = logformat + ArchLogger.__dateformat = dateformat + + @staticmethod + def init_logger(make_log_file=True): + assert ArchLogger._logger is None, 'Logger init already called' + filemode = 'a' if ArchLogger.__append else 'w' + formatter = logging.Formatter( + fmt=ArchLogger.__logformat, datefmt=ArchLogger.__dateformat) + + logger = logging.getLogger(ArchLogger.__logger_name) + logger.setLevel(ArchLogger.__output_level) + + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setLevel(ArchLogger.__output_level) + stream_handler.setFormatter(formatter) + logger.addHandler(stream_handler) + + if make_log_file: + util.make_directory_if_not_exist(ArchLogger.__output_directory) + log_file = os.path.join( + ArchLogger.__output_directory, + ArchLogger.__logger_name + '.log') + file_handler = logging.FileHandler(log_file, mode=filemode) + file_handler.setLevel(ArchLogger.__output_level) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + ArchLogger._logger = logger + + @staticmethod + def get_logger(): + if ArchLogger._logger is None: + ArchLogger.init_logger() + assert ArchLogger._logger is not None + return ArchLogger._logger + +if __name__ == "__main__": + print('=== Test logger ===') + ArchLogger.set_logger_name('TestLogger') + ArchLogger.set_output_directory('test_log') + ArchLogger.init_logger() + logger = ArchLogger.get_logger() + logger.warning('This is a warning') + logger.debug('This is a debug information, which you should not see') + logger.info('This is a normal information') + assert os.path.exists('test_log')\ + and os.path.isfile(os.path.join('test_log', 'TestLogger.log')),\ + 'Test failed: No logfile exists' + import shutil + shutil.rmtree('test_log') \ No newline at end of file diff --git 
a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/environment.py b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/environment.py index 47787a8..381ec9e 100644 --- a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/environment.py +++ b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/environment.py @@ -33,13 +33,6 @@ class GymEnvironment(Environment): def state_dim(self): return self.__env.observation_space.shape - @property - def state_dtype(self): - return 'float32' - - @property - def action_dtype(self): - return 'uint8' @property def number_of_actions(self): diff --git a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/replay_memory.py b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/replay_memory.py index e66cd93..d22147a 100644 --- a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/replay_memory.py +++ b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/replay_memory.py @@ -1,58 +1,90 @@ import numpy as np + class ReplayMemoryBuilder(object): def __init__(self): self.__supported_methods = ['online', 'buffer', 'combined'] - def build_by_params(self, + def build_by_params( + self, state_dim, method='online', state_dtype='float32', + action_dim=(1,), action_dtype='uint8', rewards_dtype='float32', memory_size=1000, - sample_size=32): + sample_size=32 + ): assert state_dim is not None + assert action_dim is not None assert method in self.__supported_methods if method == 'online': - return self.build_online_memory(state_dim=state_dim, state_dtype=state_dtype, - action_dtype=action_dtype, rewards_dtype=rewards_dtype) + return self.build_online_memory( + state_dim=state_dim, state_dtype=state_dtype, + action_dtype=action_dtype, action_dim=action_dim, + rewards_dtype=rewards_dtype) else: assert memory_size is not None and memory_size > 0 assert sample_size is not None and sample_size > 0 if method == 'buffer': - return self.build_buffered_memory(state_dim=state_dim, sample_size=sample_size, - memory_size=memory_size, state_dtype=state_dtype, action_dtype=action_dtype, + return self.build_buffered_memory( + state_dim=state_dim, sample_size=sample_size, + memory_size=memory_size, state_dtype=state_dtype, + action_dim=action_dim, action_dtype=action_dtype, rewards_dtype=rewards_dtype) else: - return self.build_combined_memory(state_dim=state_dim, sample_size=sample_size, - memory_size=memory_size, state_dtype=state_dtype, action_dtype=action_dtype, + return self.build_combined_memory( + state_dim=state_dim, sample_size=sample_size, + memory_size=memory_size, state_dtype=state_dtype, + action_dim=action_dim, action_dtype=action_dtype, rewards_dtype=rewards_dtype) - def build_buffered_memory(self, state_dim, memory_size=1000, sample_size=1, state_dtype='float32', - action_dtype='uint8', rewards_dtype='float32'): + def build_buffered_memory( + self, state_dim, memory_size, sample_size, state_dtype, action_dim, + action_dtype, rewards_dtype + ): assert memory_size > 0 assert sample_size > 0 - return ReplayMemory(state_dim, size=memory_size, sample_size=sample_size, - state_dtype=state_dtype, action_dtype=action_dtype, rewards_dtype=rewards_dtype) - - def build_combined_memory(self, state_dim, memory_size=1000, sample_size=1, state_dtype='float32', - action_dtype='uint8', 
rewards_dtype='float32'): + return ReplayMemory( + state_dim, size=memory_size, sample_size=sample_size, + state_dtype=state_dtype, action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) + + def build_combined_memory( + self, state_dim, memory_size, sample_size, state_dtype, action_dim, + action_dtype, rewards_dtype + ): assert memory_size > 0 assert sample_size > 0 - return CombinedReplayMemory(state_dim, size=memory_size, sample_size=sample_size, - state_dtype=state_dtype, action_dtype=action_dtype, rewards_dtype=rewards_dtype) + return CombinedReplayMemory( + state_dim, size=memory_size, sample_size=sample_size, + state_dtype=state_dtype, action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) + + def build_online_memory( + self, state_dim, state_dtype, action_dtype, action_dim, rewards_dtype + ): + return OnlineReplayMemory( + state_dim, state_dtype=state_dtype, action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) - def build_online_memory(self, state_dim, state_dtype='float32', action_dtype='uint8', - rewards_dtype='float32'): - return OnlineReplayMemory(state_dim, state_dtype=state_dtype, action_dtype=action_dtype, - rewards_dtype=rewards_dtype) class ReplayMemory(object): - def __init__(self, state_dim, sample_size, size=1000, state_dtype='uint8', action_dtype='uint8', rewards_dtype='float32'): + def __init__( + self, + state_dim, + sample_size, + size=1000, + action_dim=(1,), + state_dtype='float32', + action_dtype='uint8', + rewards_dtype='float32' + ): assert size > 0, "Size must be greater than zero" assert type(state_dim) is tuple, "State dimension must be a tuple" + assert type(action_dim) is tuple, "Action dimension must be a tuple" assert sample_size > 0 self._size = size self._sample_size = sample_size @@ -60,12 +92,15 @@ class ReplayMemory(object): self._pointer = 0 self._state_dim = state_dim self._state_dtype = state_dtype + self._action_dim = action_dim self._action_dtype = action_dtype self._rewards_dtype = rewards_dtype self._states = np.zeros((self._size,) + state_dim, dtype=state_dtype) - self._actions = np.array([0] * self._size, dtype=action_dtype) + self._actions = np.zeros( + (self._size,) + action_dim, dtype=action_dtype) self._rewards = np.array([0] * self._size, dtype=rewards_dtype) - self._next_states = np.zeros((self._size,) + state_dim, dtype=state_dtype) + self._next_states = np.zeros( + (self._size,) + state_dim, dtype=state_dtype) self._terminals = np.array([0] * self._size, dtype='bool') @property @@ -93,17 +128,23 @@ class ReplayMemory(object): self._terminals[index] def is_sample_possible(self, batch_size=None): - batch_size = batch_size if batch_size is not None else self._sample_size + batch_size = batch_size if batch_size is not None\ + else self._sample_size return self._cur_size >= batch_size def sample(self, batch_size=None): - batch_size = batch_size if batch_size is not None else self._sample_size - assert self._cur_size >= batch_size, "Size of replay memory must be larger than batch size" - i=0 - states = np.zeros((batch_size,)+self._state_dim, dtype=self._state_dtype) - actions = np.zeros(batch_size, dtype=self._action_dtype) + batch_size = batch_size if batch_size is not None\ + else self._sample_size + assert self._cur_size >= batch_size,\ + "Size of replay memory must be larger than batch size" + i = 0 + states = np.zeros(( + batch_size,)+self._state_dim, dtype=self._state_dtype) + actions = np.zeros( + (batch_size,)+self._action_dim, dtype=self._action_dtype) 
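+ # Transitions are copied into preallocated per-batch arrays using the configured
+ # dtypes; the explicit action_dim allows vector-valued (continuous) actions to be
+ # stored and sampled in addition to scalar discrete action indices.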
rewards = np.zeros(batch_size, dtype=self._rewards_dtype) - next_states = np.zeros((batch_size,)+self._state_dim, dtype=self._state_dtype) + next_states = np.zeros( + (batch_size,)+self._state_dim, dtype=self._state_dtype) terminals = np.zeros(batch_size, dtype='bool') while i < batch_size: @@ -119,25 +160,35 @@ class ReplayMemory(object): class OnlineReplayMemory(ReplayMemory): - def __init__(self, state_dim, state_dtype='float32', action_dtype='uint8', rewards_dtype='float32'): - super(OnlineReplayMemory, self).__init__(state_dim, sample_size=1, size=1, - state_dtype=state_dtype, action_dtype=action_dtype, rewards_dtype=rewards_dtype) + def __init__( + self, state_dim, state_dtype='float32', action_dim=(1,), + action_dtype='uint8', rewards_dtype='float32' + ): + super(OnlineReplayMemory, self).__init__( + state_dim, sample_size=1, size=1, state_dtype=state_dtype, + action_dim=action_dim, action_dtype=action_dtype, + rewards_dtype=rewards_dtype) class CombinedReplayMemory(ReplayMemory): - def __init__(self, state_dim, sample_size, size=1000, - state_dtype='uint8', action_dtype='uint8', rewards_dtype='float32'): - super(CombinedReplayMemory, self).__init__(state_dim, sample_size=(sample_size - 1), size=size, - state_dtype=state_dtype, action_dtype=action_dtype, rewards_dtype=rewards_dtype) + def __init__( + self, state_dim, sample_size, size=1000, state_dtype='float32', + action_dim=(1,), action_dtype='uint8', rewards_dtype='float32' + ): + super(CombinedReplayMemory, self).__init__( + state_dim=state_dim, sample_size=(sample_size - 1), size=size, + state_dtype=state_dtype, action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) self._last_state = np.zeros((1,) + state_dim, dtype=state_dtype) - self._last_action = np.array([0], dtype=action_dtype) + self._last_action = np.array((1,) + action_dim, dtype=action_dtype) self._last_reward = np.array([0], dtype=rewards_dtype) self._last_next_state = np.zeros((1,) + state_dim, dtype=state_dtype) self._last_terminal = np.array([0], dtype='bool') def append(self, state, action, reward, next_state, terminal): - super(CombinedReplayMemory, self).append(state, action, reward, next_state, terminal) + super(CombinedReplayMemory, self).append( + state, action, reward, next_state, terminal) self._last_state = state self._last_action = action self._last_reward = reward @@ -145,8 +196,10 @@ class CombinedReplayMemory(ReplayMemory): self._last_terminal = terminal def sample(self, batch_size=None): - batch_size = (batch_size-1) if batch_size is not None else self._sample_size - states, actions, rewards, next_states, terminals = super(CombinedReplayMemory, self).sample(batch_size=batch_size) + batch_size = (batch_size-1) if batch_size is not None\ + else self._sample_size + states, actions, rewards, next_states, terminals = super( + CombinedReplayMemory, self).sample(batch_size=batch_size) states = np.append(states, [self._last_state], axis=0) actions = np.append(actions, [self._last_action], axis=0) rewards = np.append(rewards, [self._last_reward], axis=0) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/strategy.py b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/strategy.py new file mode 100644 index 0000000..1f8deb0 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/strategy.py @@ -0,0 +1,172 @@ +import numpy as np + + +class StrategyBuilder(object): + def __init__(self): + pass + + 
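A minimal usage sketch of the builder method defined next (assumed import path reinforcement_learning.strategy; the parameter values are illustrative and not taken from any generated trainer):

from reinforcement_learning.strategy import StrategyBuilder

# Epsilon-greedy over two discrete actions with a linear decay schedule.
strategy = StrategyBuilder().build_by_params(
    method='epsgreedy',
    epsilon=1.0,
    min_epsilon=0.01,
    epsilon_decay_method='linear',
    epsilon_decay=0.0001,
    epsilon_decay_start=0,
    action_dim=(2,))

# select_action expects an MXNet NDArray of Q-values and returns an action index;
# decay(episode) is called once per episode by the agent:
#   action = strategy.select_action(q_values)
#   strategy.decay(episode)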
def build_by_params( + self, + method='epsgreedy', + epsilon=0.5, + min_epsilon=0.05, + epsilon_decay_method='no', + epsilon_decay=0.0, + epsilon_decay_start=0, + action_dim=None, + action_low=None, + action_high=None, + mu=0.0, + theta=0.5, + sigma=0.3 + ): + + if epsilon_decay_method == 'linear': + decay = LinearDecay( + eps_decay=epsilon_decay, min_eps=min_epsilon, + decay_start=epsilon_decay_start) + else: + decay = NoDecay() + + if method == 'epsgreedy': + assert action_dim is not None + assert len(action_dim) == 1 + return EpsilonGreedyStrategy( + eps=epsilon, number_of_actions=action_dim[0], + decay_method=decay) + elif method == 'uhlenbeck_ornstein': + assert action_dim is not None + assert action_low is not None + assert action_high is not None + assert mu is not None + assert theta is not None + assert sigma is not None + return UhlenbeckOrnsteinStrategy( + action_dim, action_low, action_high, epsilon, mu, theta, + sigma, decay) + else: + assert action_dim is not None + assert len(action_dim) == 1 + return GreedyStrategy() + + +class BaseDecay(object): + def __init__(self): + pass + + def decay(self, *args): + raise NotImplementedError + + def __call__(self, *args): + return self.decay(*args) + + +class NoDecay(BaseDecay): + def __init__(self): + super(NoDecay, self).__init__() + + def decay(self, cur_eps, episode): + return cur_eps + + +class LinearDecay(BaseDecay): + def __init__(self, eps_decay, min_eps=0, decay_start=0): + super(LinearDecay, self).__init__() + self.eps_decay = eps_decay + self.min_eps = min_eps + self.decay_start = decay_start + + def decay(self, cur_eps, episode): + if episode < self.decay_start: + return cur_eps + else: + return max(cur_eps - self.eps_decay, self.min_eps) + + +class BaseStrategy(object): + def __init__(self, decay_method): + self._decay_method = decay_method + + def select_action(self, values, decay_method): + raise NotImplementedError + + def decay(self, episode): + self.cur_eps = self._decay_method.decay(self.cur_eps, episode) + + def reset(self): + pass + + +class EpsilonGreedyStrategy(BaseStrategy): + def __init__(self, eps, number_of_actions, decay_method): + super(EpsilonGreedyStrategy, self).__init__(decay_method) + self.eps = eps + self.cur_eps = eps + self.__number_of_actions = number_of_actions + + def select_action(self, values): + do_exploration = (np.random.rand() < self.cur_eps) + if do_exploration: + action = np.random.randint(low=0, high=self.__number_of_actions) + else: + action = values.asnumpy().argmax() + return action + + +class GreedyStrategy(BaseStrategy): + def __init__(self): + super(GreedyStrategy, self).__init__(None) + + def select_action(self, values): + return values.asnumpy().argmax() + + def decay(self): + pass + + +class UhlenbeckOrnsteinStrategy(BaseStrategy): + """ + Ornstein-Uhlenbeck process: dxt = theta * (mu - xt) * dt + sigma * dWt + where Wt denotes the Wiener process. 
+ """ + def __init__( + self, + action_dim, + action_low, + action_high, + eps, + mu=0.0, + theta=.15, + sigma=.3, + decay=NoDecay() + ): + super(UhlenbeckOrnsteinStrategy, self).__init__() + self.eps = eps + self.cur_eps = eps + + self._decay_method = decay + + self._action_dim = action_dim + self._action_low = action_low + self._action_high = action_high + + self._mu = mu + self._theta = theta + self._sigma = sigma + + self.state = np.ones(self._action_dim) * self._mu + + def _evolve_state(self): + x = self.state + dx = self._theta * (self._mu - x)\ + + self._sigma * np.random.randn(len(x)) + self.state = x + dx + return self.state + + def reset(self): + self.state = np.ones(self._action_dim) * self._mu + + def select_action(self, values): + noise = self._evolve_state() + action = values + (self.cur_eps * noise) + return np.clip(action, self._action_low, self._action_high) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/util.py b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/util.py index 5857893..a1da1ce 100644 --- a/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/util.py +++ b/src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/util.py @@ -5,9 +5,9 @@ import matplotlib.pyplot as plt from matplotlib import style import time import os -import mxnet +import mxnet as mx from mxnet import gluon, nd - +import cnnarch_logger LOSS_FUNCTIONS = { 'l1': gluon.loss.L1Loss(), @@ -16,17 +16,46 @@ LOSS_FUNCTIONS = { 'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(), 'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()} -def copy_net(net, input_state_dim, ctx, tmp_filename='tmp.params'): + +def make_directory_if_not_exist(directory): + if not os.path.exists(directory): + os.makedirs(directory) + + +def copy_net(net, input_state_dim, ctx): + assert isinstance(net, gluon.HybridBlock) + assert type(net.__class__) is type + + net2 = net.__class__() + net2.collect_params().initialize(mx.init.Zero(), ctx=ctx) + net2.hybridize() + net2(mx.nd.ones((1,) + input_state_dim, ctx=ctx)) + + params_of_net = [p.data() for _, p in net.collect_params().items()] + for i, (_, p) in enumerate(net2.collect_params().items()): + p.set_data(params_of_net[i]) + + return net2 + + +def copy_net_with_two_inputs(net, input_state_dim1, input_state_dim2, ctx): assert isinstance(net, gluon.HybridBlock) assert type(net.__class__) is type - net.save_parameters(tmp_filename) + net2 = net.__class__() - net2.load_parameters(tmp_filename, ctx=ctx) - os.remove(tmp_filename) + net2.collect_params().initialize(mx.init.Zero(), ctx=ctx) net2.hybridize() - net2(nd.ones((1,) + input_state_dim, ctx=ctx)) + net2( + nd.ones((1,) + input_state_dim1, ctx=ctx), + nd.ones((1,) + input_state_dim2, ctx=ctx)) + + params_of_net = [p.data() for _, p in net.collect_params().items()] + for i, (_, p) in enumerate(net2.collect_params().items()): + p.set_data(params_of_net[i]) + return net2 + def get_loss_function(loss_function_name): if loss_function_name not in LOSS_FUNCTIONS: raise ValueError('Loss function does not exist') @@ -52,89 +81,196 @@ class AgentSignalHandler(object): sys.exit(1) style.use('fivethirtyeight') + + class TrainingStats(object): - def __init__(self, logger, max_episodes, live_plot=True): - self.__logger = logger - self.__max_episodes = max_episodes - self.__all_avg_loss = np.zeros((max_episodes,)) - self.__all_total_rewards = 
np.zeros((max_episodes,)) - self.__all_eps = np.zeros((max_episodes,)) - self.__all_time = np.zeros((max_episodes,)) - self.__all_mean_reward_last_100_episodes = np.zeros((max_episodes,)) - self.__live_plot = live_plot + def __init__(self, max_episodes): + self._logger = cnnarch_logger.ArchLogger.get_logger() + self._max_episodes = max_episodes + self._all_total_rewards = np.zeros((max_episodes,)) + self._all_eps = np.zeros((max_episodes,)) + self._all_time = np.zeros((max_episodes,)) + self._all_mean_reward_last_100_episodes = np.zeros((max_episodes,)) @property def logger(self): - return self.__logger + return self._logger @logger.setter def logger(self, logger): - self.__logger = logger + self._logger = logger @logger.deleter def logger(self): - self.__logger = None - - def add_avg_loss(self, episode, avg_loss): - self.__all_avg_loss[episode] = avg_loss + self._logger = None def add_total_reward(self, episode, total_reward): - self.__all_total_rewards[episode] = total_reward + self._all_total_rewards[episode] = total_reward def add_eps(self, episode, eps): - self.__all_eps[episode] = eps + self._all_eps[episode] = eps def add_time(self, episode, time): - self.__all_time[episode] = time + self._all_time[episode] = time def add_mean_reward_last_100(self, episode, mean_reward): - self.__all_mean_reward_last_100_episodes[episode] = mean_reward + self._all_mean_reward_last_100_episodes[episode] = mean_reward + + def log_episode(self, *args): + raise NotImplementedError + + def mean_of_reward(self, cur_episode, last=100): + if cur_episode > 0: + reward_last_100 =\ + self._all_total_rewards[max(0, cur_episode-last):cur_episode] + return np.mean(reward_last_100) + else: + return self._all_total_rewards[0] - def log_episode(self, episode, start_time, training_steps, loss, eps, reward): + def save(self, path): + np.save(os.path.join(path, 'total_rewards'), self._all_total_rewards) + np.save(os.path.join(path, 'eps'), self._all_eps) + np.save(os.path.join(path, 'time'), self._all_time) + np.save( + os.path.join(path, 'mean_reward'), + self._all_mean_reward_last_100_episodes) + + def _log_episode(self, episode, start_time, training_steps, eps, reward): self.add_eps(episode, eps) self.add_total_reward(episode, reward) end = time.time() - if training_steps == 0: - avg_loss = 0 - else: - avg_loss = float(loss)/float(training_steps) - mean_reward_last_100 = self.mean_of_reward(episode, last=100) - time_elapsed = end - start_time - info = "Episode: %d, Total Reward: %.3f, Avg. Reward Last 100 Episodes: %.3f, Avg Loss: %.3f, Time: %.3f, Training Steps: %d, Eps: %.3f"\ - % (episode, reward, mean_reward_last_100, avg_loss, time_elapsed, training_steps, eps) - self.__logger.info(info) - self.add_avg_loss(episode, avg_loss) self.add_time(episode, time_elapsed) self.add_mean_reward_last_100(episode, mean_reward_last_100) + return ('Episode: %d, Total Reward: %.3f, ' + 'Avg. 
Reward Last 100 Episodes: %.3f, {}, ' + 'Time: %.3f, Training Steps: %d, Eps: %.3f') % ( + episode, reward, mean_reward_last_100, time_elapsed, + training_steps, eps), mean_reward_last_100 - return avg_loss, time_elapsed, mean_reward_last_100 - def mean_of_reward(self, cur_episode, last=100): - if cur_episode > 0: - reward_last_100 = self.__all_total_rewards[max(0, cur_episode-last):cur_episode] - return np.mean(reward_last_100) - else: - return self.__all_total_rewards[0] +class DqnTrainingStats(TrainingStats): + def __init__(self, max_episodes): + super(DqnTrainingStats, self).__init__(max_episodes) + self._all_avg_loss = np.zeros((max_episodes,)) + + def add_avg_loss(self, episode, avg_loss): + self._all_avg_loss[episode] = avg_loss + + def log_episode( + self, episode, start_time, training_steps, avg_loss, eps, reward + ): + self.add_avg_loss(episode, avg_loss) + + info, avg_reward = self._log_episode( + episode, start_time, training_steps, eps, reward) + info = info.format(('Avg. Loss: %.3f') % (avg_loss)) - def save_stats(self, file): - fig = plt.figure(figsize=(20,20)) + self._logger.info(info) + return avg_reward + + def save_stats(self, path): + fig = plt.figure(figsize=(20, 20)) sub_rewards = fig.add_subplot(221) sub_rewards.set_title('Total Rewards per episode') - sub_rewards.plot(np.arange(self.__max_episodes), self.__all_total_rewards) + sub_rewards.plot( + np.arange(self._max_episodes), self._all_total_rewards) sub_loss = fig.add_subplot(222) sub_loss.set_title('Avg. Loss per episode') - sub_loss.plot(np.arange(self.__max_episodes), self.__all_avg_loss) + sub_loss.plot(np.arange(self._max_episodes), self._all_avg_loss) sub_eps = fig.add_subplot(223) sub_eps.set_title('Epsilon per episode') - sub_eps.plot(np.arange(self.__max_episodes), self.__all_eps) + sub_eps.plot(np.arange(self._max_episodes), self._all_eps) sub_rewards = fig.add_subplot(224) sub_rewards.set_title('Avg. mean reward of last 100 episodes') - sub_rewards.plot(np.arange(self.__max_episodes), self.__all_mean_reward_last_100_episodes) + sub_rewards.plot(np.arange(self._max_episodes), + self._all_mean_reward_last_100_episodes) + + self.save(path) + plt.savefig(os.path.join(path, 'stats.pdf')) + + def save(self, path): + super(DqnTrainingStats, self).save(path) + np.save(os.path.join(path, 'avg_loss'), self._all_avg_loss) + + +class DdpgTrainingStats(TrainingStats): + def __init__(self, max_episodes): + super(DdpgTrainingStats, self).__init__(max_episodes) + self._all_avg_critic_loss = np.zeros((max_episodes,)) + self._all_avg_actor_loss = np.zeros((max_episodes,)) + self._all_avg_qvalues = np.zeros((max_episodes,)) + + def add_avg_critic_loss(self, episode, avg_critic_loss): + self._all_avg_critic_loss[episode] = avg_critic_loss + + def add_avg_actor_loss(self, episode, avg_actor_loss): + self._all_avg_actor_loss[episode] = avg_actor_loss + + def add_avg_qvalues(self, episode, avg_qvalues): + self._all_avg_qvalues[episode] = avg_qvalues + + def log_episode( + self, episode, start_time, training_steps, actor_loss, + critic_loss, qvalues, eps, reward + ): + self.add_avg_actor_loss(episode, actor_loss) + self.add_avg_critic_loss(episode, critic_loss) + self.add_avg_qvalues(episode, qvalues) + + info, avg_reward = self._log_episode( + episode, start_time, training_steps, eps, reward) + info = info.format(( + 'Avg. Actor Loss: %.3f ' + 'Avg. Critic Loss: %.3f ' + 'Avg. 
Q-Values: %.3f') % (actor_loss, critic_loss, qvalues)) + + self.logger.info(info) + return avg_reward + + def save(self, path): + super(DdpgTrainingStats, self).save(path) + np.save(os.path.join( + path, 'avg_critic_loss'), self._all_avg_critic_loss) + np.save(os.path.join(path, 'avg_actor_loss'), self._all_avg_actor_loss) + np.save(os.path.join(path, 'avg_qvalues'), self._all_avg_qvalues) + + def save_stats(self, path): + fig = plt.figure(figsize=(120, 120)) + + sub_rewards = fig.add_subplot(321) + sub_rewards.set_title('Total Rewards per episode') + sub_rewards.plot( + np.arange(self._max_episodes), self._all_total_rewards) + + sub_actor_loss = fig.add_subplot(322) + sub_actor_loss.set_title('Avg. Actor Loss per episode') + sub_actor_loss.plot( + np.arange(self._max_episodes), self._all_avg_actor_loss) + + sub_critic_loss = fig.add_subplot(323) + sub_critic_loss.set_title('Avg. Critic Loss per episode') + sub_critic_loss.plot( + np.arange(self._max_episodes), self._all_avg_critic_loss) + + sub_qvalues = fig.add_subplot(324) + sub_qvalues.set_title('Avg. QValues per episode') + sub_qvalues.plot( + np.arange(self._max_episodes), self._all_avg_qvalues) + + sub_eps = fig.add_subplot(325) + sub_eps.set_title('Epsilon per episode') + sub_eps.plot(np.arange(self._max_episodes), self._all_eps) + + sub_rewards = fig.add_subplot(326) + sub_rewards.set_title('Avg. mean reward of last 100 episodes') + sub_rewards.plot(np.arange(self._max_episodes), + self._all_mean_reward_last_100_episodes) - plt.savefig(file) \ No newline at end of file + self.save(path) + plt.savefig(os.path.join(path, 'stats.pdf')) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CMakeLists.txt b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CMakeLists.txt new file mode 100644 index 0000000..6142455 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CMakeLists.txt @@ -0,0 +1,27 @@ +cmake_minimum_required(VERSION 3.5) +set(CMAKE_CXX_STANDARD 14) + +project(mountaincar_master LANGUAGES CXX) + +#set cmake module path +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) + +# add dependencies +find_package(Armadillo REQUIRED) +set(INCLUDE_DIRS ${INCLUDE_DIRS} ${Armadillo_INCLUDE_DIRS}) +set(LIBS ${LIBS} ${Armadillo_LIBRARIES}) + +# additional commands +set(LIBS ${LIBS} mxnet) + +# create static library +include_directories(${INCLUDE_DIRS}) +add_library(mountaincar_master mountaincar_master.cpp) +target_include_directories(mountaincar_master PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${INCLUDE_DIRS}) +target_link_libraries(mountaincar_master PUBLIC ${LIBS}) +set_target_properties(mountaincar_master PROPERTIES LINKER_LANGUAGE CXX) + +# export cmake project +export(TARGETS mountaincar_master FILE mountaincar_master.cmake) + +# additional commands end diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNBufferFile.h b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNBufferFile.h new file mode 100644 index 0000000..c0d8dd9 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNBufferFile.h @@ -0,0 +1,51 @@ +#ifndef CNNBUFFERFILE_H +#define CNNBUFFERFILE_H + +#include +#include +#include + +// Read file to buffer +class BufferFile { + public : + std::string file_path_; + int length_; + char* buffer_; + + explicit BufferFile(std::string file_path) + :file_path_(file_path) { + + std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary); 
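+ // The constructor continues below: it validates the stream, determines the
+ // file length via seekg/tellg, reads the whole file into buffer_, and the
+ // predictor later hands this buffer to MXPredCreate for the symbol JSON and
+ // the parameter blob.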
+ if (!ifs) { + std::cerr << "Can't open the file. Please check " << file_path << ". \n"; + length_ = 0; + buffer_ = NULL; + return; + } + + ifs.seekg(0, std::ios::end); + length_ = ifs.tellg(); + ifs.seekg(0, std::ios::beg); + std::cout << file_path.c_str() << " ... "<< length_ << " bytes\n"; + + buffer_ = new char[sizeof(char) * length_]; + ifs.read(buffer_, length_); + ifs.close(); + } + + int GetLength() { + return length_; + } + char* GetBuffer() { + return buffer_; + } + + ~BufferFile() { + if (buffer_) { + delete[] buffer_; + buffer_ = NULL; + } + } +}; + +#endif // CNNBUFFERFILE_H diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNCreator_mountaincar_master_actor.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNCreator_mountaincar_master_actor.py new file mode 100644 index 0000000..2928ff7 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNCreator_mountaincar_master_actor.py @@ -0,0 +1,56 @@ +import mxnet as mx +import logging +import os +from CNNNet_mountaincar_master_actor import Net + +class CNNCreator_mountaincar_master_actor: + _model_dir_ = "model/mountaincar.agent.MountaincarActor/" + _model_prefix_ = "model" + _input_shapes_ = [(2,)] + + def __init__(self): + self.weight_initializer = mx.init.Normal() + self.net = None + + def get_input_shapes(self): + return self._input_shapes_ + + def load(self, context): + lastEpoch = 0 + param_file = None + + try: + os.remove(self._model_dir_ + self._model_prefix_ + "_newest-0000.params") + except OSError: + pass + try: + os.remove(self._model_dir_ + self._model_prefix_ + "_newest-symbol.json") + except OSError: + pass + + if os.path.isdir(self._model_dir_): + for file in os.listdir(self._model_dir_): + if ".params" in file and self._model_prefix_ in file: + epochStr = file.replace(".params","").replace(self._model_prefix_ + "-","") + epoch = int(epochStr) + if epoch > lastEpoch: + lastEpoch = epoch + param_file = file + if param_file is None: + return 0 + else: + logging.info("Loading checkpoint: " + param_file) + self.net.load_parameters(self._model_dir_ + param_file) + return lastEpoch + + + def construct(self, context, data_mean=None, data_std=None): + self.net = Net(data_mean=data_mean, data_std=data_std) + self.net.collect_params().initialize(self.weight_initializer, ctx=context) + self.net.hybridize() + self.net(mx.nd.zeros((1,)+self._input_shapes_[0], ctx=context)) + + if not os.path.exists(self._model_dir_): + os.makedirs(self._model_dir_) + + self.net.export(self._model_dir_ + self._model_prefix_, epoch=0) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNDataLoader_mountaincar_master_actor.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNDataLoader_mountaincar_master_actor.py new file mode 100644 index 0000000..48d2dfb --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNDataLoader_mountaincar_master_actor.py @@ -0,0 +1,57 @@ +import os +import h5py +import mxnet as mx +import logging +import sys + +class mountaincar_master_actorDataLoader: + _input_names_ = ['state'] + _output_names_ = ['action_label'] + + def __init__(self): + self._data_dir = "data/" + + def load_data(self, batch_size): + train_h5, test_h5 = self.load_h5_files() + + data_mean = train_h5[self._input_names_[0]][:].mean(axis=0) + data_std = train_h5[self._input_names_[0]][:].std(axis=0) + 1e-5 + + train_iter = mx.io.NDArrayIter(train_h5[self._input_names_[0]], + 
train_h5[self._output_names_[0]], + batch_size=batch_size, + data_name=self._input_names_[0], + label_name=self._output_names_[0]) + test_iter = None + if test_h5 != None: + test_iter = mx.io.NDArrayIter(test_h5[self._input_names_[0]], + test_h5[self._output_names_[0]], + batch_size=batch_size, + data_name=self._input_names_[0], + label_name=self._output_names_[0]) + return train_iter, test_iter, data_mean, data_std + + def load_h5_files(self): + train_h5 = None + test_h5 = None + train_path = self._data_dir + "train.h5" + test_path = self._data_dir + "test.h5" + if os.path.isfile(train_path): + train_h5 = h5py.File(train_path, 'r') + if not (self._input_names_[0] in train_h5 and self._output_names_[0] in train_h5): + logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the datasets: " + + "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'") + sys.exit(1) + test_iter = None + if os.path.isfile(test_path): + test_h5 = h5py.File(test_path, 'r') + if not (self._input_names_[0] in test_h5 and self._output_names_[0] in test_h5): + logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the datasets: " + + "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'") + sys.exit(1) + else: + logging.warning("Couldn't load test set. File '" + os.path.abspath(test_path) + "' does not exist.") + return train_h5, test_h5 + else: + logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.") + sys.exit(1) \ No newline at end of file diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNNet_mountaincar_master_actor.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNNet_mountaincar_master_actor.py new file mode 100644 index 0000000..e4a190f --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNNet_mountaincar_master_actor.py @@ -0,0 +1,105 @@ +import mxnet as mx +import numpy as np +from mxnet import gluon + +class Softmax(gluon.HybridBlock): + def __init__(self, **kwargs): + super(Softmax, self).__init__(**kwargs) + + def hybrid_forward(self, F, x): + return F.softmax(x) + + +class Split(gluon.HybridBlock): + def __init__(self, num_outputs, axis=1, **kwargs): + super(Split, self).__init__(**kwargs) + with self.name_scope(): + self.axis = axis + self.num_outputs = num_outputs + + def hybrid_forward(self, F, x): + return F.split(data=x, axis=self.axis, num_outputs=self.num_outputs) + + +class Concatenate(gluon.HybridBlock): + def __init__(self, dim=1, **kwargs): + super(Concatenate, self).__init__(**kwargs) + with self.name_scope(): + self.dim = dim + + def hybrid_forward(self, F, *x): + return F.concat(*x, dim=self.dim) + + +class ZScoreNormalization(gluon.HybridBlock): + def __init__(self, data_mean, data_std, **kwargs): + super(ZScoreNormalization, self).__init__(**kwargs) + with self.name_scope(): + self.data_mean = self.params.get('data_mean', shape=data_mean.shape, + init=mx.init.Constant(data_mean.asnumpy().tolist()), differentiable=False) + self.data_std = self.params.get('data_std', shape=data_mean.shape, + init=mx.init.Constant(data_std.asnumpy().tolist()), differentiable=False) + + def hybrid_forward(self, F, x, data_mean, data_std): + x = F.broadcast_sub(x, data_mean) + x = F.broadcast_div(x, data_std) + return x + + +class Padding(gluon.HybridBlock): + def __init__(self, padding, **kwargs): + super(Padding, self).__init__(**kwargs) + with self.name_scope(): + self.pad_width = padding + + 
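+ # pad_width follows MXNet's F.pad convention: a flat sequence with two
+ # entries (before, after) per axis of the input; hybrid_forward below applies
+ # constant zero padding with exactly this specification.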
def hybrid_forward(self, F, x): + x = F.pad(data=x, + mode='constant', + pad_width=self.pad_width, + constant_value=0) + return x + + +class NoNormalization(gluon.HybridBlock): + def __init__(self, **kwargs): + super(NoNormalization, self).__init__(**kwargs) + + def hybrid_forward(self, F, x): + return x + + +class Net(gluon.HybridBlock): + def __init__(self, data_mean=None, data_std=None, **kwargs): + super(Net, self).__init__(**kwargs) + with self.name_scope(): + if not data_mean is None: + assert(not data_std is None) + self.state_input_normalization = ZScoreNormalization(data_mean=data_mean, data_std=data_std) + else: + self.state_input_normalization = NoNormalization() + + self.fc1_ = gluon.nn.Dense(units=300, use_bias=True) + # fc1_, output shape: {[300,1,1]} + + self.relu1_ = gluon.nn.Activation(activation='relu') + self.fc2_ = gluon.nn.Dense(units=300, use_bias=True) + # fc2_, output shape: {[300,1,1]} + + self.relu2_ = gluon.nn.Activation(activation='relu') + self.fc3_ = gluon.nn.Dense(units=1, use_bias=True) + # fc3_, output shape: {[1,1,1]} + + self.tanh3_ = gluon.nn.Activation(activation='tanh') + + self.last_layer = 'linear' + + + def hybrid_forward(self, F, state): + state = self.state_input_normalization(state) + fc1_ = self.fc1_(state) + relu1_ = self.relu1_(fc1_) + fc2_ = self.fc2_(relu1_) + relu2_ = self.relu2_(fc2_) + fc3_ = self.fc3_(relu2_) + tanh3_ = self.tanh3_(fc3_) + return tanh3_ diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNPredictor_mountaincar_master_actor.h b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNPredictor_mountaincar_master_actor.h new file mode 100644 index 0000000..eca3f2b --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNPredictor_mountaincar_master_actor.h @@ -0,0 +1,104 @@ +#ifndef CNNPREDICTOR_MOUNTAINCAR_MASTER_ACTOR +#define CNNPREDICTOR_MOUNTAINCAR_MASTER_ACTOR + +#include + +#include +#include +#include + +#include + +class CNNPredictor_mountaincar_master_actor{ +public: + const std::string json_file = "model/mountaincar.agent.MountaincarActor/model_newest-symbol.json"; + const std::string param_file = "model/mountaincar.agent.MountaincarActor/model_newest-0000.params"; + //const std::vector input_keys = {"data"}; + const std::vector input_keys = {"state"}; + const std::vector> input_shapes = {{1,2}}; + const bool use_gpu = false; + + PredictorHandle handle; + + explicit CNNPredictor_mountaincar_master_actor(){ + init(json_file, param_file, input_keys, input_shapes, use_gpu); + } + + ~CNNPredictor_mountaincar_master_actor(){ + if(handle) MXPredFree(handle); + } + + void predict(const std::vector &state, + std::vector &action){ + MXPredSetInput(handle, "data", state.data(), static_cast(state.size())); + + MXPredForward(handle); + + mx_uint output_index; + mx_uint *shape = 0; + mx_uint shape_len; + size_t size; + + output_index = 0; + MXPredGetOutputShape(handle, output_index, &shape, &shape_len); + size = 1; + for (mx_uint i = 0; i < shape_len; ++i) size *= shape[i]; + assert(size == action.size()); + MXPredGetOutput(handle, 0, &(action[0]), action.size()); + + } + + void init(const std::string &json_file, + const std::string ¶m_file, + const std::vector &input_keys, + const std::vector> &input_shapes, + const bool &use_gpu){ + + BufferFile json_data(json_file); + BufferFile param_data(param_file); + + int dev_type = use_gpu ? 
2 : 1; + int dev_id = 0; + + if (json_data.GetLength() == 0 || + param_data.GetLength() == 0) { + std::exit(-1); + } + + const mx_uint num_input_nodes = input_keys.size(); + + const char* input_key[1] = { "data" }; + const char** input_keys_ptr = input_key; + + mx_uint shape_data_size = 0; + mx_uint input_shape_indptr[input_shapes.size() + 1]; + input_shape_indptr[0] = 0; + for(mx_uint i = 0; i < input_shapes.size(); i++){ + input_shape_indptr[i+1] = input_shapes[i].size(); + shape_data_size += input_shapes[i].size(); + } + + mx_uint input_shape_data[shape_data_size]; + mx_uint index = 0; + for(mx_uint i = 0; i < input_shapes.size(); i++){ + for(mx_uint j = 0; j < input_shapes[i].size(); j++){ + input_shape_data[index] = input_shapes[i][j]; + index++; + } + } + + MXPredCreate(static_cast(json_data.GetBuffer()), + static_cast(param_data.GetBuffer()), + static_cast(param_data.GetLength()), + dev_type, + dev_id, + num_input_nodes, + input_keys_ptr, + input_shape_indptr, + input_shape_data, + &handle); + assert(handle); + } +}; + +#endif // CNNPREDICTOR_MOUNTAINCAR_MASTER_ACTOR diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNTrainer_mountaincar_master_actor.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNTrainer_mountaincar_master_actor.py new file mode 100644 index 0000000..954098e --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNTrainer_mountaincar_master_actor.py @@ -0,0 +1,115 @@ +from reinforcement_learning.agent import DdpgAgent +from reinforcement_learning.util import AgentSignalHandler +from reinforcement_learning.cnnarch_logger import ArchLogger +from reinforcement_learning.CNNCreator_MountaincarCritic import CNNCreator_MountaincarCritic +import reinforcement_learning.environment +import CNNCreator_mountaincar_master_actor + +import os +import sys +import re +import logging +import mxnet as mx + + +def resume_session(): + session_param_output = os.path.join(session_output_dir, agent_name) + resume_session = False + resume_directory = None + if os.path.isdir(session_output_dir) and os.path.isdir(session_param_output): + regex = re.compile(r'\d\d\d\d-\d\d-\d\d-\d\d-\d\d') + dir_content = os.listdir(session_param_output) + session_files = filter(regex.search, dir_content) + session_files.sort(reverse=True) + for d in session_files: + interrupted_session_dir = os.path.join(session_param_output, d, '.interrupted_session') + if os.path.isdir(interrupted_session_dir): + resume = raw_input('Interrupted session from {} found. Do you want to resume? 
(y/n) '.format(d)) + if resume == 'y': + resume_session = True + resume_directory = interrupted_session_dir + break + return resume_session, resume_directory + + +if __name__ == "__main__": + agent_name='mountaincar_master_actor' + # Prepare output directory and logger + output_directory = 'model_output'\ + + '/' + agent_name\ + + '/' + time.strftime( + '%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + ArchLogger.set_output_directory(output_directory) + ArchLogger.set_logger_name(agent_name) + ArchLogger.set_output_level(ArchLogger.INFO) + + env = reinforcement_learning.environment.GymEnvironment('MountainCarContinuous-v0') + + context = mx.cpu() + actor_creator = CNNCreator_mountaincar_master_actor.CNNCreator_mountaincar_master_actor() + actor_creator.construct(context) + critic_creator = CNNCreator_MountaincarCritic() + critic_creator.construct(context) + + agent_params = { + 'environment': env, + 'replay_memory_params': { + 'method':'buffer', + 'memory_size':1000000, + 'sample_size':64, + 'state_dtype':'float32', + 'action_dtype':'float32', + 'rewards_dtype':'float32' + }, + 'strategy_params': { + 'method':'ornstein_uhlenbeck', + 'epsilon': 1, + 'min_epsilon': 0.01, + 'epsilon_decay_method': 'linear', + 'epsilon_decay': 0.01, + 'action_low': -1, + 'action_high': 1, + 'mu': np.array([0]), + 'theta': np.array([0.15]), + 'sigma': np.array([0.3]) + }, + 'agent_name': agent_name, + 'verbose': True, + 'state_dim': (2,), + 'action_dim': (1,), + 'ctx': 'cpu', + 'discount_factor': 0.999, + 'training_episodes': 200, + 'train_interval': 1, + 'snapshot_interval': 20, + 'max_episode_step': 1000, + 'actor': actor_creator.net, + 'critic': critic_creator.net, + 'actor_optimizer': 'adam', + 'actor_optimizer_params': { + 'learning_rate': 1.0E-4}, + 'critic_optimizer': 'adam', + 'critic_optimizer_params': { + 'learning_rate': 0.001}, + } + + resume, resume_directory = resume_session() + + if resume: + resume_agent_params = { + 'session_dir': resume_directory, + 'environment': env, + 'actor': actor_creator.net, + 'critic': critic_creator.net + } + agent = DdpgAgent.resume_from_session(**resume_agent_params) + else: + agent = DdpgAgent(**agent_params) + + signal_handler = AgentSignalHandler() + signal_handler.register_agent(agent) + + train_successful = agent.train() + + if train_successful: + agent.save_best_network(actor_creator._model_dir_ + actor_creator._model_prefix_ + '_newest', epoch=0) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNTranslator.h b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNTranslator.h new file mode 100644 index 0000000..5a03665 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNTranslator.h @@ -0,0 +1,127 @@ +#ifndef CNNTRANSLATOR_H +#define CNNTRANSLATOR_H +#include +#include + +using namespace std; +using namespace arma; +
class CNNTranslator{ +public: + template static void addColToSTDVector(const Col &source, vector &data){ + for(size_t i = 0; i < source.n_elem; i++){ + data.push_back((float) source(i)); + } + } + + template static void addRowToSTDVector(const subview_row &source, vector &data){ + for(size_t i = 0; i < source.n_elem; i++){ + data.push_back((float) source(i)); + } + } + + template static void addRowToSTDVector(const Row &source, vector &data){ + for(size_t i = 0; i < source.n_elem; i++){ + data.push_back((float) source(i)); + } + } + + template static void addMatToSTDVector(const Mat &source, vector &data){ + for(size_t i = 0; i < 
source.n_rows; i++){ + addRowToSTDVector(source.row(i), data); + } + } + + + template static vector translate(const Col &source){ + size_t size = source.n_elem; + vector data; + data.reserve(size); + addColToSTDVector(source, data); + return data; + } + + template static vector translate(const Row &source){ + size_t size = source.n_elem; + vector data; + data.reserve(size); + addRowToSTDVector(source, data); + return data; + } + + template static vector translate(const Mat &source){ + size_t size = source.n_elem; + vector data; + data.reserve(size); + addMatToSTDVector(source, data); + return data; + } + + template static vector translate(const Cube &source){ + size_t size = source.n_elem; + vector data; + data.reserve(size); + for(size_t i = 0; i < source.n_slices; i++){ + addMatToSTDVector(source.slice(i), data); + } + return data; + } + + static vec translateToCol(const vector &source, const vector &shape){ + assert(shape.size() == 1); + vec column(shape[0]); + for(size_t i = 0; i < source.size(); i++){ + column(i) = (double) source[i]; + } + return column; + } + + static mat translateToMat(const vector &source, const vector &shape){ + assert(shape.size() == 2); + mat matrix(shape[1], shape[0]); //create transposed version of the matrix + int startPos = 0; + int endPos = matrix.n_rows; + const vector columnShape = {matrix.n_rows}; + for(size_t i = 0; i < matrix.n_cols; i++){ + vector colSource(&source[startPos], &source[endPos]); + matrix.col(i) = translateToCol(colSource, columnShape); + startPos = endPos; + endPos += matrix.n_rows; + } + return matrix.t(); + } + + static cube translateToCube(const vector &source, const vector &shape){ + assert(shape.size() == 3); + cube cubeMatrix(shape[1], shape[2], shape[0]); + const int matrixSize = shape[1] * shape[2]; + const vector matrixShape = {shape[1], shape[2]}; + int startPos = 0; + int endPos = matrixSize; + for(size_t i = 0; i < cubeMatrix.n_slices; i++){ + vector matrixSource(&source[startPos], &source[endPos]); + cubeMatrix.slice(i) = translateToMat(matrixSource, matrixShape); + startPos = endPos; + endPos += matrixSize; + } + return cubeMatrix; + } + + template static vector getShape(const Col &source){ + return {source.n_elem}; + } + + template static vector getShape(const Row &source){ + return {source.n_elem}; + } + + template static vector getShape(const Mat &source){ + return {source.n_rows, source.n_cols}; + } + + template static vector getShape(const Cube &source){ + return {source.n_slices, source.n_rows, source.n_cols}; + } +}; + +#endif diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/HelperA.h b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/HelperA.h new file mode 100644 index 0000000..733fd62 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/HelperA.h @@ -0,0 +1,141 @@ +#ifndef HELPERA_H +#define HELPERA_H +#include +#include "armadillo" +#include +#include +#include +using namespace arma; +#ifndef _FILESTRING_CONVERSION___A +#define _FILESTRING_CONVERSION___A +void toFileString(std::ofstream& myfile, mat A){ + myfile << "["; + for (int i = 0; i < A.n_rows; i++){ + for (int j = 0; j < A.n_cols; j++){ + myfile << A(i,j); + if(j + 1 < A.n_cols){ + myfile << ", "; + } + } + if(i + 1 < A.n_rows){ + myfile << ";"; + } + } + myfile << "]"; +} +void toFileString(std::ofstream& myfile, double A){ + myfile << A; +} +void toFileString(std::ofstream& myfile, float A){ + myfile << A; +} +void toFileString(std::ofstream& myfile, int A){ + myfile 
<< A; +} +void toFileString(std::ofstream& myfile, bool A){ + myfile << A; +} +bool Is_close(mat& X, mat& Y, double tol) +{ + // abs returns a mat type then max checks columns and returns a row_vec + // max used again will return the biggest element in the row_vec + bool close(false); + if(arma::max(arma::max(arma::abs(X-Y))) < tol) + { + close = true; + } + return close; +} +#endif +class HelperA{ +public: +static mat getEigenVectors(mat A){ +vec eigenValues; +mat eigenVectors; +eig_sym(eigenValues,eigenVectors,A); +return eigenVectors; +} +static vec getEigenValues(mat A){ +vec eigenValues; +mat eigenVectors; +eig_sym(eigenValues,eigenVectors,A); +return eigenValues; +} + +static mat getKMeansClusters(mat A, int k){ +mat clusters; +kmeans(clusters,A.t(),k,random_subset,20,true); +/*printf("cluster centroid calculation done\n"); +std::ofstream myfile; + myfile.open("data after cluster.txt"); + myfile << A; + myfile.close(); + + std::ofstream myfile2; + myfile2.open("cluster centroids.txt"); + myfile2 << clusters; + myfile2.close();*/ +mat indexedData=getKMeansClustersIndexData(A.t(), clusters); + +/*std::ofstream myfile3; + myfile3.open("data after index.txt"); + myfile3 << indexedData; + myfile3.close(); + */ +return indexedData; +} + +static mat getKMeansClustersIndexData(mat A, mat centroids){ + mat result=mat(A.n_cols, 1); + for(int i=0;i CNN_action(1); + + _cnn_.predict(CNNTranslator::translate(state), + CNN_action); + + action = CNNTranslator::translateToCol(CNN_action, std::vector {1}); + +} + +}; +#endif diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/CNNCreator_MountaincarCritic.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/CNNCreator_MountaincarCritic.py new file mode 100644 index 0000000..0b6c8fa --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/CNNCreator_MountaincarCritic.py @@ -0,0 +1,56 @@ +import mxnet as mx +import logging +import os +from CNNNet_MountaincarCritic import Net + +class CNNCreator_MountaincarCritic: + _model_dir_ = "model/MountaincarCritic/" + _model_prefix_ = "model" + _input_shapes_ = [(2,),(1,)] + + def __init__(self): + self.weight_initializer = mx.init.Normal() + self.net = None + + def get_input_shapes(self): + return self._input_shapes_ + + def load(self, context): + lastEpoch = 0 + param_file = None + + try: + os.remove(self._model_dir_ + self._model_prefix_ + "_newest-0000.params") + except OSError: + pass + try: + os.remove(self._model_dir_ + self._model_prefix_ + "_newest-symbol.json") + except OSError: + pass + + if os.path.isdir(self._model_dir_): + for file in os.listdir(self._model_dir_): + if ".params" in file and self._model_prefix_ in file: + epochStr = file.replace(".params","").replace(self._model_prefix_ + "-","") + epoch = int(epochStr) + if epoch > lastEpoch: + lastEpoch = epoch + param_file = file + if param_file is None: + return 0 + else: + logging.info("Loading checkpoint: " + param_file) + self.net.load_parameters(self._model_dir_ + param_file) + return lastEpoch + + + def construct(self, context, data_mean=None, data_std=None): + self.net = Net(data_mean=data_mean, data_std=data_std) + self.net.collect_params().initialize(self.weight_initializer, ctx=context) + self.net.hybridize() + self.net(mx.nd.zeros((1,)+self._input_shapes_[0], ctx=context),mx.nd.zeros((1,)+self._input_shapes_[1], ctx=context)) + + if not os.path.exists(self._model_dir_): + 
os.makedirs(self._model_dir_) + + self.net.export(self._model_dir_ + self._model_prefix_, epoch=0) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/CNNNet_MountaincarCritic.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/CNNNet_MountaincarCritic.py new file mode 100644 index 0000000..9dc0288 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/CNNNet_MountaincarCritic.py @@ -0,0 +1,115 @@ +import mxnet as mx +import numpy as np +from mxnet import gluon + +class Softmax(gluon.HybridBlock): + def __init__(self, **kwargs): + super(Softmax, self).__init__(**kwargs) + + def hybrid_forward(self, F, x): + return F.softmax(x) + + +class Split(gluon.HybridBlock): + def __init__(self, num_outputs, axis=1, **kwargs): + super(Split, self).__init__(**kwargs) + with self.name_scope(): + self.axis = axis + self.num_outputs = num_outputs + + def hybrid_forward(self, F, x): + return F.split(data=x, axis=self.axis, num_outputs=self.num_outputs) + + +class Concatenate(gluon.HybridBlock): + def __init__(self, dim=1, **kwargs): + super(Concatenate, self).__init__(**kwargs) + with self.name_scope(): + self.dim = dim + + def hybrid_forward(self, F, *x): + return F.concat(*x, dim=self.dim) + + +class ZScoreNormalization(gluon.HybridBlock): + def __init__(self, data_mean, data_std, **kwargs): + super(ZScoreNormalization, self).__init__(**kwargs) + with self.name_scope(): + self.data_mean = self.params.get('data_mean', shape=data_mean.shape, + init=mx.init.Constant(data_mean.asnumpy().tolist()), differentiable=False) + self.data_std = self.params.get('data_std', shape=data_mean.shape, + init=mx.init.Constant(data_std.asnumpy().tolist()), differentiable=False) + + def hybrid_forward(self, F, x, data_mean, data_std): + x = F.broadcast_sub(x, data_mean) + x = F.broadcast_div(x, data_std) + return x + + +class Padding(gluon.HybridBlock): + def __init__(self, padding, **kwargs): + super(Padding, self).__init__(**kwargs) + with self.name_scope(): + self.pad_width = padding + + def hybrid_forward(self, F, x): + x = F.pad(data=x, + mode='constant', + pad_width=self.pad_width, + constant_value=0) + return x + + +class NoNormalization(gluon.HybridBlock): + def __init__(self, **kwargs): + super(NoNormalization, self).__init__(**kwargs) + + def hybrid_forward(self, F, x): + return x + + +class Net(gluon.HybridBlock): + def __init__(self, data_mean=None, data_std=None, **kwargs): + super(Net, self).__init__(**kwargs) + with self.name_scope(): + if not data_mean is None: + assert(not data_std is None) + self.state_input_normalization = ZScoreNormalization(data_mean=data_mean, data_std=data_std) + else: + self.state_input_normalization = NoNormalization() + + self.fc2_1_ = gluon.nn.Dense(units=400, use_bias=True) + # fc2_1_, output shape: {[400,1,1]} + + self.relu2_1_ = gluon.nn.Activation(activation='relu') + self.fc3_1_ = gluon.nn.Dense(units=300, use_bias=True) + # fc3_1_, output shape: {[300,1,1]} + + if not data_mean is None: + assert(not data_std is None) + self.action_input_normalization = ZScoreNormalization(data_mean=data_mean, data_std=data_std) + else: + self.action_input_normalization = NoNormalization() + + self.fc2_2_ = gluon.nn.Dense(units=300, use_bias=True) + # fc2_2_, output shape: {[300,1,1]} + + self.relu4_ = gluon.nn.Activation(activation='relu') + self.fc4_ = gluon.nn.Dense(units=1, use_bias=True) + # fc4_, output shape: {[1,1,1]} + + + 
self.last_layer = 'linear' + + + def hybrid_forward(self, F, state, action): + state = self.state_input_normalization(state) + fc2_1_ = self.fc2_1_(state) + relu2_1_ = self.relu2_1_(fc2_1_) + fc3_1_ = self.fc3_1_(relu2_1_) + action = self.action_input_normalization(action) + fc2_2_ = self.fc2_2_(action) + add4_ = fc3_1_ + fc2_2_ + relu4_ = self.relu4_(add4_) + fc4_ = self.fc4_(relu4_) + return fc4_ diff --git a/src/test/resources/target_code/gluon/reinforcementModel/torcs/reward/__init__.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/__init__.py similarity index 100% rename from src/test/resources/target_code/gluon/reinforcementModel/torcs/reward/__init__.py rename to src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/__init__.py diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/agent.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/agent.py new file mode 100644 index 0000000..c06b32a --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/agent.py @@ -0,0 +1,900 @@ +import mxnet as mx +import numpy as np +import time +import os +import sys +import util +import matplotlib.pyplot as plt +import pyprind +from cnnarch_logger import ArchLogger +from replay_memory import ReplayMemoryBuilder +from strategy import StrategyBuilder +from util import copy_net, get_loss_function,\ + copy_net_with_two_inputs, DdpgTrainingStats, DqnTrainingStats,\ + make_directory_if_not_exist +from mxnet import nd, gluon, autograd + + +class Agent(object): + def __init__( + self, + environment, + replay_memory_params, + strategy_params, + state_dim, + action_dim, + ctx=None, + discount_factor=.9, + training_episodes=50, + train_interval=1, + start_training=0, + snapshot_interval=200, + agent_name='Agent', + max_episode_step=99999, + evaluation_samples=1000, + output_directory='model_parameters', + verbose=True, + target_score=None + ): + assert 0 < discount_factor <= 1,\ + 'Discount factor must be between 0 and 1' + assert train_interval > 0, 'Train interval must be greater 0' + assert snapshot_interval > 0, 'Snapshot interval must be greater 0' + assert max_episode_step > 0,\ + 'Maximal steps per episode must be greater 0' + assert training_episodes > 0, 'Trainings episode must be greater 0' + assert replay_memory_params is not None,\ + 'Replay memory parameter not set' + assert type(state_dim) is tuple, 'State dimension is not a tuple' + assert type(action_dim) is tuple, 'Action dimension is not a tuple' + + self._logger = ArchLogger.get_logger() + self._ctx = mx.gpu() if ctx == 'gpu' else mx.cpu() + self._environment = environment + self._discount_factor = discount_factor + self._training_episodes = training_episodes + self._train_interval = train_interval + self._verbose = verbose + self._state_dim = state_dim + + replay_memory_params['state_dim'] = state_dim + replay_memory_params['action_dim'] = action_dim + self._replay_memory_params = replay_memory_params + rm_builder = ReplayMemoryBuilder() + self._memory = rm_builder.build_by_params(**replay_memory_params) + self._minibatch_size = self._memory.sample_size + self._action_dim = action_dim + + strategy_params['action_dim'] = self._action_dim + self._strategy_params = strategy_params + strategy_builder = StrategyBuilder() + self._strategy = strategy_builder.build_by_params(**strategy_params) + self._agent_name = 
agent_name + self._snapshot_interval = snapshot_interval + self._creation_time = time.time() + self._max_episode_step = max_episode_step + self._start_training = start_training + self._output_directory = output_directory + self._target_score = target_score + + self._evaluation_samples = evaluation_samples + self._best_avg_score = -np.infty + self._best_net = None + + self._interrupt_flag = False + self._training_stats = None + + # Training Context + self._current_episode = 0 + self._total_steps = 0 + + @property + def current_episode(self): + return self._current_episode + + @property + def environment(self): + return self._environment + + def save_config_file(self): + import json + make_directory_if_not_exist(self._output_directory) + filename = os.path.join(self._output_directory, 'config.json') + config = self._make_config_dict() + with open(filename, mode='w') as fp: + json.dump(config, fp, indent=4) + + def set_interrupt_flag(self, interrupt): + self._interrupt_flag = interrupt + + def _interrupt_training(self): + import pickle + self._logger.info('Training interrupted; Store state for resuming') + session_dir = self._get_session_dir() + agent_session_file = os.path.join(session_dir, 'agent.p') + logger = self._logger + + self._make_pickle_ready(session_dir) + + with open(agent_session_file, 'wb') as f: + pickle.dump(self, f, protocol=2) + logger.info('State successfully stored') + + def _make_pickle_ready(self, session_dir): + del self._training_stats.logger + self._logger = None + self._environment.close() + self._environment = None + self._save_net(self._best_net, 'best_net', session_dir) + self._best_net = None + + def _make_config_dict(self): + config = dict() + config['state_dim'] = self._state_dim + config['action_dim'] = self._action_dim + config['ctx'] = str(self._ctx) + config['discount_factor'] = self._discount_factor + config['strategy_params'] = self._strategy_params + config['replay_memory_params'] = self._replay_memory_params + config['training_episodes'] = self._training_episodes + config['start_training'] = self._start_training + config['evaluation_samples'] = self._evaluation_samples + config['train_interval'] = self._train_interval + config['snapshot_interval'] = self._snapshot_interval + config['agent_name'] = self._agent_name + config['max_episode_step'] = self._max_episode_step + config['output_directory'] = self._output_directory + config['verbose'] = self._verbose + config['target_score'] = self._target_score + return config + + def _adjust_optimizer_params(self, optimizer_params): + if 'weight_decay' in optimizer_params: + optimizer_params['wd'] = optimizer_params['weight_decay'] + del optimizer_params['weight_decay'] + if 'learning_rate_decay' in optimizer_params: + min_learning_rate = 1e-8 + if 'learning_rate_minimum' in optimizer_params: + min_learning_rate = optimizer_params['learning_rate_minimum'] + del optimizer_params['learning_rate_minimum'] + optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler( + optimizer_params['step_size'], + factor=optimizer_params['learning_rate_decay'], + stop_factor_lr=min_learning_rate) + del optimizer_params['step_size'] + del optimizer_params['learning_rate_decay'] + + return optimizer_params + + def _sample_from_memory(self): + states, actions, rewards, next_states, terminals\ + = self._memory.sample(batch_size=self._minibatch_size) + states = nd.array(states, ctx=self._ctx) + actions = nd.array(actions, ctx=self._ctx) + rewards = nd.array(rewards, ctx=self._ctx) + next_states = nd.array(next_states, 
ctx=self._ctx) + terminals = nd.array(terminals, ctx=self._ctx) + return states, actions, rewards, next_states, terminals + + def evaluate(self, target=None, sample_games=100, verbose=True): + target = self._target_score if target is None else target + if target: + target_achieved = 0 + total_reward = 0 + + self._logger.info('Sampling from {} games...'.format(sample_games)) + for g in pyprind.prog_bar(range(sample_games)): + state = self._environment.reset() + step = 0 + game_reward = 0 + terminal = False + while not terminal and (step < self._max_episode_step): + action = self.get_next_action(state) + state, reward, terminal, _ = self._environment.step(action) + game_reward += reward + step += 1 + + if verbose: + info = 'Game %d: Reward %f' % (g, game_reward) + self._logger.debug(info) + if target: + if game_reward >= target: + target_achieved += 1 + total_reward += game_reward + + avg_reward = float(total_reward)/float(sample_games) + info = 'Avg. Reward: %f' % avg_reward + if target: + target_achieved_ratio = int( + (float(target_achieved)/float(sample_games))*100) + info += '; Target Achieved in %d%% of games'\ + % (target_achieved_ratio) + + if verbose: + self._logger.info(info) + return avg_reward + + def _do_snapshot_if_in_interval(self, episode): + do_snapshot =\ + (episode != 0 and (episode % self._snapshot_interval == 0)) + if do_snapshot: + self.save_parameters(episode=episode) + self._evaluate() + + def _evaluate(self, verbose=True): + avg_reward = self.evaluate( + sample_games=self._evaluation_samples, verbose=False) + info = 'Evaluation -> Average Reward in {} games: {}'.format( + self._evaluation_samples, avg_reward) + + if self._best_avg_score is None or self._best_avg_score <= avg_reward: + self._save_current_as_best_net() + self._best_avg_score = avg_reward + if verbose: + self._logger.info(info) + + def _is_target_reached(self, avg_reward): + return self._target_score is not None\ + and avg_reward > self._target_score + + def _do_training(self): + return (self._total_steps % self._train_interval == 0) and\ + (self._memory.is_sample_possible(self._minibatch_size)) and\ + (self._current_episode >= self._start_training) + + def _check_interrupt_routine(self): + if self._interrupt_flag: + self._interrupt_flag = False + self._interrupt_training() + return True + return False + + def _is_target_reached(self, avg_reward): + return self._target_score is not None\ + and avg_reward > self._target_score + + def _save_parameters(self, net, episode=None, filename='dqn-agent-params'): + assert self._output_directory + assert isinstance(net, gluon.HybridBlock) + make_directory_if_not_exist(self._output_directory) + + if(episode is not None): + self._logger.info( + 'Saving model parameters after episode %d' % episode) + filename = filename + '-ep{}'.format(episode) + else: + self._logger.info('Saving model parameters') + self._save_net(net, filename) + + def _save_net(self, net, filename, filedir=None): + filedir = self._output_directory if filedir is None else filedir + filename = os.path.join(filedir, filename + '.params') + net.save_parameters(filename) + + def save_best_network(self, path, epoch=0): + self._logger.info( + 'Saving best network with average reward of {}'.format( + self._best_avg_score)) + self._best_net.export(path, epoch=epoch) + + def _get_session_dir(self): + session_dir = os.path.join( + self._output_directory, '.interrupted_session') + make_directory_if_not_exist(session_dir) + return session_dir + + def _save_current_as_best_net(self): + raise 
NotImplementedError + + def get_next_action(self, state): + raise NotImplementedError + + def save_parameters(self, episode): + raise NotImplementedError + + def train(self, episodes=None): + raise NotImplementedError + + +class DdpgAgent(Agent): + def __init__( + self, + actor, + critic, + environment, + replay_memory_params, + strategy_params, + state_dim, + action_dim, + soft_target_update_rate=.001, + actor_optimizer='adam', + actor_optimizer_params={'learning_rate': 0.0001}, + critic_optimizer='adam', + critic_optimizer_params={'learning_rate': 0.001}, + ctx=None, + discount_factor=.9, + training_episodes=50, + start_training=20, + train_interval=1, + snapshot_interval=200, + agent_name='DdpgAgent', + max_episode_step=9999, + evaluation_samples=100, + output_directory='model_parameters', + verbose=True, + target_score=None + ): + super(DdpgAgent, self).__init__( + environment=environment, replay_memory_params=replay_memory_params, + strategy_params=strategy_params, state_dim=state_dim, + action_dim=action_dim, ctx=ctx, discount_factor=discount_factor, + training_episodes=training_episodes, start_training=start_training, + train_interval=train_interval, + snapshot_interval=snapshot_interval, agent_name=agent_name, + max_episode_step=max_episode_step, + output_directory=output_directory, verbose=verbose, + target_score=target_score, evaluation_samples=evaluation_samples) + assert critic is not None, 'Critic not set' + assert actor is not None, 'Actor is not set' + assert soft_target_update_rate > 0,\ + 'Target update must be greater zero' + assert actor_optimizer is not None, 'No actor optimizer set' + assert critic_optimizer is not None, 'No critic optimizer set' + + self._actor = actor + self._critic = critic + + self._actor_target = self._copy_actor() + self._critic_target = self._copy_critic() + + self._actor_optimizer = actor_optimizer + self._actor_optimizer_params = self._adjust_optimizer_params( + actor_optimizer_params) + + self._critic_optimizer = critic_optimizer + self._critic_optimizer_params = self._adjust_optimizer_params( + critic_optimizer_params) + + self._soft_target_update_rate = soft_target_update_rate + + self._logger.info( + 'Agent created with following parameters: {}'.format( + self._make_config_dict())) + + self._best_net = self._copy_actor() + + self._training_stats = DdpgTrainingStats(self._training_episodes) + + def _make_pickle_ready(self, session_dir): + super(DdpgAgent, self)._make_pickle_ready(session_dir) + self._save_net(self._actor, 'actor', session_dir) + self._actor = None + self._save_net(self._critic, 'critic', session_dir) + self._critic = None + self._save_net(self._actor_target, 'actor_target', session_dir) + self._actor_target = None + self._save_net(self._critic_target, 'critic_target', session_dir) + self._critic_target = None + + @classmethod + def resume_from_session(cls, session_dir, actor, critic, environment): + import pickle + if not os.path.exists(session_dir): + raise ValueError('Session directory does not exist') + + files = dict() + files['agent'] = os.path.join(session_dir, 'agent.p') + files['best_net_params'] = os.path.join(session_dir, 'best_net.params') + files['actor_net_params'] = os.path.join(session_dir, 'actor.params') + files['actor_target_net_params'] = os.path.join( + session_dir, 'actor_target.params') + files['critic_net_params'] = os.path.join(session_dir, 'critic.params') + files['critic_target_net_params'] = os.path.join( + session_dir, 'critic_target.params') + + for file in files.values(): + if not 
os.path.exists(file): + raise ValueError( + 'Session directory is not complete: {} is missing' + .format(file)) + + with open(files['agent'], 'rb') as f: + agent = pickle.load(f) + + agent._environment = environment + + agent._actor = actor + agent._actor.load_parameters(files['actor_net_params'], agent._ctx) + agent._actor.hybridize() + agent._actor(nd.random_normal( + shape=((1,) + agent._state_dim), ctx=agent._ctx)) + + agent._best_net = copy_net(agent._actor, agent._state_dim, agent._ctx) + agent._best_net.load_parameters(files['best_net_params'], agent._ctx) + + agent._actor_target = copy_net( + agent._actor, agent._state_dim, agent._ctx) + agent._actor_target.load_parameters(files['actor_target_net_params']) + + agent._critic = critic + agent._critic.load_parameters(files['critic_net_params'], agent._ctx) + agent._critic.hybridize() + agent._critic( + nd.random_normal(shape=((1,) + agent._state_dim), ctx=agent._ctx), + nd.random_normal(shape=((1,) + agent._action_dim), ctx=agent._ctx)) + + agent._critic_target = copy_net_with_two_inputs( + agent._critic, agent._state_dim, agent._action_dim, agent._ctx) + agent._critic_target.load_parameters(files['critic_target_net_params']) + + agent._logger = ArchLogger.get_logger() + agent._training_stats.logger = ArchLogger.get_logger() + agent._logger.info('Agent was retrieved; Training can be continued') + + return agent + + def _save_current_as_best_net(self): + self._best_net = self._copy_actor() + + def get_next_action(self, state): + action = self._actor(nd.array([state], ctx=self._ctx)) + return action[0].asnumpy() + + def save_parameters(self, episode): + self._save_parameters(self._actor, episode=episode) + + def train(self, episodes=None): + self.save_config_file() + self._logger.info("--- Start DDPG training ---") + episodes = \ + episodes if episodes is not None else self._training_episodes + + resume = (self._current_episode > 0) + if resume: + self._logger.info("Training session resumed") + self._logger.info( + "Starting from episode {}".format(self._current_episode)) + else: + self._training_stats = DdpgTrainingStats(episodes) + + # Initialize target Q' and mu' + self._actor_target = self._copy_actor() + self._critic_target = self._copy_critic() + + # Initialize l2 loss for critic network + l2_loss = gluon.loss.L2Loss() + + # Initialize critic and actor trainer + trainer_actor = gluon.Trainer( + self._actor.collect_params(), self._actor_optimizer, + self._actor_optimizer_params) + trainer_critic = gluon.Trainer( + self._critic.collect_params(), self._critic_optimizer, + self._critic_optimizer_params) + + # For episode=1..n + while self._current_episode < episodes: + # Check interrupt flag + if self._check_interrupt_routine(): + return False + + # Initialize new episode + step = 0 + episode_reward = 0 + start = time.time() + episode_critic_loss = 0 + episode_actor_loss = 0 + episode_avg_q_value = 0 + training_steps = 0 + + # Get initialial observation state s + state = self._environment.reset() + + # For step=1..T + while step < self._max_episode_step: + # Select an action a = mu(s) + N(step) according to current + # actor and exploration noise N according to strategy + action = self._strategy.select_action( + self.get_next_action(state)) + + # Execute action a and observe reward r and next state ns + next_state, reward, terminal, _ = \ + self._environment.step(action) + + self._logger.debug( + 'Applied action {} with reward {}'.format(action, reward)) + + # Store transition (s,a,r,ns) in replay buffer + self._memory.append( + 
state, action, reward, next_state, terminal) + + if self._do_training(): + # Sample random minibatch of b transitions + # (s_i, a_i, r_i, s_(i+1)) from replay buffer + states, actions, rewards, next_states, terminals =\ + self._sample_from_memory() + + actor_target_actions = self._actor_target(next_states) + critic_target_qvalues = self._critic_target( + next_states, actor_target_actions) + + rewards = rewards.reshape(self._minibatch_size, 1) + terminals = terminals.reshape(self._minibatch_size, 1) + + # y = r_i + discount * Q'(s_(i+1), mu'(s_(i+1))) + y = rewards + (1.0 - terminals) * self._discount_factor\ + * critic_target_qvalues + + # Train the critic network + with autograd.record(): + qvalues = self._critic(states, actions) + critic_loss = l2_loss(qvalues, y) + critic_loss.backward() + trainer_critic.step(self._minibatch_size) + + # Train the actor network + # Temporary critic so that gluon trainer does not mess + # with critic parameters + tmp_critic = self._copy_critic() + with autograd.record(): + actor_qvalues = tmp_critic(states, self._actor(states)) + # For maximizing qvalues we have to multiply with -1 + # as we use a minimizer + actor_loss = -1 * actor_qvalues + actor_loss.backward() + trainer_actor.step(self._minibatch_size) + + # Update target networks: + self._actor_target = self._soft_update( + self._actor, self._actor_target, + self._soft_target_update_rate) + self._critic_target = self._soft_update( + self._critic, self._critic_target, + self._soft_target_update_rate) + + # Update statistics + episode_critic_loss +=\ + np.sum(critic_loss.asnumpy()) / self._minibatch_size + episode_actor_loss +=\ + np.sum(actor_loss.asnumpy()) / self._minibatch_size + episode_avg_q_value +=\ + np.sum(actor_qvalues.asnumpy()) / self._minibatch_size + + training_steps += 1 + + episode_reward += reward + step += 1 + self._total_steps += 1 + state = next_state + + if terminal: + # Reset the strategy + self._strategy.reset() + break + + # Log the episode results + episode_actor_loss = 0 if training_steps == 0\ + else (episode_actor_loss / training_steps) + episode_critic_loss = 0 if training_steps == 0\ + else (episode_critic_loss / training_steps) + episode_avg_q_value = 0 if training_steps == 0\ + else (episode_avg_q_value / training_steps) + + avg_reward = self._training_stats.log_episode( + self._current_episode, start, training_steps, + episode_actor_loss, episode_critic_loss, episode_avg_q_value, + self._strategy.cur_eps, episode_reward) + + self._do_snapshot_if_in_interval(self._current_episode) + self._strategy.decay(self._current_episode) + + if self._is_target_reached(avg_reward): + self._logger.info( + 'Target score is reached in average; Training is stopped') + break + + self._current_episode += 1 + + self._evaluate() + self.save_parameters(episode=self._current_episode) + self.save_best_network(os.path.join(self._output_directory, 'best')) + self._training_stats.save_stats(self._output_directory) + self._logger.info('--------- Training finished ---------') + return True + + def _make_config_dict(self): + config = super(DdpgAgent, self)._make_config_dict() + config['soft_target_update_rate'] = self._soft_target_update_rate + config['actor_optimizer'] = self._actor_optimizer + config['actor_optimizer_params'] = self._actor_optimizer_params + config['critic_optimizer'] = self._critic_optimizer + config['critic_optimizer_params'] = self._critic_optimizer_params + return config + + def _soft_update(self, net, target, tau): + net_params = [p.data() for _, p in 
net.collect_params().items()] + for i, (_, p) in enumerate(target.collect_params().items()): + target_params = p.data() + p.set_data((1.0 - tau) * target_params + tau * net_params[i]) + return target + + def _copy_actor(self): + assert self._actor is not None + assert self._ctx is not None + assert type(self._state_dim) is tuple + return copy_net(self._actor, self._state_dim, ctx=self._ctx) + + def _copy_critic(self): + assert self._critic is not None + assert self._ctx is not None + assert type(self._state_dim) is tuple + assert type(self._action_dim) is tuple + return copy_net_with_two_inputs( + self._critic, self._state_dim, self._action_dim, ctx=self._ctx) + + +class DqnAgent(Agent): + def __init__( + self, + qnet, + environment, + replay_memory_params, + strategy_params, + state_dim, + action_dim, + ctx=None, + discount_factor=.9, + loss_function='euclidean', + optimizer='rmsprop', + optimizer_params={'learning_rate': 0.09}, + training_episodes=50, + start_training=0, + train_interval=1, + use_fix_target=False, + double_dqn=False, + target_update_interval=10, + snapshot_interval=200, + evaluation_samples=100, + agent_name='Dqn_agent', + max_episode_step=99999, + output_directory='model_parameters', + verbose=True, + target_score=None + ): + super(DqnAgent, self).__init__( + environment=environment, replay_memory_params=replay_memory_params, + strategy_params=strategy_params, state_dim=state_dim, + action_dim=action_dim, ctx=ctx, discount_factor=discount_factor, + training_episodes=training_episodes, start_training=start_training, + train_interval=train_interval, + snapshot_interval=snapshot_interval, agent_name=agent_name, + max_episode_step=max_episode_step, + output_directory=output_directory, verbose=verbose, + target_score=target_score, evaluation_samples=evaluation_samples) + + self._qnet = qnet + self._target_update_interval = target_update_interval + self._target_qnet = copy_net( + self._qnet, self._state_dim, ctx=self._ctx) + self._loss_function_str = loss_function + self._loss_function = get_loss_function(loss_function) + self._optimizer = optimizer + self._optimizer_params = optimizer_params + self._double_dqn = double_dqn + self._use_fix_target = use_fix_target + + # Initialize best network + self._best_net = copy_net(self._qnet, self._state_dim, self._ctx) + self._best_avg_score = -np.infty + + self._training_stats = None + + @classmethod + def resume_from_session(cls, session_dir, net, environment): + import pickle + if not os.path.exists(session_dir): + raise ValueError('Session directory does not exist') + + files = dict() + files['agent'] = os.path.join(session_dir, 'agent.p') + files['best_net_params'] = os.path.join(session_dir, 'best_net.params') + files['q_net_params'] = os.path.join(session_dir, 'qnet.params') + files['target_net_params'] = os.path.join( + session_dir, 'target_net.params') + + for file in files.values(): + if not os.path.exists(file): + raise ValueError( + 'Session directory is not complete: {} is missing' + .format(file)) + + with open(files['agent'], 'rb') as f: + agent = pickle.load(f) + + agent._environment = environment + agent._qnet = net + agent._qnet.load_parameters(files['q_net_params'], agent._ctx) + agent._qnet.hybridize() + agent._qnet(nd.random_normal( + shape=((1,) + agent._state_dim), ctx=agent._ctx)) + agent._best_net = copy_net(agent._qnet, agent._state_dim, agent._ctx) + agent._best_net.load_parameters(files['best_net_params'], agent._ctx) + agent._target_qnet = copy_net( + agent._qnet, agent._state_dim, agent._ctx) + 
agent._target_qnet.load_parameters( + files['target_net_params'], agent._ctx) + + agent._logger = ArchLogger.get_logger() + agent._training_stats.logger = agent._logger + agent._logger.info('Agent was retrieved; Training can be continued') + + return agent + + def _make_pickle_ready(self, session_dir): + super(DqnAgent, self)._make_pickle_ready(session_dir) + self._save_net(self._qnet, 'qnet', session_dir) + self._qnet = None + self._save_net(self._target_qnet, 'target_net', session_dir) + self._target_qnet = None + + def get_q_values(self, state, with_best=False): + return self.get_batch_q_values( + nd.array([state], ctx=self._ctx), with_best=with_best)[0] + + def get_batch_q_values(self, state_batch, with_best=False): + return self._best_net(state_batch)\ + if with_best else self._qnet(state_batch) + + def get_next_action(self, state, with_best=False): + q_values = self.get_q_values(state, with_best=with_best) + action = q_values.asnumpy().argmax() + return action + + def __determine_target_q_values( + self, states, actions, rewards, next_states, terminals + ): + if self._use_fix_target: + q_max_val = self._target_qnet(next_states) + else: + q_max_val = self._qnet(next_states) + + if self._double_dqn: + q_values_next_states = self._qnet(next_states) + target_rewards = rewards + nd.choose_element_0index( + q_max_val, nd.argmax_channel(q_values_next_states))\ + * (1.0 - terminals) * self._discount_factor + else: + target_rewards = rewards + nd.choose_element_0index( + q_max_val, nd.argmax_channel(q_max_val))\ + * (1.0 - terminals) * self._discount_factor + + target_qval = self._qnet(states) + for t in range(target_rewards.shape[0]): + target_qval[t][actions[t]] = target_rewards[t] + + return target_qval + + def __train_q_net_step(self, trainer): + states, actions, rewards, next_states, terminals =\ + self._sample_from_memory() + target_qval = self.__determine_target_q_values( + states, actions, rewards, next_states, terminals) + with autograd.record(): + q_values = self._qnet(states) + loss = self._loss_function(q_values, target_qval) + loss.backward() + trainer.step(self._minibatch_size) + return loss + + def __do_target_update_if_in_interval(self, total_steps): + do_target_update = ( + self._use_fix_target and + (total_steps % self._target_update_interval == 0)) + if do_target_update: + self._logger.info( + 'Target network is updated after {} steps'.format(total_steps)) + self._target_qnet = copy_net( + self._qnet, self._state_dim, self._ctx) + + def train(self, episodes=None): + self.save_config_file() + self._logger.info("--- Start training ---") + trainer = gluon.Trainer( + self._qnet.collect_params(), + self._optimizer, + self._adjust_optimizer_params(self._optimizer_params)) + episodes = episodes if episodes is not None\ + else self._training_episodes + + resume = (self._current_episode > 0) + if resume: + self._logger.info("Training session resumed") + self._logger.info("Starting from episode {}".format( + self._current_episode)) + else: + self._training_stats = DqnTrainingStats(episodes) + + # Implementation Deep Q Learning described by + # Mnih et. al. in Playing Atari with Deep Reinforcement Learning + while self._current_episode < episodes: + if self._check_interrupt_routine(): + return False + + step = 0 + episode_reward = 0 + start = time.time() + state = self._environment.reset() + episode_loss = 0 + training_steps = 0 + while step < self._max_episode_step: + # 1. 
Choose an action based on current game state and policy + q_values = self._qnet(nd.array([state], ctx=self._ctx)) + action = self._strategy.select_action(q_values[0]) + + # 2. Play the game for a single step + next_state, reward, terminal, _ =\ + self._environment.step(action) + + # 3. Store transition in replay memory + self._memory.append( + state, action, reward, next_state, terminal) + + # 4. Train the network if in interval + if self._do_training(): + loss = self.__train_q_net_step(trainer) + training_steps += 1 + episode_loss +=\ + np.sum(loss.asnumpy()) / self._minibatch_size + + # Update target network if in interval + self.__do_target_update_if_in_interval(self._total_steps) + + step += 1 + self._total_steps += 1 + episode_reward += reward + state = next_state + + if terminal: + self._strategy.reset() + break + + self._do_snapshot_if_in_interval(self._current_episode) + + episode_loss = (episode_loss / training_steps)\ + if training_steps > 0 else 0 + avg_reward = self._training_stats.log_episode( + self._current_episode, start, training_steps, + episode_loss, self._strategy.cur_eps, episode_reward) + + self._strategy.decay(self._current_episode) + + if self._is_target_reached(avg_reward): + self._logger.info( + 'Target score is reached in average; Training is stopped') + break + + self._current_episode += 1 + + self._evaluate() + self.save_parameters(episode=self._current_episode) + self.save_best_network(os.path.join(self._output_directory, 'best')) + self._training_stats.save_stats(self._output_directory) + self._logger.info('--------- Training finished ---------') + return True + + def _make_config_dict(self): + config = super(DqnAgent, self)._make_config_dict() + config['optimizer'] = self._optimizer + config['optimizer_params'] = self._optimizer_params + config['loss_function'] = self._loss_function_str + config['use_fix_target'] = self._use_fix_target + config['double_dqn'] = self._double_dqn + config['target_update_interval'] = self._target_update_interval + return config + + def save_parameters(self, episode): + self._save_parameters(self._qnet, episode=episode) + + def _save_current_as_best_net(self): + self._best_net = copy_net( + self._qnet, (1,) + self._state_dim, ctx=self._ctx) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/cnnarch_logger.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/cnnarch_logger.py new file mode 100644 index 0000000..a17f5b1 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/cnnarch_logger.py @@ -0,0 +1,93 @@ +import logging +import sys +import os +import util + + +class ArchLogger(object): + _logger = None + + __output_level = logging.INFO + __logger_name = 'agent' + __output_directory = '.' 
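    # Illustrative usage sketch of ArchLogger (assumed typical call order; the
    # names 'MyAgent' and 'model_output' are placeholders, not generated values).
    # It mirrors the self-test in the __main__ block at the end of this file:
    #
    #     ArchLogger.set_logger_name('MyAgent')
    #     ArchLogger.set_output_directory('model_output')
    #     ArchLogger.set_output_level(ArchLogger.DEBUG)
    #     ArchLogger.init_logger(make_log_file=True)
    #     logger = ArchLogger.get_logger()
    #     logger.info('Training started')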
+ __append = True + __logformat = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + __dateformat = '%d-%b-%y %H:%M:%S' + + INFO = logging.INFO + DEBUG = logging.DEBUG + + @staticmethod + def set_output_level(output_level): + assert output_level is not None + ArchLogger.__output_level = output_level + + @staticmethod + def set_logger_name(logger_name): + assert logger_name is not None + ArchLogger.__logger_name = logger_name + + @staticmethod + def set_output_directory(output_directory): + assert output_directory is not None + ArchLogger.__output_directory = output_directory + + @staticmethod + def set_append(append): + assert append is not None + ArchLogger.__append = append + + @staticmethod + def set_log_format(logformat, dateformat): + assert logformat is not None + assert dateformat is not None + ArchLogger.__logformat = logformat + ArchLogger.__dateformat = dateformat + + @staticmethod + def init_logger(make_log_file=True): + assert ArchLogger._logger is None, 'Logger init already called' + filemode = 'a' if ArchLogger.__append else 'w' + formatter = logging.Formatter( + fmt=ArchLogger.__logformat, datefmt=ArchLogger.__dateformat) + + logger = logging.getLogger(ArchLogger.__logger_name) + logger.setLevel(ArchLogger.__output_level) + + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setLevel(ArchLogger.__output_level) + stream_handler.setFormatter(formatter) + logger.addHandler(stream_handler) + + if make_log_file: + util.make_directory_if_not_exist(ArchLogger.__output_directory) + log_file = os.path.join( + ArchLogger.__output_directory, + ArchLogger.__logger_name + '.log') + file_handler = logging.FileHandler(log_file, mode=filemode) + file_handler.setLevel(ArchLogger.__output_level) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + ArchLogger._logger = logger + + @staticmethod + def get_logger(): + if ArchLogger._logger is None: + ArchLogger.init_logger() + assert ArchLogger._logger is not None + return ArchLogger._logger + +if __name__ == "__main__": + print('=== Test logger ===') + ArchLogger.set_logger_name('TestLogger') + ArchLogger.set_output_directory('test_log') + ArchLogger.init_logger() + logger = ArchLogger.get_logger() + logger.warning('This is a warning') + logger.debug('This is a debug information, which you should not see') + logger.info('This is a normal information') + assert os.path.exists('test_log')\ + and os.path.isfile(os.path.join('test_log', 'TestLogger.log')),\ + 'Test failed: No logfile exists' + import shutil + shutil.rmtree('test_log') \ No newline at end of file diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/environment.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/environment.py new file mode 100644 index 0000000..381ec9e --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/environment.py @@ -0,0 +1,64 @@ +import abc +import logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class Environment: + __metaclass__ = abc.ABCMeta + + def __init__(self): + pass + + @abc.abstractmethod + def reset(self): + pass + + @abc.abstractmethod + def step(self, action): + pass + + @abc.abstractmethod + def close(self): + pass + +import gym +class GymEnvironment(Environment): + def __init__(self, env_name, **kwargs): + super(GymEnvironment, self).__init__(**kwargs) + self.__seed = 42 + self.__env = gym.make(env_name) + 
self.__env.seed(self.__seed) + + @property + def state_dim(self): + return self.__env.observation_space.shape + + + @property + def number_of_actions(self): + return self.__env.action_space.n + + @property + def rewards_dtype(self): + return 'float32' + + def reset(self): + return self.__env.reset() + + def step(self, action): + return self.__env.step(action) + + def close(self): + self.__env.close() + + def action_space(self): + self.__env.action_space + + def is_in_action_space(self, action): + return self.__env.action_space.contains(action) + + def sample_action(self): + return self.__env.action_space.sample() + + def render(self): + self.__env.render() diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/replay_memory.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/replay_memory.py new file mode 100644 index 0000000..d22147a --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/replay_memory.py @@ -0,0 +1,208 @@ +import numpy as np + + +class ReplayMemoryBuilder(object): + def __init__(self): + self.__supported_methods = ['online', 'buffer', 'combined'] + + def build_by_params( + self, + state_dim, + method='online', + state_dtype='float32', + action_dim=(1,), + action_dtype='uint8', + rewards_dtype='float32', + memory_size=1000, + sample_size=32 + ): + assert state_dim is not None + assert action_dim is not None + assert method in self.__supported_methods + + if method == 'online': + return self.build_online_memory( + state_dim=state_dim, state_dtype=state_dtype, + action_dtype=action_dtype, action_dim=action_dim, + rewards_dtype=rewards_dtype) + else: + assert memory_size is not None and memory_size > 0 + assert sample_size is not None and sample_size > 0 + if method == 'buffer': + return self.build_buffered_memory( + state_dim=state_dim, sample_size=sample_size, + memory_size=memory_size, state_dtype=state_dtype, + action_dim=action_dim, action_dtype=action_dtype, + rewards_dtype=rewards_dtype) + else: + return self.build_combined_memory( + state_dim=state_dim, sample_size=sample_size, + memory_size=memory_size, state_dtype=state_dtype, + action_dim=action_dim, action_dtype=action_dtype, + rewards_dtype=rewards_dtype) + + def build_buffered_memory( + self, state_dim, memory_size, sample_size, state_dtype, action_dim, + action_dtype, rewards_dtype + ): + assert memory_size > 0 + assert sample_size > 0 + return ReplayMemory( + state_dim, size=memory_size, sample_size=sample_size, + state_dtype=state_dtype, action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) + + def build_combined_memory( + self, state_dim, memory_size, sample_size, state_dtype, action_dim, + action_dtype, rewards_dtype + ): + assert memory_size > 0 + assert sample_size > 0 + return CombinedReplayMemory( + state_dim, size=memory_size, sample_size=sample_size, + state_dtype=state_dtype, action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) + + def build_online_memory( + self, state_dim, state_dtype, action_dtype, action_dim, rewards_dtype + ): + return OnlineReplayMemory( + state_dim, state_dtype=state_dtype, action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) + + +class ReplayMemory(object): + def __init__( + self, + state_dim, + sample_size, + size=1000, + action_dim=(1,), + state_dtype='float32', + action_dtype='uint8', + rewards_dtype='float32' + ): + assert size > 0, "Size must 
be greater than zero" + assert type(state_dim) is tuple, "State dimension must be a tuple" + assert type(action_dim) is tuple, "Action dimension must be a tuple" + assert sample_size > 0 + self._size = size + self._sample_size = sample_size + self._cur_size = 0 + self._pointer = 0 + self._state_dim = state_dim + self._state_dtype = state_dtype + self._action_dim = action_dim + self._action_dtype = action_dtype + self._rewards_dtype = rewards_dtype + self._states = np.zeros((self._size,) + state_dim, dtype=state_dtype) + self._actions = np.zeros( + (self._size,) + action_dim, dtype=action_dtype) + self._rewards = np.array([0] * self._size, dtype=rewards_dtype) + self._next_states = np.zeros( + (self._size,) + state_dim, dtype=state_dtype) + self._terminals = np.array([0] * self._size, dtype='bool') + + @property + def sample_size(self): + return self._sample_size + + def append(self, state, action, reward, next_state, terminal): + self._states[self._pointer] = state + self._actions[self._pointer] = action + self._rewards[self._pointer] = reward + self._next_states[self._pointer] = next_state + self._terminals[self._pointer] = terminal + + self._pointer = self._pointer + 1 + if self._pointer == self._size: + self._pointer = 0 + + self._cur_size = min(self._size, self._cur_size + 1) + + def at(self, index): + return self._states[index],\ + self._actions[index],\ + self._rewards[index],\ + self._next_states[index],\ + self._terminals[index] + + def is_sample_possible(self, batch_size=None): + batch_size = batch_size if batch_size is not None\ + else self._sample_size + return self._cur_size >= batch_size + + def sample(self, batch_size=None): + batch_size = batch_size if batch_size is not None\ + else self._sample_size + assert self._cur_size >= batch_size,\ + "Size of replay memory must be larger than batch size" + i = 0 + states = np.zeros(( + batch_size,)+self._state_dim, dtype=self._state_dtype) + actions = np.zeros( + (batch_size,)+self._action_dim, dtype=self._action_dtype) + rewards = np.zeros(batch_size, dtype=self._rewards_dtype) + next_states = np.zeros( + (batch_size,)+self._state_dim, dtype=self._state_dtype) + terminals = np.zeros(batch_size, dtype='bool') + + while i < batch_size: + rnd_index = np.random.randint(low=0, high=self._cur_size) + states[i] = self._states.take(rnd_index, axis=0) + actions[i] = self._actions.take(rnd_index, axis=0) + rewards[i] = self._rewards.take(rnd_index, axis=0) + next_states[i] = self._next_states.take(rnd_index, axis=0) + terminals[i] = self._terminals.take(rnd_index, axis=0) + i += 1 + + return states, actions, rewards, next_states, terminals + + +class OnlineReplayMemory(ReplayMemory): + def __init__( + self, state_dim, state_dtype='float32', action_dim=(1,), + action_dtype='uint8', rewards_dtype='float32' + ): + super(OnlineReplayMemory, self).__init__( + state_dim, sample_size=1, size=1, state_dtype=state_dtype, + action_dim=action_dim, action_dtype=action_dtype, + rewards_dtype=rewards_dtype) + + +class CombinedReplayMemory(ReplayMemory): + def __init__( + self, state_dim, sample_size, size=1000, state_dtype='float32', + action_dim=(1,), action_dtype='uint8', rewards_dtype='float32' + ): + super(CombinedReplayMemory, self).__init__( + state_dim=state_dim, sample_size=(sample_size - 1), size=size, + state_dtype=state_dtype, action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) + + self._last_state = np.zeros((1,) + state_dim, dtype=state_dtype) + self._last_action = np.array((1,) + action_dim, dtype=action_dtype) + 
self._last_reward = np.array([0], dtype=rewards_dtype) + self._last_next_state = np.zeros((1,) + state_dim, dtype=state_dtype) + self._last_terminal = np.array([0], dtype='bool') + + def append(self, state, action, reward, next_state, terminal): + super(CombinedReplayMemory, self).append( + state, action, reward, next_state, terminal) + self._last_state = state + self._last_action = action + self._last_reward = reward + self._last_next_state = next_state + self._last_terminal = terminal + + def sample(self, batch_size=None): + batch_size = (batch_size-1) if batch_size is not None\ + else self._sample_size + states, actions, rewards, next_states, terminals = super( + CombinedReplayMemory, self).sample(batch_size=batch_size) + states = np.append(states, [self._last_state], axis=0) + actions = np.append(actions, [self._last_action], axis=0) + rewards = np.append(rewards, [self._last_reward], axis=0) + next_states = np.append(next_states, [self._last_next_state], axis=0) + terminals = np.append(terminals, [self._last_terminal], axis=0) + return states, actions, rewards, next_states, terminals \ No newline at end of file diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/strategy.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/strategy.py new file mode 100644 index 0000000..1f8deb0 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/strategy.py @@ -0,0 +1,172 @@ +import numpy as np + + +class StrategyBuilder(object): + def __init__(self): + pass + + def build_by_params( + self, + method='epsgreedy', + epsilon=0.5, + min_epsilon=0.05, + epsilon_decay_method='no', + epsilon_decay=0.0, + epsilon_decay_start=0, + action_dim=None, + action_low=None, + action_high=None, + mu=0.0, + theta=0.5, + sigma=0.3 + ): + + if epsilon_decay_method == 'linear': + decay = LinearDecay( + eps_decay=epsilon_decay, min_eps=min_epsilon, + decay_start=epsilon_decay_start) + else: + decay = NoDecay() + + if method == 'epsgreedy': + assert action_dim is not None + assert len(action_dim) == 1 + return EpsilonGreedyStrategy( + eps=epsilon, number_of_actions=action_dim[0], + decay_method=decay) + elif method == 'uhlenbeck_ornstein': + assert action_dim is not None + assert action_low is not None + assert action_high is not None + assert mu is not None + assert theta is not None + assert sigma is not None + return UhlenbeckOrnsteinStrategy( + action_dim, action_low, action_high, epsilon, mu, theta, + sigma, decay) + else: + assert action_dim is not None + assert len(action_dim) == 1 + return GreedyStrategy() + + +class BaseDecay(object): + def __init__(self): + pass + + def decay(self, *args): + raise NotImplementedError + + def __call__(self, *args): + return self.decay(*args) + + +class NoDecay(BaseDecay): + def __init__(self): + super(NoDecay, self).__init__() + + def decay(self, cur_eps, episode): + return cur_eps + + +class LinearDecay(BaseDecay): + def __init__(self, eps_decay, min_eps=0, decay_start=0): + super(LinearDecay, self).__init__() + self.eps_decay = eps_decay + self.min_eps = min_eps + self.decay_start = decay_start + + def decay(self, cur_eps, episode): + if episode < self.decay_start: + return cur_eps + else: + return max(cur_eps - self.eps_decay, self.min_eps) + + +class BaseStrategy(object): + def __init__(self, decay_method): + self._decay_method = decay_method + + def select_action(self, values, decay_method): + raise NotImplementedError + + def 
decay(self, episode): + self.cur_eps = self._decay_method.decay(self.cur_eps, episode) + + def reset(self): + pass + + +class EpsilonGreedyStrategy(BaseStrategy): + def __init__(self, eps, number_of_actions, decay_method): + super(EpsilonGreedyStrategy, self).__init__(decay_method) + self.eps = eps + self.cur_eps = eps + self.__number_of_actions = number_of_actions + + def select_action(self, values): + do_exploration = (np.random.rand() < self.cur_eps) + if do_exploration: + action = np.random.randint(low=0, high=self.__number_of_actions) + else: + action = values.asnumpy().argmax() + return action + + +class GreedyStrategy(BaseStrategy): + def __init__(self): + super(GreedyStrategy, self).__init__(None) + + def select_action(self, values): + return values.asnumpy().argmax() + + def decay(self): + pass + + +class UhlenbeckOrnsteinStrategy(BaseStrategy): + """ + Ornstein-Uhlenbeck process: dxt = theta * (mu - xt) * dt + sigma * dWt + where Wt denotes the Wiener process. + """ + def __init__( + self, + action_dim, + action_low, + action_high, + eps, + mu=0.0, + theta=.15, + sigma=.3, + decay=NoDecay() + ): + super(UhlenbeckOrnsteinStrategy, self).__init__() + self.eps = eps + self.cur_eps = eps + + self._decay_method = decay + + self._action_dim = action_dim + self._action_low = action_low + self._action_high = action_high + + self._mu = mu + self._theta = theta + self._sigma = sigma + + self.state = np.ones(self._action_dim) * self._mu + + def _evolve_state(self): + x = self.state + dx = self._theta * (self._mu - x)\ + + self._sigma * np.random.randn(len(x)) + self.state = x + dx + return self.state + + def reset(self): + self.state = np.ones(self._action_dim) * self._mu + + def select_action(self, values): + noise = self._evolve_state() + action = values + (self.cur_eps * noise) + return np.clip(action, self._action_low, self._action_high) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/util.py b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/util.py new file mode 100644 index 0000000..a1da1ce --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/util.py @@ -0,0 +1,276 @@ +import signal +import sys +import numpy as np +import matplotlib.pyplot as plt +from matplotlib import style +import time +import os +import mxnet as mx +from mxnet import gluon, nd +import cnnarch_logger + +LOSS_FUNCTIONS = { + 'l1': gluon.loss.L1Loss(), + 'euclidean': gluon.loss.L2Loss(), + 'huber_loss': gluon.loss.HuberLoss(), + 'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(), + 'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()} + + +def make_directory_if_not_exist(directory): + if not os.path.exists(directory): + os.makedirs(directory) + + +def copy_net(net, input_state_dim, ctx): + assert isinstance(net, gluon.HybridBlock) + assert type(net.__class__) is type + + net2 = net.__class__() + net2.collect_params().initialize(mx.init.Zero(), ctx=ctx) + net2.hybridize() + net2(mx.nd.ones((1,) + input_state_dim, ctx=ctx)) + + params_of_net = [p.data() for _, p in net.collect_params().items()] + for i, (_, p) in enumerate(net2.collect_params().items()): + p.set_data(params_of_net[i]) + + return net2 + + +def copy_net_with_two_inputs(net, input_state_dim1, input_state_dim2, ctx): + assert isinstance(net, gluon.HybridBlock) + assert type(net.__class__) is type + + net2 = net.__class__() + net2.collect_params().initialize(mx.init.Zero(), 
ctx=ctx) + net2.hybridize() + net2( + nd.ones((1,) + input_state_dim1, ctx=ctx), + nd.ones((1,) + input_state_dim2, ctx=ctx)) + + params_of_net = [p.data() for _, p in net.collect_params().items()] + for i, (_, p) in enumerate(net2.collect_params().items()): + p.set_data(params_of_net[i]) + + return net2 + + +def get_loss_function(loss_function_name): + if loss_function_name not in LOSS_FUNCTIONS: + raise ValueError('Loss function does not exist') + return LOSS_FUNCTIONS[loss_function_name] + + +class AgentSignalHandler(object): + def __init__(self): + signal.signal(signal.SIGINT, self.interrupt_training) + self.__agent = None + self.__times_interrupted = 0 + + def register_agent(self, agent): + self.__agent = agent + + def interrupt_training(self, sig, frame): + self.__times_interrupted = self.__times_interrupted + 1 + if self.__times_interrupted <= 3: + if self.__agent: + self.__agent.set_interrupt_flag(True) + else: + print('Interrupt called three times: Force quit') + sys.exit(1) + +style.use('fivethirtyeight') + + +class TrainingStats(object): + def __init__(self, max_episodes): + self._logger = cnnarch_logger.ArchLogger.get_logger() + self._max_episodes = max_episodes + self._all_total_rewards = np.zeros((max_episodes,)) + self._all_eps = np.zeros((max_episodes,)) + self._all_time = np.zeros((max_episodes,)) + self._all_mean_reward_last_100_episodes = np.zeros((max_episodes,)) + + @property + def logger(self): + return self._logger + + @logger.setter + def logger(self, logger): + self._logger = logger + + @logger.deleter + def logger(self): + self._logger = None + + def add_total_reward(self, episode, total_reward): + self._all_total_rewards[episode] = total_reward + + def add_eps(self, episode, eps): + self._all_eps[episode] = eps + + def add_time(self, episode, time): + self._all_time[episode] = time + + def add_mean_reward_last_100(self, episode, mean_reward): + self._all_mean_reward_last_100_episodes[episode] = mean_reward + + def log_episode(self, *args): + raise NotImplementedError + + def mean_of_reward(self, cur_episode, last=100): + if cur_episode > 0: + reward_last_100 =\ + self._all_total_rewards[max(0, cur_episode-last):cur_episode] + return np.mean(reward_last_100) + else: + return self._all_total_rewards[0] + + def save(self, path): + np.save(os.path.join(path, 'total_rewards'), self._all_total_rewards) + np.save(os.path.join(path, 'eps'), self._all_eps) + np.save(os.path.join(path, 'time'), self._all_time) + np.save( + os.path.join(path, 'mean_reward'), + self._all_mean_reward_last_100_episodes) + + def _log_episode(self, episode, start_time, training_steps, eps, reward): + self.add_eps(episode, eps) + self.add_total_reward(episode, reward) + end = time.time() + mean_reward_last_100 = self.mean_of_reward(episode, last=100) + time_elapsed = end - start_time + self.add_time(episode, time_elapsed) + self.add_mean_reward_last_100(episode, mean_reward_last_100) + return ('Episode: %d, Total Reward: %.3f, ' + 'Avg. 
Reward Last 100 Episodes: %.3f, {}, ' + 'Time: %.3f, Training Steps: %d, Eps: %.3f') % ( + episode, reward, mean_reward_last_100, time_elapsed, + training_steps, eps), mean_reward_last_100 + + +class DqnTrainingStats(TrainingStats): + def __init__(self, max_episodes): + super(DqnTrainingStats, self).__init__(max_episodes) + self._all_avg_loss = np.zeros((max_episodes,)) + + def add_avg_loss(self, episode, avg_loss): + self._all_avg_loss[episode] = avg_loss + + def log_episode( + self, episode, start_time, training_steps, avg_loss, eps, reward + ): + self.add_avg_loss(episode, avg_loss) + + info, avg_reward = self._log_episode( + episode, start_time, training_steps, eps, reward) + info = info.format(('Avg. Loss: %.3f') % (avg_loss)) + + self._logger.info(info) + return avg_reward + + def save_stats(self, path): + fig = plt.figure(figsize=(20, 20)) + + sub_rewards = fig.add_subplot(221) + sub_rewards.set_title('Total Rewards per episode') + sub_rewards.plot( + np.arange(self._max_episodes), self._all_total_rewards) + + sub_loss = fig.add_subplot(222) + sub_loss.set_title('Avg. Loss per episode') + sub_loss.plot(np.arange(self._max_episodes), self._all_avg_loss) + + sub_eps = fig.add_subplot(223) + sub_eps.set_title('Epsilon per episode') + sub_eps.plot(np.arange(self._max_episodes), self._all_eps) + + sub_rewards = fig.add_subplot(224) + sub_rewards.set_title('Avg. mean reward of last 100 episodes') + sub_rewards.plot(np.arange(self._max_episodes), + self._all_mean_reward_last_100_episodes) + + self.save(path) + plt.savefig(os.path.join(path, 'stats.pdf')) + + def save(self, path): + super(DqnTrainingStats, self).save(path) + np.save(os.path.join(path, 'avg_loss'), self._all_avg_loss) + + +class DdpgTrainingStats(TrainingStats): + def __init__(self, max_episodes): + super(DdpgTrainingStats, self).__init__(max_episodes) + self._all_avg_critic_loss = np.zeros((max_episodes,)) + self._all_avg_actor_loss = np.zeros((max_episodes,)) + self._all_avg_qvalues = np.zeros((max_episodes,)) + + def add_avg_critic_loss(self, episode, avg_critic_loss): + self._all_avg_critic_loss[episode] = avg_critic_loss + + def add_avg_actor_loss(self, episode, avg_actor_loss): + self._all_avg_actor_loss[episode] = avg_actor_loss + + def add_avg_qvalues(self, episode, avg_qvalues): + self._all_avg_qvalues[episode] = avg_qvalues + + def log_episode( + self, episode, start_time, training_steps, actor_loss, + critic_loss, qvalues, eps, reward + ): + self.add_avg_actor_loss(episode, actor_loss) + self.add_avg_critic_loss(episode, critic_loss) + self.add_avg_qvalues(episode, qvalues) + + info, avg_reward = self._log_episode( + episode, start_time, training_steps, eps, reward) + info = info.format(( + 'Avg. Actor Loss: %.3f ' + 'Avg. Critic Loss: %.3f ' + 'Avg. Q-Values: %.3f') % (actor_loss, critic_loss, qvalues)) + + self.logger.info(info) + return avg_reward + + def save(self, path): + super(DdpgTrainingStats, self).save(path) + np.save(os.path.join( + path, 'avg_critic_loss'), self._all_avg_critic_loss) + np.save(os.path.join(path, 'avg_actor_loss'), self._all_avg_actor_loss) + np.save(os.path.join(path, 'avg_qvalues'), self._all_avg_qvalues) + + def save_stats(self, path): + fig = plt.figure(figsize=(120, 120)) + + sub_rewards = fig.add_subplot(321) + sub_rewards.set_title('Total Rewards per episode') + sub_rewards.plot( + np.arange(self._max_episodes), self._all_total_rewards) + + sub_actor_loss = fig.add_subplot(322) + sub_actor_loss.set_title('Avg. 
Actor Loss per episode') + sub_actor_loss.plot( + np.arange(self._max_episodes), self._all_avg_actor_loss) + + sub_critic_loss = fig.add_subplot(323) + sub_critic_loss.set_title('Avg. Critic Loss per episode') + sub_critic_loss.plot( + np.arange(self._max_episodes), self._all_avg_critic_loss) + + sub_qvalues = fig.add_subplot(324) + sub_qvalues.set_title('Avg. QValues per episode') + sub_qvalues.plot( + np.arange(self._max_episodes), self._all_avg_qvalues) + + sub_eps = fig.add_subplot(325) + sub_eps.set_title('Epsilon per episode') + sub_eps.plot(np.arange(self._max_episodes), self._all_eps) + + sub_rewards = fig.add_subplot(326) + sub_rewards.set_title('Avg. mean reward of last 100 episodes') + sub_rewards.plot(np.arange(self._max_episodes), + self._all_mean_reward_last_100_episodes) + + self.save(path) + plt.savefig(os.path.join(path, 'stats.pdf')) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/start_training.sh b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/start_training.sh new file mode 100644 index 0000000..37ede26 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/mountaincar/start_training.sh @@ -0,0 +1,2 @@ +#!/bin/bash +python CNNTrainer_mountaincar_master_actor.py \ No newline at end of file diff --git a/src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNDataLoader_torcs_agent_torcsAgent_dqn.py b/src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNDataLoader_torcs_agent_torcsAgent_dqn.py new file mode 100644 index 0000000..3bc9087 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNDataLoader_torcs_agent_torcsAgent_dqn.py @@ -0,0 +1,57 @@ +import os +import h5py +import mxnet as mx +import logging +import sys + +class torcs_agent_torcsAgent_dqnDataLoader: + _input_names_ = ['state'] + _output_names_ = ['qvalues_label'] + + def __init__(self): + self._data_dir = "data/" + + def load_data(self, batch_size): + train_h5, test_h5 = self.load_h5_files() + + data_mean = train_h5[self._input_names_[0]][:].mean(axis=0) + data_std = train_h5[self._input_names_[0]][:].std(axis=0) + 1e-5 + + train_iter = mx.io.NDArrayIter(train_h5[self._input_names_[0]], + train_h5[self._output_names_[0]], + batch_size=batch_size, + data_name=self._input_names_[0], + label_name=self._output_names_[0]) + test_iter = None + if test_h5 != None: + test_iter = mx.io.NDArrayIter(test_h5[self._input_names_[0]], + test_h5[self._output_names_[0]], + batch_size=batch_size, + data_name=self._input_names_[0], + label_name=self._output_names_[0]) + return train_iter, test_iter, data_mean, data_std + + def load_h5_files(self): + train_h5 = None + test_h5 = None + train_path = self._data_dir + "train.h5" + test_path = self._data_dir + "test.h5" + if os.path.isfile(train_path): + train_h5 = h5py.File(train_path, 'r') + if not (self._input_names_[0] in train_h5 and self._output_names_[0] in train_h5): + logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the datasets: " + + "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'") + sys.exit(1) + test_iter = None + if os.path.isfile(test_path): + test_h5 = h5py.File(test_path, 'r') + if not (self._input_names_[0] in test_h5 and self._output_names_[0] in test_h5): + logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the datasets: " + + "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'") + sys.exit(1) + else: + logging.warning("Couldn't load test 
set. File '" + os.path.abspath(test_path) + "' does not exist.") + return train_h5, test_h5 + else: + logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.") + sys.exit(1) \ No newline at end of file diff --git a/src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNTrainer_torcs_agent_torcsAgent_dqn.py b/src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNTrainer_torcs_agent_torcsAgent_dqn.py index 2c2778e..c8548d4 100644 --- a/src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNTrainer_torcs_agent_torcsAgent_dqn.py +++ b/src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNTrainer_torcs_agent_torcsAgent_dqn.py @@ -1,5 +1,6 @@ from reinforcement_learning.agent import DqnAgent from reinforcement_learning.util import AgentSignalHandler +from reinforcement_learning.cnnarch_logger import ArchLogger import reinforcement_learning.environment import CNNCreator_torcs_agent_torcsAgent_dqn @@ -9,9 +10,6 @@ import re import logging import mxnet as mx -session_output_dir = 'session' -agent_name='torcs_agent_torcsAgent_dqn' -session_param_output = os.path.join(session_output_dir, agent_name) def resume_session(): session_param_output = os.path.join(session_output_dir, agent_name) @@ -32,7 +30,18 @@ def resume_session(): break return resume_session, resume_directory + if __name__ == "__main__": + agent_name='torcs_agent_torcsAgent_dqn' + # Prepare output directory and logger + output_directory = 'model_output'\ + + '/' + agent_name\ + + '/' + time.strftime( + '%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + ArchLogger.set_output_directory(output_directory) + ArchLogger.set_logger_name(agent_name) + ArchLogger.set_output_level(ArchLogger.INFO) + env_params = { 'ros_node_name' : 'torcs_agent_torcsAgent_dqnTrainerNode', 'state_topic' : 'preprocessor_state', @@ -41,58 +50,59 @@ if __name__ == "__main__": 'terminal_state_topic' : 'prepocessor_is_terminal' } env = reinforcement_learning.environment.RosEnvironment(**env_params) - context = mx.cpu() - net_creator = CNNCreator_torcs_agent_torcsAgent_dqn.CNNCreator_torcs_agent_torcsAgent_dqn() - net_creator.construct(context) - replay_memory_params = { - 'method':'buffer', - 'memory_size':1000000, - 'sample_size':32, - 'state_dtype':'float32', - 'action_dtype':'uint8', - 'rewards_dtype':'float32' - } + context = mx.cpu() + qnet_creator = CNNCreator_torcs_agent_torcsAgent_dqn.CNNCreator_torcs_agent_torcsAgent_dqn() + qnet_creator.construct(context) - policy_params = { - 'method':'epsgreedy', - 'epsilon': 1, - 'min_epsilon': 0.01, - 'epsilon_decay_method': 'linear', - 'epsilon_decay': 0.0001, + agent_params = { + 'environment': env, + 'replay_memory_params': { + 'method':'buffer', + 'memory_size':1000000, + 'sample_size':32, + 'state_dtype':'float32', + 'action_dtype':'float32', + 'rewards_dtype':'float32' + }, + 'strategy_params': { + 'method':'epsgreedy', + 'epsilon': 1, + 'min_epsilon': 0.01, + 'epsilon_decay_method': 'linear', + 'epsilon_decay': 0.0001, + }, + 'agent_name': agent_name, + 'verbose': True, + 'state_dim': (5,), + 'action_dim': (30,), + 'ctx': 'cpu', + 'discount_factor': 0.999, + 'training_episodes': 20000, + 'train_interval': 1, + 'snapshot_interval': 1000, + 'max_episode_step': 999999999, + 'qnet':qnet_creator.net, + 'use_fix_target': True, + 'target_update_interval': 500, + 'loss_function': 'euclidean', + 'optimizer': 'rmsprop', + 'optimizer_params': { + 'learning_rate': 0.001 }, + 'double_dqn': True, } - resume_session, resume_directory = 
resume_session() + resume, resume_directory = resume_session() - if resume_session: - agent = DqnAgent.resume_from_session(resume_directory, net_creator.net, env) + if resume: + resume_agent_params = { + 'session_dir': resume_directory, + 'environment': env, + 'net': qnet_creator.net, + } + agent = DqnAgent.resume_from_session(**resume_agent_params) else: - agent = DqnAgent( - network = net_creator.net, - environment=env, - replay_memory_params=replay_memory_params, - policy_params=policy_params, - state_dim=net_creator.get_input_shapes()[0], - ctx='cpu', - discount_factor=0.999, - loss_function='euclidean', - optimizer='rmsprop', - optimizer_params={ - 'learning_rate': 0.001 }, - training_episodes=20000, - train_interval=1, - use_fix_target=True, - target_update_interval=500, - double_dqn = True, - snapshot_interval=1000, - agent_name=agent_name, - max_episode_step=999999999, - output_directory=session_output_dir, - verbose=True, - live_plot = True, - make_logfile=True, - target_score=None - ) + agent = DqnAgent(**agent_params) signal_handler = AgentSignalHandler() signal_handler.register_agent(agent) @@ -100,4 +110,4 @@ if __name__ == "__main__": train_successful = agent.train() if train_successful: - agent.save_best_network(net_creator._model_dir_ + net_creator._model_prefix_ + '_newest', epoch=0) \ No newline at end of file + agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_newest', epoch=0) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/_torcs_agent_dqn_reward_executor.so b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/_torcs_agent_dqn_reward_executor.so deleted file mode 100755 index 4a1405c0e6bb336829975a206cfc4c776a5e3f06..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 174216 zcmeEv3w&Hf757csg%qe89>pTB|O&)$ZSG1f4QO#B^yzfwfPjTwj;#v%BtA7dDWg%|oS zxlpp)fltFFGp#gm8Xo-d$$y1~wZZyY9oK0$zSd)P*#$4@C)49n;7l|+lZAyfwRKBt zl34@kd-q?u=u~zL|Cn}CnrY$NOBaJa;wvl-mRALw*22QNij`#*C52@b%PIjh5MR&Xn!aPLicAm0VdLhL za@P%j!omf)7uua-fh}*1OyJ*uH08~fWfm6}R+JaktXi?Ovb?aWwmL1oECTZ%@L5AF zNyW_A#1o0c$XscH(1y-*MqvQOQ9l zt1VnsS5Z8uzxBsyeAciUOgr(7ci>`lu-v)Fqs79PWg%hOiEjWp;43MrElTAM9N*UC zHGRLmO2aTcyc{pBNoj$><59I%D%#bU8BVYz|$$m z1MyLg1NbW}T(t1KFZ&lPv~_5!1({_%t#osyPIFwHxQ;fA#%$ynfSZK|XWgA)TmnY7 z+?!zp4mOP5UuPJ7`(Vy=W*Cj1*EDgAtKu0(6mY&H#~7s$UNk1?J(+pSOd z8c;I*yt3lT@}lVl6=l;`6rmHM1;MQH>6+)$tH|f+OY6$YOVEt`W4k`Yq+RJ#WpPnC zd!@_2hzG|z@N$MR2`rszrI*ArTymhH_k?>mu!jSCIIxEUdpNL%1A92IhXZ>!u!jSC zIIxEUdpNL%1A92IhXZ>!u!jSCIIxEUdpNL%1A92IhXZ>!u!jTxiySBjMBWI5c4P%2 z4gCSLpcGx7gqZ zqyZn}0{@N;{+f_7xcB2+;8iyGg97e3w||K4%#=*4JNLcdX7?#J241kki1~)Xu&)zC z$p8%RZL=7{s}#WS;(HDnY{B@04aB=HfvD$sD~$o}*P3D3H1Co6vP%o~N2KAyP{W6l zywORlQ0)IO5S_39_wfSexJ_VxUqTR>9^TPonv!zm6U$us2RI^i&`_RVm$PDHKQ2F- z4Ii3y{kwa-55;VaG)P|sqB9QzTU68YXqvc1#+!Iq1?sL(WkVz^qMhyKQy)g^b_Tlq z3ZMXKpyV}6HFQ(tc*1m{WnorecDJ|b*koBfWZZM0l;uFNcB+4)agki!R0=ihJjB~H zL!doxU7hR=M7KW>c@{**)WHxivu;|Loq4w&OC2kuLRrkdMdd%r+<7`%g3&@%IMI*#WZvKzY63 z>|g(67;T37>T@Wx85QxjC;S%9-TxMxJ+iRe`Y&Rdce%#Y9Wc)XCKTm$1Jg?y(?M=b zZR{t;$*(X(`CO6o_*!dKz+64cJm)Zoid){Wo+ZaGQlLOuT<~ay-KtUAV|evd9D$)3 zcD|HphRp_PGKPy$-!+C)1xh3`HHJNE@$TKb?J?}pV~BfW>7_VeYz`?S(<|HPUo}kwHo+Imjq^>Vgup^SEIA9xDLt!=6-5x-DpGA9_rhT%L zcF3_g@j%JBm9!I^5ERJ+VFlglGzgW*b~k#$ewYP2OlcM6ow@C~x*xsLk2t7CYYzHz zL^-(V&$8z$O&E2IVI1S%IoH3_d)ubQS2Mg1Z5pT_vhDs#Pr-3RJ%I6&0e18hj9}aR zm$(!Gp_T6PtU#zMBM_MtFg;VyNvNb!+`W^o%0zC6>&e`!X6uSbcRtFNfK&I(!fy49 
zKB%wigZipIsITgS`l>#tuj+&Psy?W%>Vx{KKB%wigNCX;XsGIghN?bjsOp1;sy=9_ z>Vt-=K4_@wgNCX;XsGIg#;QJOtm=ctsy=9}>Vw9rK4`4!gT|^pXsqgk#;QJOtm=cN zsy=9{>Vu}LK4_}ygQluJXsYUirm8+@s_KKLsy=9{>VxL0K4`A$gXXF}2#hmas_}AZ z$BL8NI_T*}dgWYuwU=KM-z;#>s#8`hSAUv%B8}SG(nX8#<~}{k?>GsNV=K#=9Lg3} zmNhSMD09$j@mJ{8)8WFt-g+9^E6Ql#tSocTqm8OUkHTy(YoLQfMHvkX6=k#wSC&<- zw}Ezz3O(A4E6W_#L7RtNuhCx_k4Ar`PaA0~si^DFw?;ZxRp`-yrLxRn9W-E9=&6D7 zTzZflFE_wLtLj}g!3O%PeL8a>akcjb1RJvacExfPJQ!Pi3b>>aXyRvvBlK!1ewPlt zLXSUtz(sn3{t6_+X6R3^`Hu?lN-!x|pdO@FdVH=%fBRuwp}bQ~y)jxnb{v$)@8z{# zdX(QvT;9G4b^5!2@;{mn)0-Xpdb{P#j~#)9ymr{?l{b}1vl`V4VzTtN&GaHmejzxu zzA;cLbpA^a+FRv4y-J^6d&B0Hkt+J?1wf+QGNH2kd9}1Jpxh=8txJhG67QENI+FgB zMcIuw^+Xj`X_-?`#?!Cp$#}}8o|HDeiY-e5ol+0En}L>_;H6iubu_BCLb3-cL`~}b zobs1s>fU)-wPtyxfwPdl>!9ApSteD_b62cPN}J?moTWBES-o=8UfRGf*0dKk@vE@< zoQo4>#dJwjeA{J3W2{1ce_FMg{Jn+ZTP3S0wG`hISxuvnUl2K-FkVf#0B?k>CSOQ$ z^(M$_3iM5&;wvD_bUIr21)u6wpO#X7A7^DLznrY1G|*sqOXA<}%^Vq2)4Hx&8 0 - assert target_update_interval > 0 - assert snapshot_interval > 0 - assert max_episode_step > 0 - assert training_episodes > 0 - assert replay_memory_params is not None - assert type(state_dim) is tuple - - self.__ctx = mx.gpu() if ctx == 'gpu' else mx.cpu() - self.__qnet = network - - self.__environment = environment - self.__discount_factor = discount_factor - self.__training_episodes = training_episodes - self.__train_interval = train_interval - self.__verbose = verbose - self.__state_dim = state_dim - self.__action_dim = self.__qnet(nd.random_normal(shape=((1,) + self.__state_dim), ctx=self.__ctx)).shape[1:] + target_score=None + ): + assert 0 < discount_factor <= 1,\ + 'Discount factor must be between 0 and 1' + assert train_interval > 0, 'Train interval must be greater 0' + assert snapshot_interval > 0, 'Snapshot interval must be greater 0' + assert max_episode_step > 0,\ + 'Maximal steps per episode must be greater 0' + assert training_episodes > 0, 'Trainings episode must be greater 0' + assert replay_memory_params is not None,\ + 'Replay memory parameter not set' + assert type(state_dim) is tuple, 'State dimension is not a tuple' + assert type(action_dim) is tuple, 'Action dimension is not a tuple' + + self._logger = ArchLogger.get_logger() + self._ctx = mx.gpu() if ctx == 'gpu' else mx.cpu() + self._environment = environment + self._discount_factor = discount_factor + self._training_episodes = training_episodes + self._train_interval = train_interval + self._verbose = verbose + self._state_dim = state_dim replay_memory_params['state_dim'] = state_dim - self.__replay_memory_params = replay_memory_params + replay_memory_params['action_dim'] = action_dim + self._replay_memory_params = replay_memory_params rm_builder = ReplayMemoryBuilder() - self.__memory = rm_builder.build_by_params(**replay_memory_params) - self.__minibatch_size = self.__memory.sample_size - - policy_params['action_dim'] = self.__action_dim - self.__policy_params = policy_params - p_builder = ActionPolicyBuilder() - self.__policy = p_builder.build_by_params(**policy_params) - - self.__target_update_interval = target_update_interval - self.__target_qnet = copy_net(self.__qnet, self.__state_dim, ctx=self.__ctx) - self.__loss_function_str = loss_function - self.__loss_function = get_loss_function(loss_function) - self.__agent_name = agent_name - self.__snapshot_interval = snapshot_interval - self.__creation_time = time.time() - self.__max_episode_step = max_episode_step - self.__optimizer = optimizer - self.__optimizer_params = optimizer_params - self.__make_logfile = make_logfile - 
self.__double_dqn = double_dqn - self.__use_fix_target = use_fix_target - self.__live_plot = live_plot - self.__user_given_directory = output_directory - self.__target_score = target_score - - self.__interrupt_flag = False + self._memory = rm_builder.build_by_params(**replay_memory_params) + self._minibatch_size = self._memory.sample_size + self._action_dim = action_dim + + strategy_params['action_dim'] = self._action_dim + self._strategy_params = strategy_params + strategy_builder = StrategyBuilder() + self._strategy = strategy_builder.build_by_params(**strategy_params) + self._agent_name = agent_name + self._snapshot_interval = snapshot_interval + self._creation_time = time.time() + self._max_episode_step = max_episode_step + self._start_training = start_training + self._output_directory = output_directory + self._target_score = target_score + + self._evaluation_samples = evaluation_samples + self._best_avg_score = -np.infty + self._best_net = None + + self._interrupt_flag = False + self._training_stats = None # Training Context - self.__current_episode = 0 - self.__total_steps = 0 - - # Initialize best network - self.__best_net = copy_net(self.__qnet, self.__state_dim, self.__ctx) - self.__best_avg_score = None + self._current_episode = 0 + self._total_steps = 0 - # Gluon Trainer definition - self.__training_stats = None + @property + def current_episode(self): + return self._current_episode - # Prepare output directory and logger - self.__output_directory = output_directory\ - + '/' + self.__agent_name\ - + '/' + time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(self.__creation_time)) - self.__logger = self.__setup_logging() - self.__logger.info('Agent created with following parameters: {}'.format(self.__make_config_dict())) + @property + def environment(self): + return self._environment - @classmethod - def from_config_file(cls, network, environment, config_file_path, ctx=None): + def save_config_file(self): import json - # Load config - with open(config_file_path, 'r') as config_file: - config_dict = json.load(config_file) - return cls(network, environment, ctx=ctx, **config_dict) + make_directory_if_not_exist(self._output_directory) + filename = os.path.join(self._output_directory, 'config.json') + config = self._make_config_dict() + with open(filename, mode='w') as fp: + json.dump(config, fp, indent=4) + + def set_interrupt_flag(self, interrupt): + self._interrupt_flag = interrupt + + def _interrupt_training(self): + import pickle + self._logger.info('Training interrupted; Store state for resuming') + session_dir = self._get_session_dir() + agent_session_file = os.path.join(session_dir, 'agent.p') + logger = self._logger + + self._make_pickle_ready(session_dir) + + with open(agent_session_file, 'wb') as f: + pickle.dump(self, f, protocol=2) + logger.info('State successfully stored') + + def _make_pickle_ready(self, session_dir): + del self._training_stats.logger + self._logger = None + self._environment.close() + self._environment = None + self._save_net(self._best_net, 'best_net', session_dir) + self._best_net = None + + def _make_config_dict(self): + config = dict() + config['state_dim'] = self._state_dim + config['action_dim'] = self._action_dim + config['ctx'] = str(self._ctx) + config['discount_factor'] = self._discount_factor + config['strategy_params'] = self._strategy_params + config['replay_memory_params'] = self._replay_memory_params + config['training_episodes'] = self._training_episodes + config['start_training'] = self._start_training + config['evaluation_samples'] = 
self._evaluation_samples + config['train_interval'] = self._train_interval + config['snapshot_interval'] = self._snapshot_interval + config['agent_name'] = self._agent_name + config['max_episode_step'] = self._max_episode_step + config['output_directory'] = self._output_directory + config['verbose'] = self._verbose + config['target_score'] = self._target_score + return config + + def _adjust_optimizer_params(self, optimizer_params): + if 'weight_decay' in optimizer_params: + optimizer_params['wd'] = optimizer_params['weight_decay'] + del optimizer_params['weight_decay'] + if 'learning_rate_decay' in optimizer_params: + min_learning_rate = 1e-8 + if 'learning_rate_minimum' in optimizer_params: + min_learning_rate = optimizer_params['learning_rate_minimum'] + del optimizer_params['learning_rate_minimum'] + optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler( + optimizer_params['step_size'], + factor=optimizer_params['learning_rate_decay'], + stop_factor_lr=min_learning_rate) + del optimizer_params['step_size'] + del optimizer_params['learning_rate_decay'] + + return optimizer_params + + def _sample_from_memory(self): + states, actions, rewards, next_states, terminals\ + = self._memory.sample(batch_size=self._minibatch_size) + states = nd.array(states, ctx=self._ctx) + actions = nd.array(actions, ctx=self._ctx) + rewards = nd.array(rewards, ctx=self._ctx) + next_states = nd.array(next_states, ctx=self._ctx) + terminals = nd.array(terminals, ctx=self._ctx) + return states, actions, rewards, next_states, terminals + + def evaluate(self, target=None, sample_games=100, verbose=True): + target = self._target_score if target is None else target + if target: + target_achieved = 0 + total_reward = 0 + + self._logger.info('Sampling from {} games...'.format(sample_games)) + for g in pyprind.prog_bar(range(sample_games)): + state = self._environment.reset() + step = 0 + game_reward = 0 + terminal = False + while not terminal and (step < self._max_episode_step): + action = self.get_next_action(state) + state, reward, terminal, _ = self._environment.step(action) + game_reward += reward + step += 1 + + if verbose: + info = 'Game %d: Reward %f' % (g, game_reward) + self._logger.debug(info) + if target: + if game_reward >= target: + target_achieved += 1 + total_reward += game_reward + + avg_reward = float(total_reward)/float(sample_games) + info = 'Avg. 
Reward: %f' % avg_reward + if target: + target_achieved_ratio = int( + (float(target_achieved)/float(sample_games))*100) + info += '; Target Achieved in %d%% of games'\ + % (target_achieved_ratio) + + if verbose: + self._logger.info(info) + return avg_reward + + def _do_snapshot_if_in_interval(self, episode): + do_snapshot =\ + (episode != 0 and (episode % self._snapshot_interval == 0)) + if do_snapshot: + self.save_parameters(episode=episode) + self._evaluate() + + def _evaluate(self, verbose=True): + avg_reward = self.evaluate( + sample_games=self._evaluation_samples, verbose=False) + info = 'Evaluation -> Average Reward in {} games: {}'.format( + self._evaluation_samples, avg_reward) + + if self._best_avg_score is None or self._best_avg_score <= avg_reward: + self._save_current_as_best_net() + self._best_avg_score = avg_reward + if verbose: + self._logger.info(info) + + def _is_target_reached(self, avg_reward): + return self._target_score is not None\ + and avg_reward > self._target_score + + def _do_training(self): + return (self._total_steps % self._train_interval == 0) and\ + (self._memory.is_sample_possible(self._minibatch_size)) and\ + (self._current_episode >= self._start_training) + + def _check_interrupt_routine(self): + if self._interrupt_flag: + self._interrupt_flag = False + self._interrupt_training() + return True + return False + + def _is_target_reached(self, avg_reward): + return self._target_score is not None\ + and avg_reward > self._target_score + + def _save_parameters(self, net, episode=None, filename='dqn-agent-params'): + assert self._output_directory + assert isinstance(net, gluon.HybridBlock) + make_directory_if_not_exist(self._output_directory) + + if(episode is not None): + self._logger.info( + 'Saving model parameters after episode %d' % episode) + filename = filename + '-ep{}'.format(episode) + else: + self._logger.info('Saving model parameters') + self._save_net(net, filename) + + def _save_net(self, net, filename, filedir=None): + filedir = self._output_directory if filedir is None else filedir + filename = os.path.join(filedir, filename + '.params') + net.save_parameters(filename) + + def save_best_network(self, path, epoch=0): + self._logger.info( + 'Saving best network with average reward of {}'.format( + self._best_avg_score)) + self._best_net.export(path, epoch=epoch) + + def _get_session_dir(self): + session_dir = os.path.join( + self._output_directory, '.interrupted_session') + make_directory_if_not_exist(session_dir) + return session_dir + + def _save_current_as_best_net(self): + raise NotImplementedError + + def get_next_action(self, state): + raise NotImplementedError + + def save_parameters(self, episode): + raise NotImplementedError + + def train(self, episodes=None): + raise NotImplementedError + + +class DdpgAgent(Agent): + def __init__( + self, + actor, + critic, + environment, + replay_memory_params, + strategy_params, + state_dim, + action_dim, + soft_target_update_rate=.001, + actor_optimizer='adam', + actor_optimizer_params={'learning_rate': 0.0001}, + critic_optimizer='adam', + critic_optimizer_params={'learning_rate': 0.001}, + ctx=None, + discount_factor=.9, + training_episodes=50, + start_training=20, + train_interval=1, + snapshot_interval=200, + agent_name='DdpgAgent', + max_episode_step=9999, + evaluation_samples=100, + output_directory='model_parameters', + verbose=True, + target_score=None + ): + super(DdpgAgent, self).__init__( + environment=environment, replay_memory_params=replay_memory_params, + 
strategy_params=strategy_params, state_dim=state_dim, + action_dim=action_dim, ctx=ctx, discount_factor=discount_factor, + training_episodes=training_episodes, start_training=start_training, + train_interval=train_interval, + snapshot_interval=snapshot_interval, agent_name=agent_name, + max_episode_step=max_episode_step, + output_directory=output_directory, verbose=verbose, + target_score=target_score, evaluation_samples=evaluation_samples) + assert critic is not None, 'Critic not set' + assert actor is not None, 'Actor is not set' + assert soft_target_update_rate > 0,\ + 'Target update must be greater zero' + assert actor_optimizer is not None, 'No actor optimizer set' + assert critic_optimizer is not None, 'No critic optimizer set' + + self._actor = actor + self._critic = critic + + self._actor_target = self._copy_actor() + self._critic_target = self._copy_critic() + + self._actor_optimizer = actor_optimizer + self._actor_optimizer_params = self._adjust_optimizer_params( + actor_optimizer_params) + + self._critic_optimizer = critic_optimizer + self._critic_optimizer_params = self._adjust_optimizer_params( + critic_optimizer_params) + + self._soft_target_update_rate = soft_target_update_rate + + self._logger.info( + 'Agent created with following parameters: {}'.format( + self._make_config_dict())) + + self._best_net = self._copy_actor() + + self._training_stats = DdpgTrainingStats(self._training_episodes) + + def _make_pickle_ready(self, session_dir): + super(DdpgAgent, self)._make_pickle_ready(session_dir) + self._save_net(self._actor, 'actor', session_dir) + self._actor = None + self._save_net(self._critic, 'critic', session_dir) + self._critic = None + self._save_net(self._actor_target, 'actor_target', session_dir) + self._actor_target = None + self._save_net(self._critic_target, 'critic_target', session_dir) + self._critic_target = None @classmethod - def resume_from_session(cls, session_dir, net, environment): + def resume_from_session(cls, session_dir, actor, critic, environment): import pickle if not os.path.exists(session_dir): raise ValueError('Session directory does not exist') @@ -121,386 +385,516 @@ class DqnAgent(object): files = dict() files['agent'] = os.path.join(session_dir, 'agent.p') files['best_net_params'] = os.path.join(session_dir, 'best_net.params') - files['q_net_params'] = os.path.join(session_dir, 'qnet.params') - files['target_net_params'] = os.path.join(session_dir, 'target_net.params') + files['actor_net_params'] = os.path.join(session_dir, 'actor.params') + files['actor_target_net_params'] = os.path.join( + session_dir, 'actor_target.params') + files['critic_net_params'] = os.path.join(session_dir, 'critic.params') + files['critic_target_net_params'] = os.path.join( + session_dir, 'critic_target.params') for file in files.values(): if not os.path.exists(file): - raise ValueError('Session directory is not complete: {} is missing'.format(file)) + raise ValueError( + 'Session directory is not complete: {} is missing' + .format(file)) with open(files['agent'], 'rb') as f: agent = pickle.load(f) - agent.__environment = environment - agent.__qnet = net - agent.__qnet.load_parameters(files['q_net_params'], agent.__ctx) - agent.__qnet.hybridize() - agent.__qnet(nd.random_normal(shape=((1,) + agent.__state_dim), ctx=agent.__ctx)) - agent.__best_net = copy_net(agent.__qnet, agent.__state_dim, agent.__ctx) - agent.__best_net.load_parameters(files['best_net_params'], agent.__ctx) - agent.__target_qnet = copy_net(agent.__qnet, agent.__state_dim, agent.__ctx) - 
agent.__target_qnet.load_parameters(files['target_net_params'], agent.__ctx) + agent._environment = environment + + agent._actor = actor + agent._actor.load_parameters(files['actor_net_params'], agent._ctx) + agent._actor.hybridize() + agent._actor(nd.random_normal( + shape=((1,) + agent._state_dim), ctx=agent._ctx)) + + agent._best_net = copy_net(agent._actor, agent._state_dim, agent._ctx) + agent._best_net.load_parameters(files['best_net_params'], agent._ctx) + + agent._actor_target = copy_net( + agent._actor, agent._state_dim, agent._ctx) + agent._actor_target.load_parameters(files['actor_target_net_params']) - agent.__logger = agent.__setup_logging(append=True) - agent.__training_stats.logger = agent.__logger - agent.__logger.info('Agent was retrieved; Training can be continued') + agent._critic = critic + agent._critic.load_parameters(files['critic_net_params'], agent._ctx) + agent._critic.hybridize() + agent._critic( + nd.random_normal(shape=((1,) + agent._state_dim), ctx=agent._ctx), + nd.random_normal(shape=((1,) + agent._action_dim), ctx=agent._ctx)) + + agent._critic_target = copy_net_with_two_inputs( + agent._critic, agent._state_dim, agent._action_dim, agent._ctx) + agent._critic_target.load_parameters(files['critic_target_net_params']) + + agent._logger = ArchLogger.get_logger() + agent._training_stats.logger = ArchLogger.get_logger() + agent._logger.info('Agent was retrieved; Training can be continued') return agent - def __interrupt_training(self): - import pickle - self.__logger.info('Training interrupted; Store state for resuming') - session_dir = os.path.join(self.__output_directory, '.interrupted_session') - if not os.path.exists(session_dir): - os.mkdir(session_dir) + def _save_current_as_best_net(self): + self._best_net = self._copy_actor() - del self.__training_stats.logger - logger = self.__logger - self.__logger = None - self.__environment.close() - self.__environment = None + def get_next_action(self, state): + action = self._actor(nd.array([state], ctx=self._ctx)) + return action[0].asnumpy() - self.__save_net(self.__qnet, 'qnet', session_dir) - self.__qnet = None - self.__save_net(self.__best_net, 'best_net', session_dir) - self.__best_net = None - self.__save_net(self.__target_qnet, 'target_net', session_dir) - self.__target_qnet = None + def save_parameters(self, episode): + self._save_parameters(self._actor, episode=episode) - agent_session_file = os.path.join(session_dir, 'agent.p') + def train(self, episodes=None): + self.save_config_file() + self._logger.info("--- Start DDPG training ---") + episodes = \ + episodes if episodes is not None else self._training_episodes - with open(agent_session_file, 'wb') as f: - pickle.dump(self, f) - self.__logger = logger - logger.info('State successfully stored') + resume = (self._current_episode > 0) + if resume: + self._logger.info("Training session resumed") + self._logger.info( + "Starting from episode {}".format(self._current_episode)) + else: + self._training_stats = DdpgTrainingStats(episodes) - @property - def current_episode(self): - return self.__current_episode + # Initialize target Q' and mu' + self._actor_target = self._copy_actor() + self._critic_target = self._copy_critic() - @property - def environment(self): - return self.__environment + # Initialize l2 loss for critic network + l2_loss = gluon.loss.L2Loss() - def __adjust_optimizer_params(self, optimizer_params): - if 'weight_decay' in optimizer_params: - optimizer_params['wd'] = optimizer_params['weight_decay'] - del optimizer_params['weight_decay'] - 
if 'learning_rate_decay' in optimizer_params: - min_learning_rate = 1e-8 - if 'learning_rate_minimum' in optimizer_params: - min_learning_rate = optimizer_params['learning_rate_minimum'] - del optimizer_params['learning_rate_minimum'] - optimizer_params['lr_scheduler'] = mx.lr.scheduler.FactorScheduler( - optimizer_params['step_size'], - factor=optimizer_params['learning_rate_decay'], - stop_factor_lr=min_learning_rate) - del optimizer_params['step_size'] - del optimizer_params['learning_rate_decay'] + # Initialize critic and actor trainer + trainer_actor = gluon.Trainer( + self._actor.collect_params(), self._actor_optimizer, + self._actor_optimizer_params) + trainer_critic = gluon.Trainer( + self._critic.collect_params(), self._critic_optimizer, + self._critic_optimizer_params) - return optimizer_params + # For episode=1..n + while self._current_episode < episodes: + # Check interrupt flag + if self._check_interrupt_routine(): + return False - def set_interrupt_flag(self, interrupt): - self.__interrupt_flag = interrupt + # Initialize new episode + step = 0 + episode_reward = 0 + start = time.time() + episode_critic_loss = 0 + episode_actor_loss = 0 + episode_avg_q_value = 0 + training_steps = 0 + # Get initialial observation state s + state = self._environment.reset() + + # For step=1..T + while step < self._max_episode_step: + # Select an action a = mu(s) + N(step) according to current + # actor and exploration noise N according to strategy + action = self._strategy.select_action( + self.get_next_action(state)) + + # Execute action a and observe reward r and next state ns + next_state, reward, terminal, _ = \ + self._environment.step(action) + + self._logger.debug( + 'Applied action {} with reward {}'.format(action, reward)) + + # Store transition (s,a,r,ns) in replay buffer + self._memory.append( + state, action, reward, next_state, terminal) + + if self._do_training(): + # Sample random minibatch of b transitions + # (s_i, a_i, r_i, s_(i+1)) from replay buffer + states, actions, rewards, next_states, terminals =\ + self._sample_from_memory() + + actor_target_actions = self._actor_target(next_states) + critic_target_qvalues = self._critic_target( + next_states, actor_target_actions) + + rewards = rewards.reshape(self._minibatch_size, 1) + terminals = terminals.reshape(self._minibatch_size, 1) + + # y = r_i + discount * Q'(s_(i+1), mu'(s_(i+1))) + y = rewards + (1.0 - terminals) * self._discount_factor\ + * critic_target_qvalues + + # Train the critic network + with autograd.record(): + qvalues = self._critic(states, actions) + critic_loss = l2_loss(qvalues, y) + critic_loss.backward() + trainer_critic.step(self._minibatch_size) + + # Train the actor network + # Temporary critic so that gluon trainer does not mess + # with critic parameters + tmp_critic = self._copy_critic() + with autograd.record(): + actor_qvalues = tmp_critic(states, self._actor(states)) + # For maximizing qvalues we have to multiply with -1 + # as we use a minimizer + actor_loss = -1 * actor_qvalues + actor_loss.backward() + trainer_actor.step(self._minibatch_size) + + # Update target networks: + self._actor_target = self._soft_update( + self._actor, self._actor_target, + self._soft_target_update_rate) + self._critic_target = self._soft_update( + self._critic, self._critic_target, + self._soft_target_update_rate) + + # Update statistics + episode_critic_loss +=\ + np.sum(critic_loss.asnumpy()) / self._minibatch_size + episode_actor_loss +=\ + np.sum(actor_loss.asnumpy()) / self._minibatch_size + episode_avg_q_value 
+=\ + np.sum(actor_qvalues.asnumpy()) / self._minibatch_size - def __make_output_directory_if_not_exist(self): - assert self.__output_directory - if not os.path.exists(self.__output_directory): - os.makedirs(self.__output_directory) + training_steps += 1 - def __setup_logging(self, append=False): - assert self.__output_directory - assert self.__agent_name + episode_reward += reward + step += 1 + self._total_steps += 1 + state = next_state - output_level = logging.DEBUG if self.__verbose else logging.WARNING - filemode = 'a' if append else 'w' + if terminal: + # Reset the strategy + self._strategy.reset() + break + + # Log the episode results + episode_actor_loss = 0 if training_steps == 0\ + else (episode_actor_loss / training_steps) + episode_critic_loss = 0 if training_steps == 0\ + else (episode_critic_loss / training_steps) + episode_avg_q_value = 0 if training_steps == 0\ + else (episode_avg_q_value / training_steps) + + avg_reward = self._training_stats.log_episode( + self._current_episode, start, training_steps, + episode_actor_loss, episode_critic_loss, episode_avg_q_value, + self._strategy.cur_eps, episode_reward) + + self._do_snapshot_if_in_interval(self._current_episode) + self._strategy.decay(self._current_episode) + + if self._is_target_reached(avg_reward): + self._logger.info( + 'Target score is reached in average; Training is stopped') + break + + self._current_episode += 1 + + self._evaluate() + self.save_parameters(episode=self._current_episode) + self.save_best_network(os.path.join(self._output_directory, 'best')) + self._training_stats.save_stats(self._output_directory) + self._logger.info('--------- Training finished ---------') + return True + + def _make_config_dict(self): + config = super(DdpgAgent, self)._make_config_dict() + config['soft_target_update_rate'] = self._soft_target_update_rate + config['actor_optimizer'] = self._actor_optimizer + config['actor_optimizer_params'] = self._actor_optimizer_params + config['critic_optimizer'] = self._critic_optimizer + config['critic_optimizer_params'] = self._critic_optimizer_params + return config - logformat = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' - dateformat = '%d-%b-%y %H:%M:%S' - formatter = logging.Formatter(fmt=logformat, datefmt=dateformat) + def _soft_update(self, net, target, tau): + net_params = [p.data() for _, p in net.collect_params().items()] + for i, (_, p) in enumerate(target.collect_params().items()): + target_params = p.data() + p.set_data((1.0 - tau) * target_params + tau * net_params[i]) + return target + + def _copy_actor(self): + assert self._actor is not None + assert self._ctx is not None + assert type(self._state_dim) is tuple + return copy_net(self._actor, self._state_dim, ctx=self._ctx) + + def _copy_critic(self): + assert self._critic is not None + assert self._ctx is not None + assert type(self._state_dim) is tuple + assert type(self._action_dim) is tuple + return copy_net_with_two_inputs( + self._critic, self._state_dim, self._action_dim, ctx=self._ctx) + + +class DqnAgent(Agent): + def __init__( + self, + qnet, + environment, + replay_memory_params, + strategy_params, + state_dim, + action_dim, + ctx=None, + discount_factor=.9, + loss_function='euclidean', + optimizer='rmsprop', + optimizer_params={'learning_rate': 0.09}, + training_episodes=50, + start_training=0, + train_interval=1, + use_fix_target=False, + double_dqn=False, + target_update_interval=10, + snapshot_interval=200, + evaluation_samples=100, + agent_name='Dqn_agent', + max_episode_step=99999, + 
output_directory='model_parameters', + verbose=True, + target_score=None + ): + super(DqnAgent, self).__init__( + environment=environment, replay_memory_params=replay_memory_params, + strategy_params=strategy_params, state_dim=state_dim, + action_dim=action_dim, ctx=ctx, discount_factor=discount_factor, + training_episodes=training_episodes, start_training=start_training, + train_interval=train_interval, + snapshot_interval=snapshot_interval, agent_name=agent_name, + max_episode_step=max_episode_step, + output_directory=output_directory, verbose=verbose, + target_score=target_score, evaluation_samples=evaluation_samples) + + self._qnet = qnet + self._target_update_interval = target_update_interval + self._target_qnet = copy_net( + self._qnet, self._state_dim, ctx=self._ctx) + self._loss_function_str = loss_function + self._loss_function = get_loss_function(loss_function) + self._optimizer = optimizer + self._optimizer_params = optimizer_params + self._double_dqn = double_dqn + self._use_fix_target = use_fix_target - logger = logging.getLogger('DQNAgent') - logger.setLevel(output_level) + # Initialize best network + self._best_net = copy_net(self._qnet, self._state_dim, self._ctx) + self._best_avg_score = -np.infty - stream_handler = logging.StreamHandler(sys.stdout) - stream_handler.setLevel(output_level) - stream_handler.setFormatter(formatter) - logger.addHandler(stream_handler) + self._training_stats = None - if self.__make_logfile: - self.__make_output_directory_if_not_exist() - log_file = os.path.join(self.__output_directory, self.__agent_name + '.log') - file_handler = logging.FileHandler(log_file, mode=filemode) - file_handler.setLevel(output_level) - file_handler.setFormatter(formatter) - logger.addHandler(file_handler) + @classmethod + def resume_from_session(cls, session_dir, net, environment): + import pickle + if not os.path.exists(session_dir): + raise ValueError('Session directory does not exist') - return logger + files = dict() + files['agent'] = os.path.join(session_dir, 'agent.p') + files['best_net_params'] = os.path.join(session_dir, 'best_net.params') + files['q_net_params'] = os.path.join(session_dir, 'qnet.params') + files['target_net_params'] = os.path.join( + session_dir, 'target_net.params') - def __is_target_reached(self, avg_reward): - return self.__target_score is not None\ - and avg_reward > self.__target_score + for file in files.values(): + if not os.path.exists(file): + raise ValueError( + 'Session directory is not complete: {} is missing' + .format(file)) + with open(files['agent'], 'rb') as f: + agent = pickle.load(f) + + agent._environment = environment + agent._qnet = net + agent._qnet.load_parameters(files['q_net_params'], agent._ctx) + agent._qnet.hybridize() + agent._qnet(nd.random_normal( + shape=((1,) + agent._state_dim), ctx=agent._ctx)) + agent._best_net = copy_net(agent._qnet, agent._state_dim, agent._ctx) + agent._best_net.load_parameters(files['best_net_params'], agent._ctx) + agent._target_qnet = copy_net( + agent._qnet, agent._state_dim, agent._ctx) + agent._target_qnet.load_parameters( + files['target_net_params'], agent._ctx) + + agent._logger = ArchLogger.get_logger() + agent._training_stats.logger = agent._logger + agent._logger.info('Agent was retrieved; Training can be continued') + + return agent + + def _make_pickle_ready(self, session_dir): + super(DqnAgent, self)._make_pickle_ready(session_dir) + self._save_net(self._qnet, 'qnet', session_dir) + self._qnet = None + self._save_net(self._target_qnet, 'target_net', session_dir) + 
self._target_qnet = None def get_q_values(self, state, with_best=False): - return self.get_batch_q_values(nd.array([state], ctx=self.__ctx), with_best=with_best)[0] + return self.get_batch_q_values( + nd.array([state], ctx=self._ctx), with_best=with_best)[0] def get_batch_q_values(self, state_batch, with_best=False): - return self.__best_net(state_batch) if with_best else self.__qnet(state_batch) + return self._best_net(state_batch)\ + if with_best else self._qnet(state_batch) def get_next_action(self, state, with_best=False): q_values = self.get_q_values(state, with_best=with_best) action = q_values.asnumpy().argmax() - return q_values.asnumpy().argmax() + return action - def __sample_from_memory(self): - states, actions, rewards, next_states, terminals\ - = self.__memory.sample(batch_size=self.__minibatch_size) - states = nd.array(states, ctx=self.__ctx) - actions = nd.array(actions, ctx=self.__ctx) - rewards = nd.array(rewards, ctx=self.__ctx) - next_states = nd.array(next_states, ctx=self.__ctx) - terminals = nd.array(terminals, ctx=self.__ctx) - return states, actions, rewards, next_states, terminals - - def __determine_target_q_values(self, states, actions, rewards, next_states, terminals): - if self.__use_fix_target: - q_max_val = self.__target_qnet(next_states) + def __determine_target_q_values( + self, states, actions, rewards, next_states, terminals + ): + if self._use_fix_target: + q_max_val = self._target_qnet(next_states) else: - q_max_val = self.__qnet(next_states) + q_max_val = self._qnet(next_states) - if self.__double_dqn: - q_values_next_states = self.__qnet(next_states) - target_rewards = rewards + nd.choose_element_0index(q_max_val, nd.argmax_channel(q_values_next_states))\ - * (1.0 - terminals) * self.__discount_factor + if self._double_dqn: + q_values_next_states = self._qnet(next_states) + target_rewards = rewards + nd.choose_element_0index( + q_max_val, nd.argmax_channel(q_values_next_states))\ + * (1.0 - terminals) * self._discount_factor else: - target_rewards = rewards + nd.choose_element_0index(q_max_val, nd.argmax_channel(q_max_val))\ - * (1.0 - terminals) * self.__discount_factor + target_rewards = rewards + nd.choose_element_0index( + q_max_val, nd.argmax_channel(q_max_val))\ + * (1.0 - terminals) * self._discount_factor - target_qval = self.__qnet(states) + target_qval = self._qnet(states) for t in range(target_rewards.shape[0]): target_qval[t][actions[t]] = target_rewards[t] return target_qval def __train_q_net_step(self, trainer): - states, actions, rewards, next_states, terminals = self.__sample_from_memory() - target_qval = self.__determine_target_q_values(states, actions, rewards, next_states, terminals) + states, actions, rewards, next_states, terminals =\ + self._sample_from_memory() + target_qval = self.__determine_target_q_values( + states, actions, rewards, next_states, terminals) with autograd.record(): - q_values = self.__qnet(states) - loss = self.__loss_function(q_values, target_qval) + q_values = self._qnet(states) + loss = self._loss_function(q_values, target_qval) loss.backward() - trainer.step(self.__minibatch_size) + trainer.step(self._minibatch_size) return loss - def __do_snapshot_if_in_interval(self, episode): - do_snapshot = (episode != 0 and (episode % self.__snapshot_interval == 0)) - if do_snapshot: - self.save_parameters(episode=episode) - self.__evaluate() - def __do_target_update_if_in_interval(self, total_steps): - do_target_update = (self.__use_fix_target and total_steps % self.__target_update_interval == 0) + do_target_update 
= ( + self._use_fix_target and + (total_steps % self._target_update_interval == 0)) if do_target_update: - self.__logger.info('Target network is updated after {} steps'.format(total_steps)) - self.__target_qnet = copy_net(self.__qnet, self.__state_dim, self.__ctx) + self._logger.info( + 'Target network is updated after {} steps'.format(total_steps)) + self._target_qnet = copy_net( + self._qnet, self._state_dim, self._ctx) def train(self, episodes=None): - self.__logger.info("--- Start training ---") - trainer = gluon.Trainer(self.__qnet.collect_params(), self.__optimizer, self.__adjust_optimizer_params(self.__optimizer_params)) - episodes = episodes if episodes != None else self.__training_episodes - - resume = (self.__current_episode > 0) + self.save_config_file() + self._logger.info("--- Start training ---") + trainer = gluon.Trainer( + self._qnet.collect_params(), + self._optimizer, + self._adjust_optimizer_params(self._optimizer_params)) + episodes = episodes if episodes is not None\ + else self._training_episodes + + resume = (self._current_episode > 0) if resume: - self.__logger.info("Training session resumed") - self.__logger.info("Starting from episode {}".format(self.__current_episode)) + self._logger.info("Training session resumed") + self._logger.info("Starting from episode {}".format( + self._current_episode)) else: - self.__training_stats = util.TrainingStats(self.__logger, episodes, self.__live_plot) + self._training_stats = DqnTrainingStats(episodes) - # Implementation Deep Q Learning described by Mnih et. al. in Playing Atari with Deep Reinforcement Learning - while self.__current_episode < episodes: - # Check interrupt flag - if self.__interrupt_flag: - self.__interrupt_flag = False - self.__interrupt_training() + # Implementation Deep Q Learning described by + # Mnih et. al. in Playing Atari with Deep Reinforcement Learning + while self._current_episode < episodes: + if self._check_interrupt_routine(): return False step = 0 episode_reward = 0 start = time.time() - state = self.__environment.reset() + state = self._environment.reset() episode_loss = 0 training_steps = 0 - while step < self.__max_episode_step: - #1. Choose an action based on current game state and policy - q_values = self.__qnet(nd.array([state], ctx=self.__ctx)) - action = self.__policy.select_action(q_values[0]) + while step < self._max_episode_step: + # 1. Choose an action based on current game state and policy + q_values = self._qnet(nd.array([state], ctx=self._ctx)) + action = self._strategy.select_action(q_values[0]) - #2. Play the game for a single step - next_state, reward, terminal, _ = self.__environment.step(action) + # 2. Play the game for a single step + next_state, reward, terminal, _ =\ + self._environment.step(action) - #3. Store transition in replay memory - self.__memory.append(state, action, reward, next_state, terminal) + # 3. Store transition in replay memory + self._memory.append( + state, action, reward, next_state, terminal) - #4. Train the network if in interval - do_training = (self.__total_steps % self.__train_interval == 0\ - and self.__memory.is_sample_possible(self.__minibatch_size)) - if do_training: + # 4. 
Train the network if in interval + if self._do_training(): loss = self.__train_q_net_step(trainer) - loss_sum = sum(loss).asnumpy()[0] - episode_loss += float(loss_sum)/float(self.__minibatch_size) training_steps += 1 + episode_loss +=\ + np.sum(loss.asnumpy()) / self._minibatch_size # Update target network if in interval - self.__do_target_update_if_in_interval(self.__total_steps) + self.__do_target_update_if_in_interval(self._total_steps) step += 1 - self.__total_steps += 1 + self._total_steps += 1 episode_reward += reward state = next_state if terminal: - episode_loss = episode_loss if training_steps > 0 else None - _, _, avg_reward = self.__training_stats.log_episode(self.__current_episode, start, training_steps, - episode_loss, self.__policy.cur_eps, episode_reward) + self._strategy.reset() break - self.__do_snapshot_if_in_interval(self.__current_episode) - self.__policy.decay() - - if self.__is_target_reached(avg_reward): - self.__logger.info('Target score is reached in average; Training is stopped') - break - - self.__current_episode += 1 + self._do_snapshot_if_in_interval(self._current_episode) - self.__evaluate() - training_stats_file = os.path.join(self.__output_directory, 'training_stats.pdf') - self.__training_stats.save_stats(training_stats_file) - self.__logger.info('--------- Training finished ---------') - return True - - def __save_net(self, net, filename, filedir=None): - filedir = self.__output_directory if filedir is None else filedir - filename = os.path.join(filedir, filename + '.params') - net.save_parameters(filename) + episode_loss = (episode_loss / training_steps)\ + if training_steps > 0 else 0 + avg_reward = self._training_stats.log_episode( + self._current_episode, start, training_steps, + episode_loss, self._strategy.cur_eps, episode_reward) + self._strategy.decay(self._current_episode) - def save_parameters(self, episode=None, filename='dqn-agent-params'): - assert self.__output_directory - self.__make_output_directory_if_not_exist() - - if(episode != None): - self.__logger.info('Saving model parameters after episode %d' % episode) - filename = filename + '-ep{}'.format(episode) - else: - self.__logger.info('Saving model parameters') - self.__save_net(self.__qnet, filename) - - def evaluate(self, target=None, sample_games=100, verbose=True): - target = self.__target_score if target is None else target - if target: - target_achieved = 0 - total_reward = 0 - - for g in range(sample_games): - state = self.__environment.reset() - step = 0 - game_reward = 0 - while step < self.__max_episode_step: - action = self.get_next_action(state) - state, reward, terminal, _ = self.__environment.step(action) - game_reward += reward - - if terminal: - if verbose: - info = 'Game %d: Reward %f' % (g,game_reward) - self.__logger.debug(info) - if target: - if game_reward >= target: - target_achieved += 1 - total_reward += game_reward - break - - step += 1 - - avg_reward = float(total_reward)/float(sample_games) - info = 'Avg. 
Reward: %f' % avg_reward - if target: - target_achieved_ratio = int((float(target_achieved)/float(sample_games))*100) - info += '; Target Achieved in %d%% of games' % (target_achieved_ratio) - - if verbose: - self.__logger.info(info) - return avg_reward - - def __evaluate(self, verbose=True): - sample_games = 100 - avg_reward = self.evaluate(sample_games=sample_games, verbose=False) - info = 'Evaluation -> Average Reward in {} games: {}'.format(sample_games, avg_reward) - - if self.__best_avg_score is None or self.__best_avg_score <= avg_reward: - self.__best_net = copy_net(self.__qnet, self.__state_dim, self.__ctx) - self.__best_avg_score = avg_reward - info += ' (NEW BEST)' - - if verbose: - self.__logger.info(info) - - - - def play(self, update_frame=1, with_best=False): - step = 0 - state = self.__environment.reset() - total_reward = 0 - while step < self.__max_episode_step: - action = self.get_next_action(state, with_best=with_best) - state, reward, terminal, _ = self.__environment.step(action) - total_reward += reward - do_update_frame = (step % update_frame == 0) - if do_update_frame: - self.__environment.render() - time.sleep(.100) - - if terminal: + if self._is_target_reached(avg_reward): + self._logger.info( + 'Target score is reached in average; Training is stopped') break - step += 1 - return total_reward + self._current_episode += 1 - def save_best_network(self, path, epoch=0): - self.__logger.info('Saving best network with average reward of {}'.format(self.__best_avg_score)) - self.__best_net.export(path, epoch=epoch) + self._evaluate() + self.save_parameters(episode=self._current_episode) + self.save_best_network(os.path.join(self._output_directory, 'best')) + self._training_stats.save_stats(self._output_directory) + self._logger.info('--------- Training finished ---------') + return True - def __make_config_dict(self): - config = dict() - config['discount_factor'] = self.__discount_factor - config['optimizer'] = self.__optimizer - config['optimizer_params'] = self.__optimizer_params - config['policy_params'] = self.__policy_params - config['replay_memory_params'] = self.__replay_memory_params - config['loss_function'] = self.__loss_function_str - config['optimizer'] = self.__optimizer - config['training_episodes'] = self.__training_episodes - config['train_interval'] = self.__train_interval - config['use_fix_target'] = self.__use_fix_target - config['double_dqn'] = self.__double_dqn - config['target_update_interval'] = self.__target_update_interval - config['snapshot_interval']= self.__snapshot_interval - config['agent_name'] = self.__agent_name - config['max_episode_step'] = self.__max_episode_step - config['output_directory'] = self.__user_given_directory - config['verbose'] = self.__verbose - config['live_plot'] = self.__live_plot - config['make_logfile'] = self.__make_logfile - config['target_score'] = self.__target_score + def _make_config_dict(self): + config = super(DqnAgent, self)._make_config_dict() + config['optimizer'] = self._optimizer + config['optimizer_params'] = self._optimizer_params + config['loss_function'] = self._loss_function_str + config['use_fix_target'] = self._use_fix_target + config['double_dqn'] = self._double_dqn + config['target_update_interval'] = self._target_update_interval return config - def save_config_file(self): - import json - self.__make_output_directory_if_not_exist() - filename = os.path.join(self.__output_directory, 'config.json') - config = self.__make_config_dict() - with open(filename, mode='w') as fp: - json.dump(config, fp, 
indent=4) \ No newline at end of file + def save_parameters(self, episode): + self._save_parameters(self._qnet, episode=episode) + + def _save_current_as_best_net(self): + self._best_net = copy_net( + self._qnet, (1,) + self._state_dim, ctx=self._ctx) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/cnnarch_logger.py b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/cnnarch_logger.py new file mode 100644 index 0000000..a17f5b1 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/cnnarch_logger.py @@ -0,0 +1,93 @@ +import logging +import sys +import os +import util + + +class ArchLogger(object): + _logger = None + + __output_level = logging.INFO + __logger_name = 'agent' + __output_directory = '.' + __append = True + __logformat = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + __dateformat = '%d-%b-%y %H:%M:%S' + + INFO = logging.INFO + DEBUG = logging.DEBUG + + @staticmethod + def set_output_level(output_level): + assert output_level is not None + ArchLogger.__output_level = output_level + + @staticmethod + def set_logger_name(logger_name): + assert logger_name is not None + ArchLogger.__logger_name = logger_name + + @staticmethod + def set_output_directory(output_directory): + assert output_directory is not None + ArchLogger.__output_directory = output_directory + + @staticmethod + def set_append(append): + assert append is not None + ArchLogger.__append = append + + @staticmethod + def set_log_format(logformat, dateformat): + assert logformat is not None + assert dateformat is not None + ArchLogger.__logformat = logformat + ArchLogger.__dateformat = dateformat + + @staticmethod + def init_logger(make_log_file=True): + assert ArchLogger._logger is None, 'Logger init already called' + filemode = 'a' if ArchLogger.__append else 'w' + formatter = logging.Formatter( + fmt=ArchLogger.__logformat, datefmt=ArchLogger.__dateformat) + + logger = logging.getLogger(ArchLogger.__logger_name) + logger.setLevel(ArchLogger.__output_level) + + stream_handler = logging.StreamHandler(sys.stdout) + stream_handler.setLevel(ArchLogger.__output_level) + stream_handler.setFormatter(formatter) + logger.addHandler(stream_handler) + + if make_log_file: + util.make_directory_if_not_exist(ArchLogger.__output_directory) + log_file = os.path.join( + ArchLogger.__output_directory, + ArchLogger.__logger_name + '.log') + file_handler = logging.FileHandler(log_file, mode=filemode) + file_handler.setLevel(ArchLogger.__output_level) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + ArchLogger._logger = logger + + @staticmethod + def get_logger(): + if ArchLogger._logger is None: + ArchLogger.init_logger() + assert ArchLogger._logger is not None + return ArchLogger._logger + +if __name__ == "__main__": + print('=== Test logger ===') + ArchLogger.set_logger_name('TestLogger') + ArchLogger.set_output_directory('test_log') + ArchLogger.init_logger() + logger = ArchLogger.get_logger() + logger.warning('This is a warning') + logger.debug('This is a debug information, which you should not see') + logger.info('This is a normal information') + assert os.path.exists('test_log')\ + and os.path.isfile(os.path.join('test_log', 'TestLogger.log')),\ + 'Test failed: No logfile exists' + import shutil + shutil.rmtree('test_log') \ No newline at end of file diff --git a/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/environment.py 
b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/environment.py index 6fbbda7..f2989e2 100644 --- a/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/environment.py +++ b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/environment.py @@ -40,7 +40,7 @@ import rospy import thread import numpy as np import time -from std_msgs.msg import Float32MultiArray, Bool, Int32 +from std_msgs.msg import Float32MultiArray, Bool, Int32, MultiArrayDimension class RosEnvironment(Environment): def __init__(self, diff --git a/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/replay_memory.py b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/replay_memory.py index e66cd93..d22147a 100644 --- a/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/replay_memory.py +++ b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/replay_memory.py @@ -1,58 +1,90 @@ import numpy as np + class ReplayMemoryBuilder(object): def __init__(self): self.__supported_methods = ['online', 'buffer', 'combined'] - def build_by_params(self, + def build_by_params( + self, state_dim, method='online', state_dtype='float32', + action_dim=(1,), action_dtype='uint8', rewards_dtype='float32', memory_size=1000, - sample_size=32): + sample_size=32 + ): assert state_dim is not None + assert action_dim is not None assert method in self.__supported_methods if method == 'online': - return self.build_online_memory(state_dim=state_dim, state_dtype=state_dtype, - action_dtype=action_dtype, rewards_dtype=rewards_dtype) + return self.build_online_memory( + state_dim=state_dim, state_dtype=state_dtype, + action_dtype=action_dtype, action_dim=action_dim, + rewards_dtype=rewards_dtype) else: assert memory_size is not None and memory_size > 0 assert sample_size is not None and sample_size > 0 if method == 'buffer': - return self.build_buffered_memory(state_dim=state_dim, sample_size=sample_size, - memory_size=memory_size, state_dtype=state_dtype, action_dtype=action_dtype, + return self.build_buffered_memory( + state_dim=state_dim, sample_size=sample_size, + memory_size=memory_size, state_dtype=state_dtype, + action_dim=action_dim, action_dtype=action_dtype, rewards_dtype=rewards_dtype) else: - return self.build_combined_memory(state_dim=state_dim, sample_size=sample_size, - memory_size=memory_size, state_dtype=state_dtype, action_dtype=action_dtype, + return self.build_combined_memory( + state_dim=state_dim, sample_size=sample_size, + memory_size=memory_size, state_dtype=state_dtype, + action_dim=action_dim, action_dtype=action_dtype, rewards_dtype=rewards_dtype) - def build_buffered_memory(self, state_dim, memory_size=1000, sample_size=1, state_dtype='float32', - action_dtype='uint8', rewards_dtype='float32'): + def build_buffered_memory( + self, state_dim, memory_size, sample_size, state_dtype, action_dim, + action_dtype, rewards_dtype + ): assert memory_size > 0 assert sample_size > 0 - return ReplayMemory(state_dim, size=memory_size, sample_size=sample_size, - state_dtype=state_dtype, action_dtype=action_dtype, rewards_dtype=rewards_dtype) - - def build_combined_memory(self, state_dim, memory_size=1000, sample_size=1, state_dtype='float32', - action_dtype='uint8', rewards_dtype='float32'): + return ReplayMemory( + state_dim, size=memory_size, sample_size=sample_size, + state_dtype=state_dtype, 
action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) + + def build_combined_memory( + self, state_dim, memory_size, sample_size, state_dtype, action_dim, + action_dtype, rewards_dtype + ): assert memory_size > 0 assert sample_size > 0 - return CombinedReplayMemory(state_dim, size=memory_size, sample_size=sample_size, - state_dtype=state_dtype, action_dtype=action_dtype, rewards_dtype=rewards_dtype) + return CombinedReplayMemory( + state_dim, size=memory_size, sample_size=sample_size, + state_dtype=state_dtype, action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) + + def build_online_memory( + self, state_dim, state_dtype, action_dtype, action_dim, rewards_dtype + ): + return OnlineReplayMemory( + state_dim, state_dtype=state_dtype, action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) - def build_online_memory(self, state_dim, state_dtype='float32', action_dtype='uint8', - rewards_dtype='float32'): - return OnlineReplayMemory(state_dim, state_dtype=state_dtype, action_dtype=action_dtype, - rewards_dtype=rewards_dtype) class ReplayMemory(object): - def __init__(self, state_dim, sample_size, size=1000, state_dtype='uint8', action_dtype='uint8', rewards_dtype='float32'): + def __init__( + self, + state_dim, + sample_size, + size=1000, + action_dim=(1,), + state_dtype='float32', + action_dtype='uint8', + rewards_dtype='float32' + ): assert size > 0, "Size must be greater than zero" assert type(state_dim) is tuple, "State dimension must be a tuple" + assert type(action_dim) is tuple, "Action dimension must be a tuple" assert sample_size > 0 self._size = size self._sample_size = sample_size @@ -60,12 +92,15 @@ class ReplayMemory(object): self._pointer = 0 self._state_dim = state_dim self._state_dtype = state_dtype + self._action_dim = action_dim self._action_dtype = action_dtype self._rewards_dtype = rewards_dtype self._states = np.zeros((self._size,) + state_dim, dtype=state_dtype) - self._actions = np.array([0] * self._size, dtype=action_dtype) + self._actions = np.zeros( + (self._size,) + action_dim, dtype=action_dtype) self._rewards = np.array([0] * self._size, dtype=rewards_dtype) - self._next_states = np.zeros((self._size,) + state_dim, dtype=state_dtype) + self._next_states = np.zeros( + (self._size,) + state_dim, dtype=state_dtype) self._terminals = np.array([0] * self._size, dtype='bool') @property @@ -93,17 +128,23 @@ class ReplayMemory(object): self._terminals[index] def is_sample_possible(self, batch_size=None): - batch_size = batch_size if batch_size is not None else self._sample_size + batch_size = batch_size if batch_size is not None\ + else self._sample_size return self._cur_size >= batch_size def sample(self, batch_size=None): - batch_size = batch_size if batch_size is not None else self._sample_size - assert self._cur_size >= batch_size, "Size of replay memory must be larger than batch size" - i=0 - states = np.zeros((batch_size,)+self._state_dim, dtype=self._state_dtype) - actions = np.zeros(batch_size, dtype=self._action_dtype) + batch_size = batch_size if batch_size is not None\ + else self._sample_size + assert self._cur_size >= batch_size,\ + "Size of replay memory must be larger than batch size" + i = 0 + states = np.zeros(( + batch_size,)+self._state_dim, dtype=self._state_dtype) + actions = np.zeros( + (batch_size,)+self._action_dim, dtype=self._action_dtype) rewards = np.zeros(batch_size, dtype=self._rewards_dtype) - next_states = np.zeros((batch_size,)+self._state_dim, 
dtype=self._state_dtype) + next_states = np.zeros( + (batch_size,)+self._state_dim, dtype=self._state_dtype) terminals = np.zeros(batch_size, dtype='bool') while i < batch_size: @@ -119,25 +160,35 @@ class ReplayMemory(object): class OnlineReplayMemory(ReplayMemory): - def __init__(self, state_dim, state_dtype='float32', action_dtype='uint8', rewards_dtype='float32'): - super(OnlineReplayMemory, self).__init__(state_dim, sample_size=1, size=1, - state_dtype=state_dtype, action_dtype=action_dtype, rewards_dtype=rewards_dtype) + def __init__( + self, state_dim, state_dtype='float32', action_dim=(1,), + action_dtype='uint8', rewards_dtype='float32' + ): + super(OnlineReplayMemory, self).__init__( + state_dim, sample_size=1, size=1, state_dtype=state_dtype, + action_dim=action_dim, action_dtype=action_dtype, + rewards_dtype=rewards_dtype) class CombinedReplayMemory(ReplayMemory): - def __init__(self, state_dim, sample_size, size=1000, - state_dtype='uint8', action_dtype='uint8', rewards_dtype='float32'): - super(CombinedReplayMemory, self).__init__(state_dim, sample_size=(sample_size - 1), size=size, - state_dtype=state_dtype, action_dtype=action_dtype, rewards_dtype=rewards_dtype) + def __init__( + self, state_dim, sample_size, size=1000, state_dtype='float32', + action_dim=(1,), action_dtype='uint8', rewards_dtype='float32' + ): + super(CombinedReplayMemory, self).__init__( + state_dim=state_dim, sample_size=(sample_size - 1), size=size, + state_dtype=state_dtype, action_dim=action_dim, + action_dtype=action_dtype, rewards_dtype=rewards_dtype) self._last_state = np.zeros((1,) + state_dim, dtype=state_dtype) - self._last_action = np.array([0], dtype=action_dtype) + self._last_action = np.array((1,) + action_dim, dtype=action_dtype) self._last_reward = np.array([0], dtype=rewards_dtype) self._last_next_state = np.zeros((1,) + state_dim, dtype=state_dtype) self._last_terminal = np.array([0], dtype='bool') def append(self, state, action, reward, next_state, terminal): - super(CombinedReplayMemory, self).append(state, action, reward, next_state, terminal) + super(CombinedReplayMemory, self).append( + state, action, reward, next_state, terminal) self._last_state = state self._last_action = action self._last_reward = reward @@ -145,8 +196,10 @@ class CombinedReplayMemory(ReplayMemory): self._last_terminal = terminal def sample(self, batch_size=None): - batch_size = (batch_size-1) if batch_size is not None else self._sample_size - states, actions, rewards, next_states, terminals = super(CombinedReplayMemory, self).sample(batch_size=batch_size) + batch_size = (batch_size-1) if batch_size is not None\ + else self._sample_size + states, actions, rewards, next_states, terminals = super( + CombinedReplayMemory, self).sample(batch_size=batch_size) states = np.append(states, [self._last_state], axis=0) actions = np.append(actions, [self._last_action], axis=0) rewards = np.append(rewards, [self._last_reward], axis=0) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/strategy.py b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/strategy.py new file mode 100644 index 0000000..1f8deb0 --- /dev/null +++ b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/strategy.py @@ -0,0 +1,172 @@ +import numpy as np + + +class StrategyBuilder(object): + def __init__(self): + pass + + def build_by_params( + self, + method='epsgreedy', + epsilon=0.5, + min_epsilon=0.05, + epsilon_decay_method='no', + 
epsilon_decay=0.0, + epsilon_decay_start=0, + action_dim=None, + action_low=None, + action_high=None, + mu=0.0, + theta=0.5, + sigma=0.3 + ): + + if epsilon_decay_method == 'linear': + decay = LinearDecay( + eps_decay=epsilon_decay, min_eps=min_epsilon, + decay_start=epsilon_decay_start) + else: + decay = NoDecay() + + if method == 'epsgreedy': + assert action_dim is not None + assert len(action_dim) == 1 + return EpsilonGreedyStrategy( + eps=epsilon, number_of_actions=action_dim[0], + decay_method=decay) + elif method == 'uhlenbeck_ornstein': + assert action_dim is not None + assert action_low is not None + assert action_high is not None + assert mu is not None + assert theta is not None + assert sigma is not None + return UhlenbeckOrnsteinStrategy( + action_dim, action_low, action_high, epsilon, mu, theta, + sigma, decay) + else: + assert action_dim is not None + assert len(action_dim) == 1 + return GreedyStrategy() + + +class BaseDecay(object): + def __init__(self): + pass + + def decay(self, *args): + raise NotImplementedError + + def __call__(self, *args): + return self.decay(*args) + + +class NoDecay(BaseDecay): + def __init__(self): + super(NoDecay, self).__init__() + + def decay(self, cur_eps, episode): + return cur_eps + + +class LinearDecay(BaseDecay): + def __init__(self, eps_decay, min_eps=0, decay_start=0): + super(LinearDecay, self).__init__() + self.eps_decay = eps_decay + self.min_eps = min_eps + self.decay_start = decay_start + + def decay(self, cur_eps, episode): + if episode < self.decay_start: + return cur_eps + else: + return max(cur_eps - self.eps_decay, self.min_eps) + + +class BaseStrategy(object): + def __init__(self, decay_method): + self._decay_method = decay_method + + def select_action(self, values, decay_method): + raise NotImplementedError + + def decay(self, episode): + self.cur_eps = self._decay_method.decay(self.cur_eps, episode) + + def reset(self): + pass + + +class EpsilonGreedyStrategy(BaseStrategy): + def __init__(self, eps, number_of_actions, decay_method): + super(EpsilonGreedyStrategy, self).__init__(decay_method) + self.eps = eps + self.cur_eps = eps + self.__number_of_actions = number_of_actions + + def select_action(self, values): + do_exploration = (np.random.rand() < self.cur_eps) + if do_exploration: + action = np.random.randint(low=0, high=self.__number_of_actions) + else: + action = values.asnumpy().argmax() + return action + + +class GreedyStrategy(BaseStrategy): + def __init__(self): + super(GreedyStrategy, self).__init__(None) + + def select_action(self, values): + return values.asnumpy().argmax() + + def decay(self): + pass + + +class UhlenbeckOrnsteinStrategy(BaseStrategy): + """ + Ornstein-Uhlenbeck process: dxt = theta * (mu - xt) * dt + sigma * dWt + where Wt denotes the Wiener process. 
+ """ + def __init__( + self, + action_dim, + action_low, + action_high, + eps, + mu=0.0, + theta=.15, + sigma=.3, + decay=NoDecay() + ): + super(UhlenbeckOrnsteinStrategy, self).__init__() + self.eps = eps + self.cur_eps = eps + + self._decay_method = decay + + self._action_dim = action_dim + self._action_low = action_low + self._action_high = action_high + + self._mu = mu + self._theta = theta + self._sigma = sigma + + self.state = np.ones(self._action_dim) * self._mu + + def _evolve_state(self): + x = self.state + dx = self._theta * (self._mu - x)\ + + self._sigma * np.random.randn(len(x)) + self.state = x + dx + return self.state + + def reset(self): + self.state = np.ones(self._action_dim) * self._mu + + def select_action(self, values): + noise = self._evolve_state() + action = values + (self.cur_eps * noise) + return np.clip(action, self._action_low, self._action_high) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/util.py b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/util.py index 5857893..a1da1ce 100644 --- a/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/util.py +++ b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/util.py @@ -5,9 +5,9 @@ import matplotlib.pyplot as plt from matplotlib import style import time import os -import mxnet +import mxnet as mx from mxnet import gluon, nd - +import cnnarch_logger LOSS_FUNCTIONS = { 'l1': gluon.loss.L1Loss(), @@ -16,17 +16,46 @@ LOSS_FUNCTIONS = { 'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(), 'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()} -def copy_net(net, input_state_dim, ctx, tmp_filename='tmp.params'): + +def make_directory_if_not_exist(directory): + if not os.path.exists(directory): + os.makedirs(directory) + + +def copy_net(net, input_state_dim, ctx): + assert isinstance(net, gluon.HybridBlock) + assert type(net.__class__) is type + + net2 = net.__class__() + net2.collect_params().initialize(mx.init.Zero(), ctx=ctx) + net2.hybridize() + net2(mx.nd.ones((1,) + input_state_dim, ctx=ctx)) + + params_of_net = [p.data() for _, p in net.collect_params().items()] + for i, (_, p) in enumerate(net2.collect_params().items()): + p.set_data(params_of_net[i]) + + return net2 + + +def copy_net_with_two_inputs(net, input_state_dim1, input_state_dim2, ctx): assert isinstance(net, gluon.HybridBlock) assert type(net.__class__) is type - net.save_parameters(tmp_filename) + net2 = net.__class__() - net2.load_parameters(tmp_filename, ctx=ctx) - os.remove(tmp_filename) + net2.collect_params().initialize(mx.init.Zero(), ctx=ctx) net2.hybridize() - net2(nd.ones((1,) + input_state_dim, ctx=ctx)) + net2( + nd.ones((1,) + input_state_dim1, ctx=ctx), + nd.ones((1,) + input_state_dim2, ctx=ctx)) + + params_of_net = [p.data() for _, p in net.collect_params().items()] + for i, (_, p) in enumerate(net2.collect_params().items()): + p.set_data(params_of_net[i]) + return net2 + def get_loss_function(loss_function_name): if loss_function_name not in LOSS_FUNCTIONS: raise ValueError('Loss function does not exist') @@ -52,89 +81,196 @@ class AgentSignalHandler(object): sys.exit(1) style.use('fivethirtyeight') + + class TrainingStats(object): - def __init__(self, logger, max_episodes, live_plot=True): - self.__logger = logger - self.__max_episodes = max_episodes - self.__all_avg_loss = np.zeros((max_episodes,)) - self.__all_total_rewards = np.zeros((max_episodes,)) - 
self.__all_eps = np.zeros((max_episodes,)) - self.__all_time = np.zeros((max_episodes,)) - self.__all_mean_reward_last_100_episodes = np.zeros((max_episodes,)) - self.__live_plot = live_plot + def __init__(self, max_episodes): + self._logger = cnnarch_logger.ArchLogger.get_logger() + self._max_episodes = max_episodes + self._all_total_rewards = np.zeros((max_episodes,)) + self._all_eps = np.zeros((max_episodes,)) + self._all_time = np.zeros((max_episodes,)) + self._all_mean_reward_last_100_episodes = np.zeros((max_episodes,)) @property def logger(self): - return self.__logger + return self._logger @logger.setter def logger(self, logger): - self.__logger = logger + self._logger = logger @logger.deleter def logger(self): - self.__logger = None - - def add_avg_loss(self, episode, avg_loss): - self.__all_avg_loss[episode] = avg_loss + self._logger = None def add_total_reward(self, episode, total_reward): - self.__all_total_rewards[episode] = total_reward + self._all_total_rewards[episode] = total_reward def add_eps(self, episode, eps): - self.__all_eps[episode] = eps + self._all_eps[episode] = eps def add_time(self, episode, time): - self.__all_time[episode] = time + self._all_time[episode] = time def add_mean_reward_last_100(self, episode, mean_reward): - self.__all_mean_reward_last_100_episodes[episode] = mean_reward + self._all_mean_reward_last_100_episodes[episode] = mean_reward + + def log_episode(self, *args): + raise NotImplementedError + + def mean_of_reward(self, cur_episode, last=100): + if cur_episode > 0: + reward_last_100 =\ + self._all_total_rewards[max(0, cur_episode-last):cur_episode] + return np.mean(reward_last_100) + else: + return self._all_total_rewards[0] - def log_episode(self, episode, start_time, training_steps, loss, eps, reward): + def save(self, path): + np.save(os.path.join(path, 'total_rewards'), self._all_total_rewards) + np.save(os.path.join(path, 'eps'), self._all_eps) + np.save(os.path.join(path, 'time'), self._all_time) + np.save( + os.path.join(path, 'mean_reward'), + self._all_mean_reward_last_100_episodes) + + def _log_episode(self, episode, start_time, training_steps, eps, reward): self.add_eps(episode, eps) self.add_total_reward(episode, reward) end = time.time() - if training_steps == 0: - avg_loss = 0 - else: - avg_loss = float(loss)/float(training_steps) - mean_reward_last_100 = self.mean_of_reward(episode, last=100) - time_elapsed = end - start_time - info = "Episode: %d, Total Reward: %.3f, Avg. Reward Last 100 Episodes: %.3f, Avg Loss: %.3f, Time: %.3f, Training Steps: %d, Eps: %.3f"\ - % (episode, reward, mean_reward_last_100, avg_loss, time_elapsed, training_steps, eps) - self.__logger.info(info) - self.add_avg_loss(episode, avg_loss) self.add_time(episode, time_elapsed) self.add_mean_reward_last_100(episode, mean_reward_last_100) + return ('Episode: %d, Total Reward: %.3f, ' + 'Avg. 
Reward Last 100 Episodes: %.3f, {}, ' + 'Time: %.3f, Training Steps: %d, Eps: %.3f') % ( + episode, reward, mean_reward_last_100, time_elapsed, + training_steps, eps), mean_reward_last_100 - return avg_loss, time_elapsed, mean_reward_last_100 - def mean_of_reward(self, cur_episode, last=100): - if cur_episode > 0: - reward_last_100 = self.__all_total_rewards[max(0, cur_episode-last):cur_episode] - return np.mean(reward_last_100) - else: - return self.__all_total_rewards[0] +class DqnTrainingStats(TrainingStats): + def __init__(self, max_episodes): + super(DqnTrainingStats, self).__init__(max_episodes) + self._all_avg_loss = np.zeros((max_episodes,)) + + def add_avg_loss(self, episode, avg_loss): + self._all_avg_loss[episode] = avg_loss + + def log_episode( + self, episode, start_time, training_steps, avg_loss, eps, reward + ): + self.add_avg_loss(episode, avg_loss) + + info, avg_reward = self._log_episode( + episode, start_time, training_steps, eps, reward) + info = info.format(('Avg. Loss: %.3f') % (avg_loss)) - def save_stats(self, file): - fig = plt.figure(figsize=(20,20)) + self._logger.info(info) + return avg_reward + + def save_stats(self, path): + fig = plt.figure(figsize=(20, 20)) sub_rewards = fig.add_subplot(221) sub_rewards.set_title('Total Rewards per episode') - sub_rewards.plot(np.arange(self.__max_episodes), self.__all_total_rewards) + sub_rewards.plot( + np.arange(self._max_episodes), self._all_total_rewards) sub_loss = fig.add_subplot(222) sub_loss.set_title('Avg. Loss per episode') - sub_loss.plot(np.arange(self.__max_episodes), self.__all_avg_loss) + sub_loss.plot(np.arange(self._max_episodes), self._all_avg_loss) sub_eps = fig.add_subplot(223) sub_eps.set_title('Epsilon per episode') - sub_eps.plot(np.arange(self.__max_episodes), self.__all_eps) + sub_eps.plot(np.arange(self._max_episodes), self._all_eps) sub_rewards = fig.add_subplot(224) sub_rewards.set_title('Avg. mean reward of last 100 episodes') - sub_rewards.plot(np.arange(self.__max_episodes), self.__all_mean_reward_last_100_episodes) + sub_rewards.plot(np.arange(self._max_episodes), + self._all_mean_reward_last_100_episodes) + + self.save(path) + plt.savefig(os.path.join(path, 'stats.pdf')) + + def save(self, path): + super(DqnTrainingStats, self).save(path) + np.save(os.path.join(path, 'avg_loss'), self._all_avg_loss) + + +class DdpgTrainingStats(TrainingStats): + def __init__(self, max_episodes): + super(DdpgTrainingStats, self).__init__(max_episodes) + self._all_avg_critic_loss = np.zeros((max_episodes,)) + self._all_avg_actor_loss = np.zeros((max_episodes,)) + self._all_avg_qvalues = np.zeros((max_episodes,)) + + def add_avg_critic_loss(self, episode, avg_critic_loss): + self._all_avg_critic_loss[episode] = avg_critic_loss + + def add_avg_actor_loss(self, episode, avg_actor_loss): + self._all_avg_actor_loss[episode] = avg_actor_loss + + def add_avg_qvalues(self, episode, avg_qvalues): + self._all_avg_qvalues[episode] = avg_qvalues + + def log_episode( + self, episode, start_time, training_steps, actor_loss, + critic_loss, qvalues, eps, reward + ): + self.add_avg_actor_loss(episode, actor_loss) + self.add_avg_critic_loss(episode, critic_loss) + self.add_avg_qvalues(episode, qvalues) + + info, avg_reward = self._log_episode( + episode, start_time, training_steps, eps, reward) + info = info.format(( + 'Avg. Actor Loss: %.3f ' + 'Avg. Critic Loss: %.3f ' + 'Avg. 
Q-Values: %.3f') % (actor_loss, critic_loss, qvalues)) + + self.logger.info(info) + return avg_reward + + def save(self, path): + super(DdpgTrainingStats, self).save(path) + np.save(os.path.join( + path, 'avg_critic_loss'), self._all_avg_critic_loss) + np.save(os.path.join(path, 'avg_actor_loss'), self._all_avg_actor_loss) + np.save(os.path.join(path, 'avg_qvalues'), self._all_avg_qvalues) + + def save_stats(self, path): + fig = plt.figure(figsize=(120, 120)) + + sub_rewards = fig.add_subplot(321) + sub_rewards.set_title('Total Rewards per episode') + sub_rewards.plot( + np.arange(self._max_episodes), self._all_total_rewards) + + sub_actor_loss = fig.add_subplot(322) + sub_actor_loss.set_title('Avg. Actor Loss per episode') + sub_actor_loss.plot( + np.arange(self._max_episodes), self._all_avg_actor_loss) + + sub_critic_loss = fig.add_subplot(323) + sub_critic_loss.set_title('Avg. Critic Loss per episode') + sub_critic_loss.plot( + np.arange(self._max_episodes), self._all_avg_critic_loss) + + sub_qvalues = fig.add_subplot(324) + sub_qvalues.set_title('Avg. QValues per episode') + sub_qvalues.plot( + np.arange(self._max_episodes), self._all_avg_qvalues) + + sub_eps = fig.add_subplot(325) + sub_eps.set_title('Epsilon per episode') + sub_eps.plot(np.arange(self._max_episodes), self._all_eps) + + sub_rewards = fig.add_subplot(326) + sub_rewards.set_title('Avg. mean reward of last 100 episodes') + sub_rewards.plot(np.arange(self._max_episodes), + self._all_mean_reward_last_100_episodes) - plt.savefig(file) \ No newline at end of file + self.save(path) + plt.savefig(os.path.join(path, 'stats.pdf')) diff --git a/src/test/resources/target_code/gluon/reinforcementModel/torcs/reward/pylib/__init__.py b/src/test/resources/target_code/gluon/reinforcementModel/torcs/reward/pylib/__init__.py deleted file mode 100644 index e69de29..0000000 -- GitLab
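
Note (not part of the patch): the new strategy.py introduced above replaces the old action_policy module with a StrategyBuilder that the generated trainers call. The short sketch below illustrates the intended use of that builder for a discrete, epsilon-greedy setup; the import path reinforcement_learning.strategy and all concrete parameter values are illustrative assumptions, not taken from the patch.

# Hypothetical usage sketch of the generated strategy module.
# Assumes the generated reinforcement_learning package is on PYTHONPATH.
from mxnet import nd
from reinforcement_learning.strategy import StrategyBuilder

# Epsilon-greedy exploration over 4 discrete actions, with epsilon
# decaying linearly from 1.0 towards 0.05 starting at episode 10.
strategy = StrategyBuilder().build_by_params(
    method='epsgreedy',
    epsilon=1.0,
    min_epsilon=0.05,
    epsilon_decay_method='linear',
    epsilon_decay=0.01,
    epsilon_decay_start=10,
    action_dim=(4,))

q_values = nd.array([0.1, 0.5, 0.2, 0.2])  # dummy Q-values for one state
action = strategy.select_action(q_values)  # random action or argmax of q_values
strategy.decay(episode=42)                 # anneal epsilon once per episode
print(action, strategy.cur_eps)

Exploitation calls values.asnumpy().argmax(), so the Q-values passed to select_action are expected to be an MXNet NDArray, and decay() is invoked once per episode, matching the agents' self._strategy.decay(self._current_episode) calls in agent.py.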