Commit 2f61d2cc authored by Nicola Gatto

Adapt tests

parent 0c8d4322
Pipeline #148401 failed with stages in 58 seconds
......@@ -268,13 +268,15 @@ public class GenerationTest extends AbstractSymtabTest {
"reinforcement_learning/environment.py",
"reinforcement_learning/replay_memory.py",
"reinforcement_learning/util.py",
"reinforcement_learning/cnnarch_logger.py",
"reinforcement_learning/torcs_agent_dqn_reward_executor.py"
"reinforcement_learning/cnnarch_logger.py"
)
);
assertTrue(Paths.get(
"./target/generated-sources-emadl/reinforcement_learning/_torcs_agent_dqn_reward_executor.so")
.toFile().exists());
assertTrue(Paths.get(
"./target/generated-sources-emadl/reinforcement_learning/torcs_agent_dqn_reward_executor.py")
.toFile().exists());
}
@Test
......
......@@ -26,6 +26,7 @@ import de.monticore.lang.monticar.emadl.generator.EMADLGeneratorCli;
import de.se_rwth.commons.logging.Log;
import freemarker.template.TemplateException;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import java.io.IOException;
......@@ -39,6 +40,7 @@ import java.util.List;
import static junit.framework.TestCase.assertTrue;
import static org.junit.Assert.assertFalse;
@Ignore
public class IntegrationCaffe2Test extends IntegrationTest {
public IntegrationCaffe2Test() {
super("CAFFE2", "39253EC049D4A4E5FA0536AD34874B9D#1DBAEE1B1BD83FB7CB5F70AE91B29638#13D139510DC5681639AA91D7250288D3#1A42D4842D0664937A9F6B727BD60CEF");
......
......@@ -7,21 +7,21 @@ import CNNCreator_cartpole_master_dqn
import os
import sys
import re
import logging
import time
import numpy as np
import mxnet as mx
def resume_session():
session_param_output = os.path.join(session_output_dir, agent_name)
def resume_session(sessions_dir):
resume_session = False
resume_directory = None
if os.path.isdir(session_output_dir) and os.path.isdir(session_param_output):
if os.path.isdir(sessions_dir):
regex = re.compile(r'\d\d\d\d-\d\d-\d\d-\d\d-\d\d')
dir_content = os.listdir(session_param_output)
dir_content = os.listdir(sessions_dir)
session_files = filter(regex.search, dir_content)
session_files.sort(reverse=True)
for d in session_files:
interrupted_session_dir = os.path.join(session_param_output, d, '.interrupted_session')
interrupted_session_dir = os.path.join(sessions_dir, d, '.interrupted_session')
if os.path.isdir(interrupted_session_dir):
resume = raw_input('Interrupted session from {} found. Do you want to resume? (y/n) '.format(d))
if resume == 'y':
......@@ -32,12 +32,13 @@ def resume_session():
if __name__ == "__main__":
agent_name='cartpole_master_dqn'
agent_name = 'cartpole_master_dqn'
# Prepare output directory and logger
output_directory = 'model_output'\
+ '/' + agent_name\
+ '/' + time.strftime(
'%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
all_output_dir = os.path.join('model', agent_name)
output_directory = os.path.join(
all_output_dir,
time.strftime('%Y-%m-%d-%H-%M-%S',
time.localtime(time.time())))
ArchLogger.set_output_directory(output_directory)
ArchLogger.set_logger_name(agent_name)
ArchLogger.set_output_level(ArchLogger.INFO)
......@@ -51,12 +52,12 @@ if __name__ == "__main__":
agent_params = {
'environment': env,
'replay_memory_params': {
'method':'buffer',
'memory_size':10000,
'sample_size':32,
'state_dtype':'float32',
'action_dtype':'float32',
'rewards_dtype':'float32'
'method': 'buffer',
'memory_size': 10000,
'sample_size': 32,
'state_dtype': 'float32',
'action_dtype': 'float32',
'rewards_dtype': 'float32'
},
'strategy_params': {
'method':'epsgreedy',
......@@ -67,6 +68,7 @@ if __name__ == "__main__":
},
'agent_name': agent_name,
'verbose': True,
'output_directory': output_directory,
'state_dim': (4,),
'action_dim': (2,),
'ctx': 'cpu',
......@@ -86,9 +88,11 @@ if __name__ == "__main__":
'double_dqn': False,
}
resume, resume_directory = resume_session()
resume, resume_directory = resume_session(all_output_dir)
if resume:
output_directory, _ = os.path.split(resume_directory)
ArchLogger.set_output_directory(output_directory)
resume_agent_params = {
'session_dir': resume_directory,
'environment': env,
......
......@@ -52,22 +52,24 @@ class ArchLogger(object):
fmt=ArchLogger.__logformat, datefmt=ArchLogger.__dateformat)
logger = logging.getLogger(ArchLogger.__logger_name)
logger.setLevel(ArchLogger.__output_level)
logger.propagate = False
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(ArchLogger.__output_level)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
if not logger.handlers:
logger.setLevel(ArchLogger.__output_level)
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(ArchLogger.__output_level)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
if make_log_file:
util.make_directory_if_not_exist(ArchLogger.__output_directory)
log_file = os.path.join(
ArchLogger.__output_directory,
ArchLogger.__logger_name + '.log')
file_handler = logging.FileHandler(log_file, mode=filemode)
file_handler.setLevel(ArchLogger.__output_level)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
if make_log_file:
util.make_directory_if_not_exist(ArchLogger.__output_directory)
log_file = os.path.join(
ArchLogger.__output_directory,
ArchLogger.__logger_name + '.log')
file_handler = logging.FileHandler(log_file, mode=filemode)
file_handler.setLevel(ArchLogger.__output_level)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
ArchLogger._logger = logger
@staticmethod
......
......@@ -140,7 +140,7 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
sigma=.3,
decay=NoDecay()
):
super(OrnsteinUhlenbeckStrategy, self).__init__()
super(OrnsteinUhlenbeckStrategy, self).__init__(decay)
self.eps = eps
self.cur_eps = eps
......@@ -150,9 +150,9 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
self._action_low = action_low
self._action_high = action_high
self._mu = mu
self._theta = theta
self._sigma = sigma
self._mu = np.array(mu)
self._theta = np.array(theta)
self._sigma = np.array(sigma)
self.state = np.ones(self._action_dim) * self._mu
......
......@@ -8,21 +8,21 @@ import CNNCreator_mountaincar_master_actor
import os
import sys
import re
import logging
import time
import numpy as np
import mxnet as mx
def resume_session():
session_param_output = os.path.join(session_output_dir, agent_name)
def resume_session(sessions_dir):
resume_session = False
resume_directory = None
if os.path.isdir(session_output_dir) and os.path.isdir(session_param_output):
if os.path.isdir(sessions_dir):
regex = re.compile(r'\d\d\d\d-\d\d-\d\d-\d\d-\d\d')
dir_content = os.listdir(session_param_output)
dir_content = os.listdir(sessions_dir)
session_files = filter(regex.search, dir_content)
session_files.sort(reverse=True)
for d in session_files:
interrupted_session_dir = os.path.join(session_param_output, d, '.interrupted_session')
interrupted_session_dir = os.path.join(sessions_dir, d, '.interrupted_session')
if os.path.isdir(interrupted_session_dir):
resume = raw_input('Interrupted session from {} found. Do you want to resume? (y/n) '.format(d))
if resume == 'y':
......@@ -33,12 +33,13 @@ def resume_session():
if __name__ == "__main__":
agent_name='mountaincar_master_actor'
agent_name = 'mountaincar_master_actor'
# Prepare output directory and logger
output_directory = 'model_output'\
+ '/' + agent_name\
+ '/' + time.strftime(
'%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
all_output_dir = os.path.join('model', agent_name)
output_directory = os.path.join(
all_output_dir,
time.strftime('%Y-%m-%d-%H-%M-%S',
time.localtime(time.time())))
ArchLogger.set_output_directory(output_directory)
ArchLogger.set_logger_name(agent_name)
ArchLogger.set_output_level(ArchLogger.INFO)
......@@ -48,18 +49,18 @@ if __name__ == "__main__":
context = mx.cpu()
actor_creator = CNNCreator_mountaincar_master_actor.CNNCreator_mountaincar_master_actor()
actor_creator.construct(context)
critic_creator = CNNCreator_MountaincarCritic.CNNCreator_MountaincarCritic()
critic_creator = CNNCreator_MountaincarCritic()
critic_creator.construct(context)
agent_params = {
'environment': env,
'replay_memory_params': {
'method':'buffer',
'memory_size':1000000,
'sample_size':64,
'state_dtype':'float32',
'action_dtype':'float32',
'rewards_dtype':'float32'
'method': 'buffer',
'memory_size': 1000000,
'sample_size': 64,
'state_dtype': 'float32',
'action_dtype': 'float32',
'rewards_dtype': 'float32'
},
'strategy_params': {
'method':'ornstein_uhlenbeck',
......@@ -67,14 +68,15 @@ if __name__ == "__main__":
'min_epsilon': 0.01,
'epsilon_decay_method': 'linear',
'epsilon_decay': 0.01,
'action_low': -1
'action_high': 1
'mu': np.array([0])
'theta': np.array([0.15])
'sigma': np.array([0.3])
'action_low': -1,
'action_high': 1,
'mu': [0],
'theta': [0.15],
'sigma': [0.3],
},
'agent_name': agent_name,
'verbose': True,
'output_directory': output_directory,
'state_dim': (2,),
'action_dim': (1,),
'ctx': 'cpu',
......@@ -93,9 +95,11 @@ if __name__ == "__main__":
'learning_rate': 0.001},
}
resume, resume_directory = resume_session()
resume, resume_directory = resume_session(all_output_dir)
if resume:
output_directory, _ = os.path.split(resume_directory)
ArchLogger.set_output_directory(output_directory)
resume_agent_params = {
'session_dir': resume_directory,
'environment': env,
......
......@@ -52,22 +52,24 @@ class ArchLogger(object):
fmt=ArchLogger.__logformat, datefmt=ArchLogger.__dateformat)
logger = logging.getLogger(ArchLogger.__logger_name)
logger.setLevel(ArchLogger.__output_level)
logger.propagate = False
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(ArchLogger.__output_level)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
if not logger.handlers:
logger.setLevel(ArchLogger.__output_level)
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(ArchLogger.__output_level)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
if make_log_file:
util.make_directory_if_not_exist(ArchLogger.__output_directory)
log_file = os.path.join(
ArchLogger.__output_directory,
ArchLogger.__logger_name + '.log')
file_handler = logging.FileHandler(log_file, mode=filemode)
file_handler.setLevel(ArchLogger.__output_level)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
if make_log_file:
util.make_directory_if_not_exist(ArchLogger.__output_directory)
log_file = os.path.join(
ArchLogger.__output_directory,
ArchLogger.__logger_name + '.log')
file_handler = logging.FileHandler(log_file, mode=filemode)
file_handler.setLevel(ArchLogger.__output_level)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
ArchLogger._logger = logger
@staticmethod
......
......@@ -140,7 +140,7 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
sigma=.3,
decay=NoDecay()
):
super(OrnsteinUhlenbeckStrategy, self).__init__()
super(OrnsteinUhlenbeckStrategy, self).__init__(decay)
self.eps = eps
self.cur_eps = eps
......@@ -150,9 +150,9 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
self._action_low = action_low
self._action_high = action_high
self._mu = mu
self._theta = theta
self._sigma = sigma
self._mu = np.array(mu)
self._theta = np.array(theta)
self._sigma = np.array(sigma)
self.state = np.ones(self._action_dim) * self._mu
......
......@@ -7,21 +7,21 @@ import CNNCreator_torcs_agent_torcsAgent_dqn
import os
import sys
import re
import logging
import time
import numpy as np
import mxnet as mx
def resume_session():
session_param_output = os.path.join(session_output_dir, agent_name)
def resume_session(sessions_dir):
resume_session = False
resume_directory = None
if os.path.isdir(session_output_dir) and os.path.isdir(session_param_output):
if os.path.isdir(sessions_dir):
regex = re.compile(r'\d\d\d\d-\d\d-\d\d-\d\d-\d\d')
dir_content = os.listdir(session_param_output)
dir_content = os.listdir(sessions_dir)
session_files = filter(regex.search, dir_content)
session_files.sort(reverse=True)
for d in session_files:
interrupted_session_dir = os.path.join(session_param_output, d, '.interrupted_session')
interrupted_session_dir = os.path.join(sessions_dir, d, '.interrupted_session')
if os.path.isdir(interrupted_session_dir):
resume = raw_input('Interrupted session from {} found. Do you want to resume? (y/n) '.format(d))
if resume == 'y':
......@@ -32,22 +32,23 @@ def resume_session():
if __name__ == "__main__":
agent_name='torcs_agent_torcsAgent_dqn'
agent_name = 'torcs_agent_torcsAgent_dqn'
# Prepare output directory and logger
output_directory = 'model_output'\
+ '/' + agent_name\
+ '/' + time.strftime(
'%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
all_output_dir = os.path.join('model', agent_name)
output_directory = os.path.join(
all_output_dir,
time.strftime('%Y-%m-%d-%H-%M-%S',
time.localtime(time.time())))
ArchLogger.set_output_directory(output_directory)
ArchLogger.set_logger_name(agent_name)
ArchLogger.set_output_level(ArchLogger.INFO)
env_params = {
'ros_node_name' : 'torcs_agent_torcsAgent_dqnTrainerNode',
'state_topic' : 'preprocessor_state',
'action_topic' : 'postprocessor_action',
'reset_topic' : 'torcs_reset',
'terminal_state_topic' : 'prepocessor_is_terminal'
'ros_node_name': 'torcs_agent_torcsAgent_dqnTrainerNode',
'state_topic': 'preprocessor_state',
'action_topic': 'postprocessor_action',
'reset_topic': 'torcs_reset',
'terminal_state_topic': 'prepocessor_is_terminal',
}
env = reinforcement_learning.environment.RosEnvironment(**env_params)
......@@ -58,12 +59,12 @@ if __name__ == "__main__":
agent_params = {
'environment': env,
'replay_memory_params': {
'method':'buffer',
'memory_size':1000000,
'sample_size':32,
'state_dtype':'float32',
'action_dtype':'float32',
'rewards_dtype':'float32'
'method': 'buffer',
'memory_size': 1000000,
'sample_size': 32,
'state_dtype': 'float32',
'action_dtype': 'float32',
'rewards_dtype': 'float32'
},
'strategy_params': {
'method':'epsgreedy',
......@@ -74,6 +75,7 @@ if __name__ == "__main__":
},
'agent_name': agent_name,
'verbose': True,
'output_directory': output_directory,
'state_dim': (5,),
'action_dim': (30,),
'ctx': 'cpu',
......@@ -92,9 +94,11 @@ if __name__ == "__main__":
'double_dqn': True,
}
resume, resume_directory = resume_session()
resume, resume_directory = resume_session(all_output_dir)
if resume:
output_directory, _ = os.path.split(resume_directory)
ArchLogger.set_output_directory(output_directory)
resume_agent_params = {
'session_dir': resume_directory,
'environment': env,
......
......@@ -52,22 +52,24 @@ class ArchLogger(object):
fmt=ArchLogger.__logformat, datefmt=ArchLogger.__dateformat)
logger = logging.getLogger(ArchLogger.__logger_name)
logger.setLevel(ArchLogger.__output_level)
logger.propagate = False
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(ArchLogger.__output_level)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
if not logger.handlers:
logger.setLevel(ArchLogger.__output_level)
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(ArchLogger.__output_level)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
if make_log_file:
util.make_directory_if_not_exist(ArchLogger.__output_directory)
log_file = os.path.join(
ArchLogger.__output_directory,
ArchLogger.__logger_name + '.log')
file_handler = logging.FileHandler(log_file, mode=filemode)
file_handler.setLevel(ArchLogger.__output_level)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
if make_log_file:
util.make_directory_if_not_exist(ArchLogger.__output_directory)
log_file = os.path.join(
ArchLogger.__output_directory,
ArchLogger.__logger_name + '.log')
file_handler = logging.FileHandler(log_file, mode=filemode)
file_handler.setLevel(ArchLogger.__output_level)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
ArchLogger._logger = logger
@staticmethod
......
......@@ -10,9 +10,11 @@ class RewardFunction(object):
self.__reward_wrapper.init()
def reward(self, state, terminal):
s = state.astype('double')
t = bool(terminal)
inp = torcs_agent_dqn_reward_executor.torcs_agent_dqn_reward_input()
inp.state = state
inp.isTerminal = terminal
inp.state = s
inp.isTerminal = t
output = self.__reward_wrapper.execute(inp)
return output.reward
......@@ -40,7 +42,7 @@ import rospy
import thread
import numpy as np
import time
from std_msgs.msg import Float32MultiArray, Bool, Int32, MultiArrayDimension
from std_msgs.msg import Float32MultiArray, Bool, Int32, MultiArrayDimension, Float32
class RosEnvironment(Environment):
def __init__(self,
......@@ -50,15 +52,13 @@ class RosEnvironment(Environment):
action_topic='action',
reset_topic='reset',
terminal_state_topic='terminal',
meta_topic='meta',
greeting_topic='greeting'):
reward_topic='reward'):
super(RosEnvironment, self).__init__()
self.__timeout_in_s = timeout_in_s
self.__waiting_for_state_update = False
self.__waiting_for_terminal_update = False
self.__last_received_state = 0
self.__last_received_terminal = 0
self.__last_received_terminal = True
rospy.loginfo("Initialize node {0}".format(ros_node_name))
......@@ -111,7 +111,8 @@ class RosEnvironment(Environment):
def __wait_for_new_state(self, publisher, msg):
time_of_timeout = time.time() + self.__timeout_in_s
timeout_counter = 0
while(self.__waiting_for_state_update or self.__waiting_for_terminal_update):
while(self.__waiting_for_state_update
or self.__waiting_for_terminal_update):
is_timeout = (time.time() > time_of_timeout)
if (is_timeout):
if timeout_counter < 3:
......@@ -127,9 +128,8 @@ class RosEnvironment(Environment):
def close(self):
rospy.signal_shutdown('Program ended!')
def __state_callback(self, data):
self.__last_received_state = np.array(data.data, dtype='double')
self.__last_received_state = np.array(data.data, dtype='float32')
rospy.logdebug('Received state: {}'.format(self.__last_received_state))
self.__waiting_for_state_update = False
......
......@@ -140,7 +140,7 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
sigma=.3,
decay=NoDecay()
):
super(OrnsteinUhlenbeckStrategy, self).__init__()
super(OrnsteinUhlenbeckStrategy, self).__init__(decay)
self.eps = eps
self.cur_eps = eps
......@@ -150,9 +150,9 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
self._action_low = action_low
self._action_high = action_high
self._mu = mu
self._theta = theta
self._sigma = sigma
self._mu = np.array(mu)
self._theta = np.array(theta)
self._sigma = np.array(sigma)
self.state = np.ones(self._action_dim) * self._mu
......
# This file was automatically generated by SWIG (http://www.swig.org).
# Version 3.0.8
# Version 3.0.12
#
# Do not make changes to this file unless you know what you are doing--modify
# the SWIG interface file instead.
from sys import version_info
if version_info >= (2, 6, 0):
from sys import version_info as _swig_python_version_info
if _swig_python_version_info >= (2, 7, 0):
def swig_import_helper():
import importlib
pkg = __name__.rpartition('.')[0]
mname = '.'.join((pkg, '_torcs_agent_dqn_reward_executor')).lstrip('.')
try:
return importlib.import_module(mname)
except ImportError:
return importlib.import_module('_torcs_agent_dqn_reward_executor')
_torcs_agent_dqn_reward_executor = swig_import_helper()
del swig_import_helper
elif _swig_python_version_info >= (2, 6, 0):
def swig_import_helper():
from os.path import dirname
import imp
......@@ -19,22 +26,27 @@ if version_info >= (2, 6, 0):
except ImportError:
import _torcs_agent_dqn_reward_executor
return _torcs_agent_dqn_reward_executor
if fp is not None:
try:
_mod = imp.load_module('_torcs_agent_dqn_reward_executor', fp, pathname, description)
finally:
try:
_mod = imp.load_module('_torcs_agent_dqn_reward_executor', fp, pathname, description)
finally:
if fp is not None:
fp.close()
return _mod
return _mod
_torcs_agent_dqn_reward_executor = swig_import_helper()
del swig_import_helper
else:
import _torcs_agent_dqn_reward_executor
del version_info
del _swig_python_version_info
try:
_swig_property = property
except NameError:
pass # Python < 2.2 doesn't have 'property'.
try: