Commit 5a35c2f7 authored by Nicola Gatto

Add generation test for ros environment reinforcement model

parent d91b80d0
@@ -222,6 +222,58 @@ public class GenerationTest extends AbstractSymtabTest {
        );
    }

    @Test
    public void testGluonReinforcementModelRosEnvironment() {
        Log.getFindings().clear();
        String[] args = {"-m", "src/test/resources/models/reinforcementModel", "-r", "torcs.agent.TorcsAgent", "-b", "GLUON", "-f", "n", "-c", "n"};
        EMADLGeneratorCli.main(args);
        assertTrue(Log.getFindings().stream().filter(Finding::isError).collect(Collectors.toList()).isEmpty());
        checkFilesAreEqual(
                Paths.get("./target/generated-sources-emadl"),
                Paths.get("./src/test/resources/target_code/gluon/reinforcementModel/torcs"),
                Arrays.asList(
                        "CMakeLists.txt",
                        "CNNBufferFile.h",
                        "torcs_agent_torcsAgent.cpp",
                        "torcs_agent_torcsAgent.h",
                        "torcs_agent_torcsAgent_dqn.h",
                        "torcs_agent_torcsAgent_policy.h",
                        "CNNCreator_torcs_agent_torcsAgent_dqn.py",
                        "CNNNet_torcs_agent_torcsAgent_dqn.py",
                        "CNNPredictor_torcs_agent_torcsAgent_dqn.h",
                        "CNNTrainer_torcs_agent_torcsAgent_dqn.py",
                        "CNNTranslator.h",
                        "HelperA.h",
                        "start_training.sh",
                        "reward/CMakeLists.txt",
                        "reward/HelperA.h",
                        "reward/torcs_agent_dqn_reward.cpp",
                        "reward/torcs_agent_dqn_reward.h",
                        "reward/pylib/CMakeLists.txt",
                        "reward/pylib/torcs_agent_dqn_reward_executor.cpp",
                        "reward/pylib/torcs_agent_dqn_reward_executor.h",
                        "reward/pylib/torcs_agent_dqn_reward_executor.i",
                        "reward/pylib/armanpy/armanpy.hpp",
                        "reward/pylib/armanpy/armanpy.i",
                        "reward/pylib/armanpy/armanpy_1d.i",
                        "reward/pylib/armanpy/armanpy_2d.i",
                        "reward/pylib/armanpy/armanpy_3d.i",
                        "reward/pylib/armanpy/numpy.i",
                        "reinforcement_learning/__init__.py",
                        "reinforcement_learning/action_policy.py",
                        "reinforcement_learning/agent.py",
                        "reinforcement_learning/environment.py",
                        "reinforcement_learning/replay_memory.py",
                        "reinforcement_learning/util.py",
                        "reinforcement_learning/torcs_agent_dqn_reward_executor.py"
                )
        );
        assertTrue(Paths.get(
                "./target/generated-sources-emadl/reinforcement_learning/_torcs_agent_dqn_reward_executor.so")
                .toFile().exists());
    }

    @Test
    public void testHashFunction() {
        EMADLGenerator tester = new EMADLGenerator(Backend.MXNET);
......
package torcs.agent;

import torcs.agent.policy.GreedyDiscreteActionPolicy;
import torcs.agent.dqn.TorcsDQN;

component TorcsAgent {
    ports
        in Q^{5} state,
        out Z action;

    instance TorcsDQN<30> dqn;
    instance GreedyDiscreteActionPolicy<30> policy;

    connect state -> dqn.state;
    connect dqn.qvalues -> policy.values;
    connect policy.action -> action;
}
\ No newline at end of file
package torcs.agent;

conforms to de.monticore.lang.monticar.generator.roscpp.RosToEmamTagSchema;

tags TorcsAgent {
    tag torcsAgent.state with RosConnection = {topic=(preprocessor_state, std_msgs/Float32MultiArray)};
    tag torcsAgent.action with RosConnection = {topic=(postprocessor_action, std_msgs/Int32)};
}
\ No newline at end of file
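The two RosConnection tags above are all the generator needs to wire the agent into ROS: any node that publishes the 5-element state as a std_msgs/Float32MultiArray on preprocessor_state and listens for std_msgs/Int32 on postprocessor_action can drive the component. A minimal counterpart node, sketched under the assumption that rospy and std_msgs are available (node and callback names are illustrative, not generated):

import rospy
from std_msgs.msg import Float32MultiArray, Int32

def on_action(msg):
    # the agent's chosen discrete action arrives here
    rospy.loginfo("received action %d", msg.data)

rospy.init_node("torcs_mock_interface")  # hypothetical node name
state_pub = rospy.Publisher("preprocessor_state", Float32MultiArray, queue_size=1)
rospy.Subscriber("postprocessor_action", Int32, on_action)

rate = rospy.Rate(10)
while not rospy.is_shutdown():
    # publish a dummy 5-dimensional state, matching the Q^{5} port
    state_pub.publish(Float32MultiArray(data=[0.0] * 5))
    rate.sleep()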
package torcs.agent.dqn;

component Reward {
    ports
        in Q^{5} state,
        in B isTerminal,
        out Q reward;

    implementation Math {
        Q angle = state(1);
        Q speed = state(2);
        reward = speed * cos(angle);
    }
}
\ No newline at end of file
configuration TorcsDQN {
    context : cpu
    learning_method : reinforcement

    environment : ros_interface {
        state_topic : "preprocessor_state"
        terminal_state_topic : "prepocessor_is_terminal"
        action_topic : "postprocessor_action"
        reset_topic : "torcs_reset"
    }

    reward_function : torcs.agent.dqn.reward

    num_episodes : 20000
    discount_factor : 0.999
    num_max_steps : 999999999
    training_interval : 1
    use_fix_target_network : true
    target_network_update_interval : 500
    snapshot_interval : 1000
    use_double_dqn : true
    loss : euclidean

    replay_memory : buffer {
        memory_size : 1000000
        sample_size : 32
    }

    action_selection : epsgreedy {
        epsilon : 1.0
        min_epsilon : 0.01
        epsilon_decay_method : linear
        epsilon_decay : 0.0001
    }

    optimizer : rmsprop {
        learning_rate : 0.001
    }
}
\ No newline at end of file
package torcs.agent.dqn;

component TorcsDQN<Z(1:oo) discrete_actions = 2> {
    ports
        in Q^{5} state,
        out Q(-oo:oo)^{discrete_actions} qvalues;

    implementation CNN {
        state ->
        FullyConnected(units=512) ->
        Tanh() ->
        FullyConnected(units=256) ->
        Tanh() ->
        FullyConnected(units=discrete_actions) ->
        qvalues
    }
}
\ No newline at end of file
package torcs.agent.policy;

component GreedyDiscreteActionPolicy<Z(1:oo) n = 2> {
    ports
        in Q^{n} values,
        out Z action;

    implementation Math {
        Z best_action = 0;
        Q value_of_best_action = values(1);

        for i = 2:n
            if values(i) > value_of_best_action
                best_action = i - 1;
                value_of_best_action = values(i);
            end
        end

        action = best_action;
    }
}
\ No newline at end of file
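For reference, the Math implementation above is a 1-indexed scan for the maximum that emits a 0-indexed action (hence best_action = i - 1). A NumPy equivalent, assuming values is a 1-D array of Q-values (a sketch only, not part of the generated sources):

import numpy as np

def greedy_action(values):
    # np.argmax is already 0-indexed, matching the i - 1 shift above
    return int(np.argmax(values))

print(greedy_action(np.array([0.1, 0.7, 0.3])))  # -> 1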
cmake_minimum_required(VERSION 3.5)
set(CMAKE_CXX_STANDARD 14)

project(torcs_agent_torcsAgent LANGUAGES CXX)

# set cmake module path
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

# add dependencies
find_package(Armadillo REQUIRED)
set(INCLUDE_DIRS ${INCLUDE_DIRS} ${Armadillo_INCLUDE_DIRS})
set(LIBS ${LIBS} ${Armadillo_LIBRARIES})

# additional commands
set(LIBS ${LIBS} mxnet)

# create static library
include_directories(${INCLUDE_DIRS})
add_library(torcs_agent_torcsAgent torcs_agent_torcsAgent.cpp)
target_include_directories(torcs_agent_torcsAgent PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${INCLUDE_DIRS})
target_link_libraries(torcs_agent_torcsAgent PUBLIC ${LIBS})
set_target_properties(torcs_agent_torcsAgent PROPERTIES LINKER_LANGUAGE CXX)

# export cmake project
export(TARGETS torcs_agent_torcsAgent FILE torcs_agent_torcsAgent.cmake)
# additional commands end
#ifndef CNNBUFFERFILE_H
#define CNNBUFFERFILE_H

#include <stdio.h>
#include <iostream>
#include <fstream>

// Read file to buffer
class BufferFile {
public:
    std::string file_path_;
    int length_;
    char* buffer_;

    explicit BufferFile(std::string file_path)
        : file_path_(file_path) {
        std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary);
        if (!ifs) {
            std::cerr << "Can't open the file. Please check " << file_path << ". \n";
            length_ = 0;
            buffer_ = NULL;
            return;
        }

        ifs.seekg(0, std::ios::end);
        length_ = ifs.tellg();
        ifs.seekg(0, std::ios::beg);
        std::cout << file_path.c_str() << " ... " << length_ << " bytes\n";

        buffer_ = new char[sizeof(char) * length_];
        ifs.read(buffer_, length_);
        ifs.close();
    }

    int GetLength() {
        return length_;
    }

    char* GetBuffer() {
        return buffer_;
    }

    ~BufferFile() {
        if (buffer_) {
            delete[] buffer_;
            buffer_ = NULL;
        }
    }
};

#endif // CNNBUFFERFILE_H
import mxnet as mx
import logging
import os

from CNNNet_torcs_agent_torcsAgent_dqn import Net


class CNNCreator_torcs_agent_torcsAgent_dqn:
    _model_dir_ = "model/torcs.agent.dqn.TorcsDQN/"
    _model_prefix_ = "model"
    _input_shapes_ = [(5,)]

    def __init__(self):
        self.weight_initializer = mx.init.Normal()
        self.net = None

    def get_input_shapes(self):
        return self._input_shapes_

    def load(self, context):
        lastEpoch = 0
        param_file = None

        try:
            os.remove(self._model_dir_ + self._model_prefix_ + "_newest-0000.params")
        except OSError:
            pass
        try:
            os.remove(self._model_dir_ + self._model_prefix_ + "_newest-symbol.json")
        except OSError:
            pass

        if os.path.isdir(self._model_dir_):
            for file in os.listdir(self._model_dir_):
                if ".params" in file and self._model_prefix_ in file:
                    epochStr = file.replace(".params", "").replace(self._model_prefix_ + "-", "")
                    epoch = int(epochStr)
                    if epoch > lastEpoch:
                        lastEpoch = epoch
                        param_file = file

        if param_file is None:
            return 0
        else:
            logging.info("Loading checkpoint: " + param_file)
            self.net.load_parameters(self._model_dir_ + param_file)
            return lastEpoch

    def construct(self, context, data_mean=None, data_std=None):
        self.net = Net(data_mean=data_mean, data_std=data_std)
        self.net.collect_params().initialize(self.weight_initializer, ctx=context)
        self.net.hybridize()
        self.net(mx.nd.zeros((1,) + self._input_shapes_[0], ctx=context))

        if not os.path.exists(self._model_dir_):
            os.makedirs(self._model_dir_)

        self.net.export(self._model_dir_ + self._model_prefix_, epoch=0)
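One possible way to drive the creator above outside the generated trainer is to rebuild the network and then resume from the newest checkpoint; construct() and load() are the methods defined above, while the surrounding script is only a sketch:

import mxnet as mx
from CNNCreator_torcs_agent_torcsAgent_dqn import CNNCreator_torcs_agent_torcsAgent_dqn

creator = CNNCreator_torcs_agent_torcsAgent_dqn()
ctx = mx.cpu()
creator.construct(ctx)          # build, initialize, hybridize, and export the net
last_epoch = creator.load(ctx)  # 0 if no newer checkpoint was found in model/
print("resumed from epoch", last_epoch)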
import mxnet as mx
import numpy as np
from mxnet import gluon


class Softmax(gluon.HybridBlock):
    def __init__(self, **kwargs):
        super(Softmax, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        return F.softmax(x)


class Split(gluon.HybridBlock):
    def __init__(self, num_outputs, axis=1, **kwargs):
        super(Split, self).__init__(**kwargs)
        with self.name_scope():
            self.axis = axis
            self.num_outputs = num_outputs

    def hybrid_forward(self, F, x):
        return F.split(data=x, axis=self.axis, num_outputs=self.num_outputs)


class Concatenate(gluon.HybridBlock):
    def __init__(self, dim=1, **kwargs):
        super(Concatenate, self).__init__(**kwargs)
        with self.name_scope():
            self.dim = dim

    def hybrid_forward(self, F, *x):
        return F.concat(*x, dim=self.dim)


class ZScoreNormalization(gluon.HybridBlock):
    def __init__(self, data_mean, data_std, **kwargs):
        super(ZScoreNormalization, self).__init__(**kwargs)
        with self.name_scope():
            self.data_mean = self.params.get('data_mean', shape=data_mean.shape,
                                             init=mx.init.Constant(data_mean.asnumpy().tolist()), differentiable=False)
            self.data_std = self.params.get('data_std', shape=data_mean.shape,
                                            init=mx.init.Constant(data_std.asnumpy().tolist()), differentiable=False)

    def hybrid_forward(self, F, x, data_mean, data_std):
        x = F.broadcast_sub(x, data_mean)
        x = F.broadcast_div(x, data_std)
        return x


class Padding(gluon.HybridBlock):
    def __init__(self, padding, **kwargs):
        super(Padding, self).__init__(**kwargs)
        with self.name_scope():
            self.pad_width = padding

    def hybrid_forward(self, F, x):
        x = F.pad(data=x,
                  mode='constant',
                  pad_width=self.pad_width,
                  constant_value=0)
        return x


class NoNormalization(gluon.HybridBlock):
    def __init__(self, **kwargs):
        super(NoNormalization, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        return x


class Net(gluon.HybridBlock):
    def __init__(self, data_mean=None, data_std=None, **kwargs):
        super(Net, self).__init__(**kwargs)
        with self.name_scope():
            if data_mean is not None:
                assert data_std is not None
                self.input_normalization = ZScoreNormalization(data_mean=data_mean, data_std=data_std)
            else:
                self.input_normalization = NoNormalization()

            self.fc1_ = gluon.nn.Dense(units=512, use_bias=True)
            # fc1_, output shape: {[512,1,1]}
            self.tanh1_ = gluon.nn.Activation(activation='tanh')
            self.fc2_ = gluon.nn.Dense(units=256, use_bias=True)
            # fc2_, output shape: {[256,1,1]}
            self.tanh2_ = gluon.nn.Activation(activation='tanh')
            self.fc3_ = gluon.nn.Dense(units=30, use_bias=True)
            # fc3_, output shape: {[30,1,1]}

            self.last_layer = 'linear'

    def hybrid_forward(self, F, x):
        state = self.input_normalization(x)
        fc1_ = self.fc1_(state)
        tanh1_ = self.tanh1_(fc1_)
        fc2_ = self.fc2_(tanh1_)
        tanh2_ = self.tanh2_(fc2_)
        fc3_ = self.fc3_(tanh2_)
        return fc3_
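As a quick sanity check (a sketch, not part of the generated sources): a batch of one 5-dimensional state fed through Net should yield 30 Q-values, matching fc3_ and the TorcsDQN<30> instantiation in TorcsAgent.

import mxnet as mx
from CNNNet_torcs_agent_torcsAgent_dqn import Net

net = Net()  # no data_mean/data_std, so NoNormalization is used
net.collect_params().initialize(mx.init.Normal(), ctx=mx.cpu())
qvalues = net(mx.nd.zeros((1, 5)))
print(qvalues.shape)  # expected: (1, 30)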
#ifndef CNNPREDICTOR_TORCS_AGENT_TORCSAGENT_DQN
#define CNNPREDICTOR_TORCS_AGENT_TORCSAGENT_DQN

#include <mxnet/c_predict_api.h>

#include <cassert>
#include <cstdlib>
#include <string>
#include <vector>

#include <CNNBufferFile.h>

class CNNPredictor_torcs_agent_torcsAgent_dqn {
public:
    const std::string json_file = "model/torcs.agent.dqn.TorcsDQN/model_newest-symbol.json";
    const std::string param_file = "model/torcs.agent.dqn.TorcsDQN/model_newest-0000.params";
    //const std::vector<std::string> input_keys = {"data"};
    const std::vector<std::string> input_keys = {"state"};
    const std::vector<std::vector<mx_uint>> input_shapes = {{1, 5}};
    const bool use_gpu = false;

    PredictorHandle handle;

    explicit CNNPredictor_torcs_agent_torcsAgent_dqn() {
        init(json_file, param_file, input_keys, input_shapes, use_gpu);
    }

    ~CNNPredictor_torcs_agent_torcsAgent_dqn() {
        if (handle) MXPredFree(handle);
    }

    void predict(const std::vector<float> &state,
                 std::vector<float> &qvalues) {
        MXPredSetInput(handle, "data", state.data(), static_cast<mx_uint>(state.size()));
        MXPredForward(handle);

        mx_uint output_index;
        mx_uint *shape = 0;
        mx_uint shape_len;
        size_t size;

        output_index = 0;
        MXPredGetOutputShape(handle, output_index, &shape, &shape_len);
        size = 1;
        for (mx_uint i = 0; i < shape_len; ++i) size *= shape[i];

        assert(size == qvalues.size());
        MXPredGetOutput(handle, 0, &(qvalues[0]), qvalues.size());
    }

    void init(const std::string &json_file,
              const std::string &param_file,
              const std::vector<std::string> &input_keys,
              const std::vector<std::vector<mx_uint>> &input_shapes,
              const bool &use_gpu) {
        BufferFile json_data(json_file);
        BufferFile param_data(param_file);

        int dev_type = use_gpu ? 2 : 1;
        int dev_id = 0;

        if (json_data.GetLength() == 0 ||
            param_data.GetLength() == 0) {
            std::exit(-1);
        }

        const mx_uint num_input_nodes = input_keys.size();

        const char* input_key[1] = { "data" };
        const char** input_keys_ptr = input_key;

        // input_shape_indptr is a CSR-style index pointer: prefix sums of the
        // per-input shape lengths, as MXPredCreate expects
        mx_uint shape_data_size = 0;
        mx_uint input_shape_indptr[input_shapes.size() + 1];
        input_shape_indptr[0] = 0;
        for (mx_uint i = 0; i < input_shapes.size(); i++) {
            input_shape_indptr[i + 1] = input_shape_indptr[i] + input_shapes[i].size();
            shape_data_size += input_shapes[i].size();
        }

        mx_uint input_shape_data[shape_data_size];
        mx_uint index = 0;
        for (mx_uint i = 0; i < input_shapes.size(); i++) {
            for (mx_uint j = 0; j < input_shapes[i].size(); j++) {
                input_shape_data[index] = input_shapes[i][j];
                index++;
            }
        }

        MXPredCreate(static_cast<const char*>(json_data.GetBuffer()),
                     static_cast<const char*>(param_data.GetBuffer()),
                     static_cast<size_t>(param_data.GetLength()),
                     dev_type,
                     dev_id,
                     num_input_nodes,
                     input_keys_ptr,
                     input_shape_indptr,
                     input_shape_data,
                     &handle);

        assert(handle);
    }
};

#endif // CNNPREDICTOR_TORCS_AGENT_TORCSAGENT_DQN
from reinforcement_learning.agent import DqnAgent
import reinforcement_learning.environment
import CNNCreator_torcs_agent_torcsAgent_dqn

import logging
import mxnet as mx

if __name__ == "__main__":
    env_params = {
        'ros_node_name': 'torcs_agent_torcsAgent_dqnTrainerNode',
        'state_topic': 'preprocessor_state',
        'action_topic': 'postprocessor_action',
        'reset_topic': 'torcs_reset',
        'terminal_state_topic': 'prepocessor_is_terminal'
    }
    env = reinforcement_learning.environment.RosEnvironment(**env_params)
    context = mx.cpu()
    net_creator = CNNCreator_torcs_agent_torcsAgent_dqn.CNNCreator_torcs_agent_torcsAgent_dqn()
    net_creator.construct(context)

    replay_memory_params = {
        'method': 'buffer',
        'memory_size': 1000000,
        'sample_size': 32,
        'state_dtype': 'float32',
        'action_dtype': 'uint8',
        'rewards_dtype': 'float32'
    }

    policy_params = {
        'method': 'epsgreedy',
        'epsilon': 1,
        'min_epsilon': 0.01,
        'epsilon_decay_method': 'linear',
        'epsilon_decay': 0.0001,
    }

    agent = DqnAgent(
        network=net_creator.net,
        environment=env,
        replay_memory_params=replay_memory_params,
        policy_params=policy_params,
        state_dim=net_creator.get_input_shapes()[0],
        ctx='cpu',
        discount_factor=0.999,
        loss_function='euclidean',
        optimizer='rmsprop',
        optimizer_params={
            'learning_rate': 0.001
        },
        training_episodes=20000,
        train_interval=1,
        use_fix_target=True,
        target_update_interval=500,
        double_dqn=True,
        snapshot_interval=1000,
        agent_name='torcs_agent_torcsAgent_dqn',
        max_episode_step=999999999,
        output_directory='model',
        verbose=True,
        live_plot =