Commit d91b80d0 authored by Nicola Gatto

Add generation test for gym environment reinforcement model

parent aa7af2ba
......@@ -190,11 +190,36 @@ public class GenerationTest extends AbstractSymtabTest {
}
/**
 * Generation test for the cartpole reinforcement model with a gym environment.
 *
 * Runs the EMADL generator CLI for the root component cartpole.Master with the GLUON
 * backend, expects that no error findings were logged, and then compares the generated
 * files with the reference files stored under
 * src/test/resources/target_code/gluon/reinforcementModel/cartpole.
 *
 * NOTE(review): this text comes from a rendered diff, so the two method signatures below
 * are the removed and the added line of the same hunk; only one of them exists in the
 * real source file.
 */
@Test
public void testGluonReinforcementModel() {
public void testGluonReinforcementModelGymEnvironment() {
// Drop findings left over from earlier tests so the error check below only sees this run.
Log.getFindings().clear();
// -m: model directory, -r: root component, -b: backend.
// NOTE(review): the meaning of "-f n" and "-c n" is not visible here - confirm against the CLI.
String[] args = {"-m", "src/test/resources/models/reinforcementModel", "-r", "cartpole.Master", "-b", "GLUON", "-f", "n", "-c", "n"};
EMADLGeneratorCli.main(args);
// The generator must not have reported any finding that is flagged as an error.
assertTrue(Log.getFindings().stream().filter(Finding::isError).collect(Collectors.toList()).isEmpty());
// Compare each listed file in the output directory with its reference counterpart.
checkFilesAreEqual(
Paths.get("./target/generated-sources-emadl"),
Paths.get("./src/test/resources/target_code/gluon/reinforcementModel/cartpole"),
Arrays.asList(
"cartpole_master.cpp",
"cartpole_master.h",
"cartpole_master_dqn.h",
"cartpole_master_policy.h",
"CMakeLists.txt",
"CNNBufferFile.h",
"CNNCreator_cartpole_master_dqn.py",
"CNNNet_cartpole_master_dqn.py",
"CNNPredictor_cartpole_master_dqn.h",
"CNNTrainer_cartpole_master_dqn.py",
"CNNTranslator.h",
"HelperA.h",
"start_training.sh",
"reinforcement_learning/__init__.py",
"reinforcement_learning/action_policy.py",
"reinforcement_learning/agent.py",
"reinforcement_learning/environment.py",
"reinforcement_learning/replay_memory.py",
"reinforcement_learning/util.py"
)
);
}
@Test
......
configuration CartPoleDQN {
learning_method : reinforcement
environment : gym { name : "CartPole-v0" }
context : cpu
agent_name : "reinforcement_agent"
reward_function : cartpole.agent.reward.reward
learning_method : reinforcement
environment : gym {name : "CartPole-v0"}
num_episodes : 1000
num_episodes : 160
target_score : 185.5
discount_factor : 0.999
num_max_steps : 500
num_max_steps : 250
training_interval : 1
use_fix_target_network : true
target_network_update_interval : 200
snapshot_interval : 50
snapshot_interval : 20
use_double_dqn : true
use_double_dqn : false
loss : huber_loss
loss : euclidean
replay_memory : buffer{
memory_size : 20000
memory_size : 10000
sample_size : 32
}
......@@ -31,7 +28,7 @@ configuration CartPoleDQN {
epsilon : 1.0
min_epsilon : 0.01
epsilon_decay_method: linear
epsilon_decay : 0.001
epsilon_decay : 0.01
}
optimizer : rmsprop{
......
......@@ -7,10 +7,10 @@ component CartPoleDQN {
implementation CNN {
state ->
FullyConnected(units=256) ->
Relu() ->
FullyConnected(units=128) ->
Relu() ->
Tanh() ->
FullyConnected(units=256) ->
Tanh() ->
FullyConnected(units=2) ->
qvalues
}
......
package cartpole.agent.reward;
package cartpole.agent;
// Reward component: takes the ports `state` (Q^{4}) and `isTerminal` (B) as inputs and
// writes the output port `reward` (Q).
component Reward {
ports
in Q^{4} state,
in B isTerminal,
out Q reward;
implementation Math {
// NOTE(review): this text comes from a rendered diff, so removed and added statements
// appear next to each other; presumably only one of the `reward = ...` assignments
// exists in the real file - confirm against the repository.
Q rew = state(1);
reward = rew;
reward = 2;
}
}
\ No newline at end of file
......@@ -6,11 +6,11 @@ component Greedy {
out Z action;
implementation Math {
Q maxQValue = values(0);
Q maxQValue = values(1);
Z maxValueAction = 0;
for i = 1:2
if maxQValue > values(i)
if values(i) > maxQValue
maxQValue = values(i);
maxValueAction = i-1;
end
......
# Build script for the generated cartpole_master library.
cmake_minimum_required(VERSION 3.5)
# Request C++14 for targets created below.
set(CMAKE_CXX_STANDARD 14)
project(cartpole_master LANGUAGES CXX)
# Make the Find modules shipped in ./cmake visible to find_package().
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
# Dependencies: Armadillo is mandatory; its include dirs and libraries are collected
# in INCLUDE_DIRS / LIBS.
# NOTE(review): the Armadillo_* variable names are presumably set by a Find module in
# ./cmake - confirm, since that module is not visible here.
find_package(Armadillo REQUIRED)
set(INCLUDE_DIRS ${INCLUDE_DIRS} ${Armadillo_INCLUDE_DIRS})
set(LIBS ${LIBS} ${Armadillo_LIBRARIES})
# Additional commands: also link against the mxnet library.
set(LIBS ${LIBS} mxnet)
# Create the library from the single generated source file.
include_directories(${INCLUDE_DIRS})
add_library(cartpole_master cartpole_master.cpp)
# PUBLIC so that targets linking cartpole_master inherit the include dirs and libraries.
target_include_directories(cartpole_master PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${INCLUDE_DIRS})
target_link_libraries(cartpole_master PUBLIC ${LIBS})
set_target_properties(cartpole_master PROPERTIES LINKER_LANGUAGE CXX)
# Export the target description to cartpole_master.cmake in the build tree.
export(TARGETS cartpole_master FILE cartpole_master.cmake)
# additional commands end
#ifndef CNNBUFFERFILE_H
#define CNNBUFFERFILE_H
#include <stdio.h>
#include <iostream>
#include <fstream>
// Reads a whole file into a heap buffer that is owned by this object.
//
// After construction:
//   * success: length_ is the file size in bytes and buffer_ points to length_
//     bytes of file content followed by one extra '\0' byte, so the content
//     can also be handed to APIs that expect a C string;
//   * failure (file cannot be opened, size cannot be determined or does not
//     fit into an int, short read): a message is written to std::cerr,
//     length_ is 0 and buffer_ is NULL.
//
// The object is not copyable: a copy would share buffer_ and both destructors
// would delete[] it.
class BufferFile {
 public :
    std::string file_path_;
    int length_;
    char* buffer_;

    // Opens file_path in binary mode and loads its complete content.
    explicit BufferFile(std::string file_path)
    :file_path_(file_path), length_(0), buffer_(NULL) {
        std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary);
        if (!ifs) {
            std::cerr << "Can't open the file. Please check " << file_path << ". \n";
            return;
        }

        // Determine the size by seeking to the end; tellg() yields -1 on failure.
        ifs.seekg(0, std::ios::end);
        const std::streamoff file_size = ifs.tellg();
        ifs.seekg(0, std::ios::beg);

        // length_ is an int, so reject sizes that do not survive the round trip.
        const int narrowed_size = static_cast<int>(file_size);
        if (!ifs || file_size < 0 ||
            static_cast<std::streamoff>(narrowed_size) != file_size) {
            std::cerr << "Can't determine a usable size for " << file_path << ". \n";
            return;
        }
        length_ = narrowed_size;
        std::cout << file_path.c_str() << " ... "<< length_ << " bytes\n";

        // One extra byte for the terminating '\0'; it is not counted in length_.
        buffer_ = new char[static_cast<size_t>(length_) + 1];
        ifs.read(buffer_, length_);
        if (!ifs) {
            std::cerr << "Can't read the file. Please check " << file_path << ". \n";
            delete[] buffer_;
            buffer_ = NULL;
            length_ = 0;
            return;
        }
        buffer_[length_] = '\0';
    }

    // Copying would lead to a double delete[] of buffer_.
    BufferFile(const BufferFile&) = delete;
    BufferFile& operator=(const BufferFile&) = delete;

    // Number of bytes read from the file (0 if loading failed).
    int GetLength() const {
        return length_;
    }

    // Pointer to the loaded bytes (NULL if loading failed); still owned by this object.
    char* GetBuffer() const {
        return buffer_;
    }

    ~BufferFile() {
        if (buffer_) {
            delete[] buffer_;
            buffer_ = NULL;
        }
    }
};
#endif // CNNBUFFERFILE_H
import mxnet as mx
import logging
import os
from CNNNet_cartpole_master_dqn import Net
class CNNCreator_cartpole_master_dqn:
    """Builds the network for the cartpole DQN component and restores/exports its
    parameters below ``_model_dir_``.

    ``construct`` has to be called before ``load``: ``load`` calls
    ``load_parameters`` on ``self.net``, which is ``None`` until ``construct`` ran.
    """

    # Directory and file-name prefix of the exported model files.
    _model_dir_ = "model/cartpole.agent.CartPoleDQN/"
    _model_prefix_ = "model"
    # One shape tuple per network input (without the batch dimension, which
    # ``construct`` prepends).
    _input_shapes_ = [(4,)]

    def __init__(self):
        self.weight_initializer = mx.init.Normal()
        self.net = None

    def get_input_shapes(self):
        """Return the list of input shapes of the network."""
        return self._input_shapes_

    def load(self, context):
        """Load the checkpoint with the highest epoch number, if there is one.

        The ``<prefix>_newest`` params/symbol files are deleted first. Afterwards
        every file in the model directory whose name contains ``.params`` and the
        model prefix is considered a checkpoint candidate; the epoch is parsed
        from its name.

        :param context: accepted for interface compatibility; not used here.
        :return: the epoch of the loaded checkpoint, or 0 if none was loaded.
        """
        lastEpoch = 0
        param_file = None

        # Best effort: the "newest" files may simply not exist yet.
        for suffix in ("_newest-0000.params", "_newest-symbol.json"):
            try:
                os.remove(self._model_dir_ + self._model_prefix_ + suffix)
            except OSError:
                pass

        if os.path.isdir(self._model_dir_):
            for file in os.listdir(self._model_dir_):
                if ".params" in file and self._model_prefix_ in file:
                    epochStr = file.replace(".params","").replace(self._model_prefix_ + "-","")
                    try:
                        epoch = int(epochStr)
                    except ValueError:
                        # Not a "<prefix>-<epoch>.params" checkpoint (for example a
                        # differently named snapshot); skip it instead of aborting.
                        continue
                    if epoch > lastEpoch:
                        lastEpoch = epoch
                        param_file = file

        if param_file is None:
            return 0

        logging.info("Loading checkpoint: " + param_file)
        self.net.load_parameters(self._model_dir_ + param_file)
        return lastEpoch

    def construct(self, context, data_mean=None, data_std=None):
        """Create and initialise the network on ``context`` and export it as epoch 0.

        :param context: context used for parameter initialisation and the first
            forward pass.
        :param data_mean: forwarded to ``Net``.
        :param data_std: forwarded to ``Net``.
        """
        self.net = Net(data_mean=data_mean, data_std=data_std)
        self.net.collect_params().initialize(self.weight_initializer, ctx=context)
        self.net.hybridize()
        # Run one forward pass on a zero batch of size 1 before exporting.
        self.net(mx.nd.zeros((1,)+self._input_shapes_[0], ctx=context))

        if not os.path.exists(self._model_dir_):
            os.makedirs(self._model_dir_)

        self.net.export(self._model_dir_ + self._model_prefix_, epoch=0)
import mxnet as mx
import numpy as np
from mxnet import gluon
class Softmax(gluon.HybridBlock):
    """Hybrid block that returns ``F.softmax`` of its input."""

    def __init__(self, **kwargs):
        super(Softmax, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        activations = F.softmax(x)
        return activations
class Split(gluon.HybridBlock):
    """Hybrid block that splits its input with ``F.split``.

    :param num_outputs: forwarded to ``F.split`` as ``num_outputs``.
    :param axis: forwarded to ``F.split`` as ``axis`` (default 1).
    """

    def __init__(self, num_outputs, axis=1, **kwargs):
        super(Split, self).__init__(**kwargs)
        with self.name_scope():
            self.axis = axis
            self.num_outputs = num_outputs

    def hybrid_forward(self, F, x):
        pieces = F.split(data=x,
                         axis=self.axis,
                         num_outputs=self.num_outputs)
        return pieces
class Concatenate(gluon.HybridBlock):
    """Hybrid block that joins all of its inputs with ``F.concat``.

    :param dim: forwarded to ``F.concat`` as ``dim`` (default 1).
    """

    def __init__(self, dim=1, **kwargs):
        super(Concatenate, self).__init__(**kwargs)
        with self.name_scope():
            self.dim = dim

    def hybrid_forward(self, F, *x):
        joined = F.concat(*x, dim=self.dim)
        return joined
class ZScoreNormalization(gluon.HybridBlock):
    """Hybrid block that computes ``(x - data_mean) / data_std`` with broadcasting.

    Mean and standard deviation are stored as non-differentiable parameters that
    are initialised with the given constant values.

    :param data_mean: array providing ``.shape`` and ``.asnumpy()``; subtracted
        from the input.
    :param data_std: array providing ``.shape`` and ``.asnumpy()``; the shifted
        input is divided by it.
    """

    def __init__(self, data_mean, data_std, **kwargs):
        super(ZScoreNormalization, self).__init__(**kwargs)
        with self.name_scope():
            self.data_mean = self.params.get('data_mean', shape=data_mean.shape,
                init=mx.init.Constant(data_mean.asnumpy().tolist()), differentiable=False)
            # Declare the parameter with the shape of data_std itself; the
            # previous code reused data_mean.shape here.
            self.data_std = self.params.get('data_std', shape=data_std.shape,
                init=mx.init.Constant(data_std.asnumpy().tolist()), differentiable=False)

    def hybrid_forward(self, F, x, data_mean, data_std):
        x = F.broadcast_sub(x, data_mean)
        x = F.broadcast_div(x, data_std)
        return x
class Padding(gluon.HybridBlock):
    """Hybrid block that pads its input with zeros using ``F.pad``.

    :param padding: forwarded to ``F.pad`` as ``pad_width``.
    """

    def __init__(self, padding, **kwargs):
        super(Padding, self).__init__(**kwargs)
        with self.name_scope():
            self.pad_width = padding

    def hybrid_forward(self, F, x):
        return F.pad(data=x, mode='constant', pad_width=self.pad_width, constant_value=0)
class NoNormalization(gluon.HybridBlock):
    """Identity block that is used when no input normalisation data is given."""

    def __init__(self, **kwargs):
        super(NoNormalization, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        # Pass the input through untouched.
        return x
class Net(gluon.HybridBlock):
    """Network: input normalisation, Dense(128) -> tanh -> Dense(256) -> tanh -> Dense(2).

    :param data_mean: if given, inputs are normalised with ``ZScoreNormalization``
        (``data_std`` must then be given as well); otherwise inputs pass through
        ``NoNormalization``.
    :param data_std: standard deviation belonging to ``data_mean``.
    """

    def __init__(self, data_mean=None, data_std=None, **kwargs):
        super(Net, self).__init__(**kwargs)
        with self.name_scope():
            if data_mean is None:
                self.input_normalization = NoNormalization()
            else:
                assert data_std is not None
                self.input_normalization = ZScoreNormalization(data_mean=data_mean, data_std=data_std)

            # fc1_, output shape: {[128,1,1]}
            self.fc1_ = gluon.nn.Dense(units=128, use_bias=True)
            self.tanh1_ = gluon.nn.Activation(activation='tanh')

            # fc2_, output shape: {[256,1,1]}
            self.fc2_ = gluon.nn.Dense(units=256, use_bias=True)
            self.tanh2_ = gluon.nn.Activation(activation='tanh')

            # fc3_, output shape: {[2,1,1]}
            self.fc3_ = gluon.nn.Dense(units=2, use_bias=True)

            self.last_layer = 'linear'

    def hybrid_forward(self, F, x):
        # Feed the normalised input through the layers in declaration order.
        hidden = self.input_normalization(x)
        hidden = self.tanh1_(self.fc1_(hidden))
        hidden = self.tanh2_(self.fc2_(hidden))
        return self.fc3_(hidden)
#ifndef CNNPREDICTOR_CARTPOLE_MASTER_DQN
#define CNNPREDICTOR_CARTPOLE_MASTER_DQN
#include <mxnet/c_predict_api.h>
#include <cassert>
#include <string>
#include <vector>
#include <CNNBufferFile.h>
class CNNPredictor_cartpole_master_dqn{
public:
const std::string json_file = "model/cartpole.agent.CartPoleDQN/model_newest-symbol.json";
const std::string param_file = "model/cartpole.agent.CartPoleDQN/model_newest-0000.params";
//const std::vector<std::string> input_keys = {"data"};
const std::vector<std::string> input_keys = {"state"};
const std::vector<std::vector<mx_uint>> input_shapes = {{1,4}};
const bool use_gpu = false;
PredictorHandle handle;
explicit CNNPredictor_cartpole_master_dqn(){
init(json_file, param_file, input_keys, input_shapes, use_gpu);
}
~CNNPredictor_cartpole_master_dqn(){
if(handle) MXPredFree(handle);
}
void predict(const std::vector<float> &state,
std::vector<float> &qvalues){
MXPredSetInput(handle, "data", state.data(), static_cast<mx_uint>(state.size()));
MXPredForward(handle);
mx_uint output_index;
mx_uint *shape = 0;
mx_uint shape_len;
size_t size;
output_index = 0;
MXPredGetOutputShape(handle, output_index, &shape, &shape_len);
size = 1;
for (mx_uint i = 0; i < shape_len; ++i) size *= shape[i];
assert(size == qvalues.size());
MXPredGetOutput(handle, 0, &(qvalues[0]), qvalues.size());
}
void init(const std::string &json_file,
const std::string &param_file,
const std::vector<std::string> &input_keys,
const std::vector<std::vector<mx_uint>> &input_shapes,
const bool &use_gpu){
BufferFile json_data(json_file);
BufferFile param_data(param_file);
int dev_type = use_gpu ? 2 : 1;
int dev_id = 0;
if (json_data.GetLength() == 0 ||
param_data.GetLength() == 0) {
std::exit(-1);
}
const mx_uint num_input_nodes = input_keys.size();
const char* input_key[1] = { "data" };
const char** input_keys_ptr = input_key;
mx_uint shape_data_size = 0;
mx_uint input_shape_indptr[input_shapes.size() + 1];
input_shape_indptr[0] = 0;
for(mx_uint i = 0; i < input_shapes.size(); i++){
input_shape_indptr[i+1] = input_shapes[i].size();
shape_data_size += input_shapes[i].size();
}
mx_uint input_shape_data[shape_data_size];
mx_uint index = 0;
for(mx_uint i = 0; i < input_shapes.size(); i++){
for(mx_uint j = 0; j < input_shapes[i].size(); j++){
input_shape_data[index] = input_shapes[i][j];
index++;
}
}
MXPredCreate(static_cast<const char*>(json_data.GetBuffer()),
static_cast<const char*>(param_data.GetBuffer()),
static_cast<size_t>(param_data.GetLength()),
dev_type,
dev_id,
num_input_nodes,
input_keys_ptr,
input_shape_indptr,
input_shape_data,
&handle);
assert(handle);
}
};
#endif // CNNPREDICTOR_CARTPOLE_MASTER_DQN
from reinforcement_learning.agent import DqnAgent
import reinforcement_learning.environment
import CNNCreator_cartpole_master_dqn
import logging
import mxnet as mx
if __name__ == "__main__":
    # Training entry point: create the gym environment and the network, configure
    # the DQN agent, train it and store the best network as the "newest" model.
    env = reinforcement_learning.environment.GymEnvironment('CartPole-v0')
    context = mx.cpu()

    net_creator = CNNCreator_cartpole_master_dqn.CNNCreator_cartpole_master_dqn()
    net_creator.construct(context)

    # Settings for the replay memory.
    replay_memory_params = dict(
        method='buffer',
        memory_size=10000,
        sample_size=32,
        state_dtype='float32',
        action_dtype='uint8',
        rewards_dtype='float32',
    )

    # Settings for the epsilon-greedy action policy.
    policy_params = dict(
        method='epsgreedy',
        epsilon=1,
        min_epsilon=0.01,
        epsilon_decay_method='linear',
        epsilon_decay=0.01,
    )

    agent = DqnAgent(
        network=net_creator.net,
        environment=env,
        replay_memory_params=replay_memory_params,
        policy_params=policy_params,
        state_dim=net_creator.get_input_shapes()[0],
        ctx='cpu',
        discount_factor=0.999,
        loss_function='euclidean',
        optimizer='rmsprop',
        optimizer_params={'learning_rate': 0.001},
        training_episodes=160,
        train_interval=1,
        use_fix_target=True,
        target_update_interval=200,
        double_dqn=False,
        snapshot_interval=20,
        agent_name='cartpole_master_dqn',
        max_episode_step=250,
        output_directory='model',
        verbose=True,
        live_plot=True,
        make_logfile=True,
        target_score=185.5,
    )

    train_successfull = agent.train()

    # The best network is stored regardless of the value returned by train().
    newest_model_path = net_creator._model_dir_ + net_creator._model_prefix_ + '_newest'
    agent.save_best_network(newest_model_path, epoch=0)
\ No newline at end of file
#ifndef CNNTRANSLATOR_H
#define CNNTRANSLATOR_H
#include <armadillo>
#include <cassert>
using namespace std;
using namespace arma;
class CNNTranslator{
public:
template<typename T> static void addColToSTDVector(const Col<T> &source, vector<float> &data){
for(size_t i = 0; i < source.n_elem; i++){
data.push_back((float) source(i));
}
}
template<typename T> static void addRowToSTDVector(const subview_row<T> &source, vector<float> &data){
for(size_t i = 0; i < source.n_elem; i++){
data.push_back((float) source(i));
}
}
template<typename T> static void addRowToSTDVector(const Row<T> &source, vector<float> &data){
for(size_t i = 0; i < source.n_elem; i++){
data.push_back((float) source(i));
}
}
template<typename T> static void addMatToSTDVector(const Mat<T> &source, vector<float> &data){
for(size_t i = 0; i < source.n_rows; i++){
addRowToSTDVector(source.row(i), data);
}
}
template<typename T> static vector<float> translate(const Col<T> &source){
size_t size = source.n_elem;
vector<float> data;
data.reserve(size);
addColToSTDVector(source, data);
return data;
}
template<typename T> static vector<float> translate(const Row<T> &source){
size_t size = source.n_elem;
vector<float> data;
data.reserve(size);
addRowToSTDVector(source, data);
return data;
}
template<typename T> static vector<float> translate(const Mat<T> &source){
size_t size = source.n_elem;
vector<float> data;
data.reserve(size);
addMatToSTDVector(source, data);
return data;
}
template<typename T> static vector<float> translate(const Cube<T> &source){
size_t size = source.n_elem;
vector<float> data;
data.reserve(size);
for(size_t i = 0; i < source.n_slices; i++){
addMatToSTDVector(source.slice(i), data);
}
return data;
}
static vec translateToCol(const vector<float> &source, const vector<size_t> &shape){
assert(shape.size() == 1);