Commits (4)
@@ -9,7 +9,7 @@
<groupId>de.monticore.lang.monticar</groupId>
<artifactId>embedded-montiarc-emadl-generator</artifactId>
-<version>0.3.6-SNAPSHOT</version>
+<version>0.3.7-SNAPSHOT</version>
<!-- == PROJECT DEPENDENCIES ============================================= -->
@@ -17,11 +17,11 @@
<!-- .. SE-Libraries .................................................. -->
<emadl.version>0.2.10-SNAPSHOT</emadl.version>
-<CNNTrain.version>0.3.6-SNAPSHOT</CNNTrain.version>
+<CNNTrain.version>0.3.7-SNAPSHOT</CNNTrain.version>
<cnnarch-generator.version>0.0.4-SNAPSHOT</cnnarch-generator.version>
<cnnarch-mxnet-generator.version>0.2.17-SNAPSHOT</cnnarch-mxnet-generator.version>
<cnnarch-caffe2-generator.version>0.2.13-SNAPSHOT</cnnarch-caffe2-generator.version>
-<cnnarch-gluon-generator.version>0.2.8-SNAPSHOT</cnnarch-gluon-generator.version>
+<cnnarch-gluon-generator.version>0.2.9-SNAPSHOT</cnnarch-gluon-generator.version>
<cnnarch-tensorflow-generator.version>0.1.0-SNAPSHOT</cnnarch-tensorflow-generator.version>
<embedded-montiarc-math-opt-generator>0.1.4</embedded-montiarc-math-opt-generator>
......
@@ -4,7 +4,6 @@ package de.monticore.lang.monticar.emadl;
import de.monticore.lang.monticar.emadl.generator.EMADLGeneratorCli;
import de.se_rwth.commons.logging.Finding;
import de.se_rwth.commons.logging.Log;
-import org.junit.Ignore;
import org.junit.Test;
import java.nio.file.Paths;
@@ -12,6 +11,7 @@ import java.util.Arrays;
import java.util.stream.Collectors;
import static junit.framework.TestCase.assertTrue;
+import static org.junit.Assume.assumeFalse;
/**
*
@@ -19,6 +19,7 @@ import static junit.framework.TestCase.assertTrue;
public class IntegrationPythonWrapperTest extends AbstractSymtabTest {
@Test
public void testGluonReinforcementModelRosEnvironment() {
+assumeFalse(System.getProperty("os.name").toLowerCase().startsWith("win"));
Log.getFindings().clear();
String[] args = {"-m", "src/test/resources/models/reinforcementModel", "-r", "torcs.agent.TorcsAgent", "-b", "GLUON", "-f", "n", "-c", "n"};
EMADLGeneratorCli.main(args);
@@ -71,4 +72,51 @@ public class IntegrationPythonWrapperTest extends AbstractSymtabTest {
"./target/generated-sources-emadl/reinforcement_learning/torcs_agent_dqn_reward_executor.py")
.toFile().exists());
}
+@Test
+public void testTorcsTD3() {
+Log.getFindings().clear();
+String[] args = {"-m", "src/test/resources/models/reinforcementModel/torcs_td3", "-r", "torcs.agent.TorcsAgent", "-b", "GLUON", "-f", "n", "-c", "n"};
+EMADLGeneratorCli.main(args);
+assertTrue(Log.getFindings().stream().filter(Finding::isError).collect(Collectors.toList()).isEmpty());
+checkFilesAreEqual(
+Paths.get("./target/generated-sources-emadl"),
+Paths.get("./src/test/resources/target_code/gluon/reinforcementModel/torcs_td3"),
+Arrays.asList(
+"CMakeLists.txt",
+"CNNBufferFile.h",
+"torcs_agent_torcsAgent.cpp",
+"torcs_agent_torcsAgent.h",
+"torcs_agent_torcsAgent_actor.h",
+"CNNCreator_torcs_agent_torcsAgent_actor.py",
+"CNNNet_torcs_agent_torcsAgent_actor.py",
+"CNNPredictor_torcs_agent_torcsAgent_actor.h",
+"CNNTrainer_torcs_agent_torcsAgent_actor.py",
+"start_training.sh",
+"reward/CMakeLists.txt",
+"reward/torcs_agent_network_reward.cpp",
+"reward/torcs_agent_network_reward.h",
+"reward/pylib/CMakeLists.txt",
+"reward/pylib/torcs_agent_network_reward_executor.cpp",
+"reward/pylib/torcs_agent_network_reward_executor.h",
+"reward/pylib/torcs_agent_network_reward_executor.i",
+"reward/pylib/armanpy/armanpy.hpp",
+"reward/pylib/armanpy/armanpy.i",
+"reward/pylib/armanpy/armanpy_1d.i",
+"reward/pylib/armanpy/armanpy_2d.i",
+"reward/pylib/armanpy/armanpy_3d.i",
+"reward/pylib/armanpy/numpy.i",
+"reinforcement_learning/__init__.py",
+"reinforcement_learning/strategy.py",
+"reinforcement_learning/agent.py",
+"reinforcement_learning/environment.py",
+"reinforcement_learning/replay_memory.py",
+"reinforcement_learning/util.py",
+"reinforcement_learning/cnnarch_logger.py",
+"reinforcement_learning/CNNCreator_torcs_agent_network_torcsCritic.py",
+"reinforcement_learning/CNNNet_torcs_agent_network_torcsCritic.py"
+)
+);
+}
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent;
import torcs.agent.network.TorcsActor;
component TorcsAgent {
ports
in Q^{29} state,
out Q(-1:1)^{3} action;
instance TorcsActor actor;
connect state -> actor.state;
connect actor.commands -> action;
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent;
conforms to de.monticore.lang.monticar.generator.roscpp.RosToEmamTagSchema;
tags TorcsAgent {
tag torcsAgent.state with RosConnection = {topic=(/torcs/state, std_msgs/Float32MultiArray)};
tag torcsAgent.action with RosConnection = {topic=(/torcs/step, std_msgs/Float32MultiArray)};
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent.network;
component Reward {
ports
in Q^{29} state,
in B isTerminal,
out Q reward;
implementation Math {
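// Decode normalized TORCS sensors: forward speed (scaled by 300), heading
// angle relative to the track axis (scaled by pi), and lateral track position.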
Q speedX = state(22) * 300;
Q angle = state(1) * 3.1416;
Q trackPos = state(21);
if speedX < 0
speedX = 0;
end
reward = (speedX * cos(angle)) - (speedX * sin(angle)) - (speedX * abs(trackPos));
if abs(trackPos) > 1.0
reward = -200;
end
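// Any out-of-range track sensor reading (indices 2..20) also counts as leaving the track.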
for i=2:20
if abs(state(i)) > 1.0
reward = -200;
end
end
}
}
/* (c) https://github.com/MontiCore/monticore */
configuration TorcsActor {
context : gpu
learning_method : reinforcement
agent_name: "TorcsAgent"
rl_algorithm: td3-algorithm
policy_noise: 0.2
noise_clip: 0.5
policy_delay: 2
critic: torcs.agent.network.torcsCritic
environment : ros_interface {
state_topic: "/torcs/state"
terminal_state_topic: "/torcs/terminal"
action_topic: "/torcs/step"
reset_topic: "/torcs/reset"
}
reward_function: torcs.agent.network.reward
num_episodes : 3500
discount_factor : 0.99
num_max_steps : 900000
training_interval : 1
start_training_at: 0
evaluation_samples: 1
soft_target_update_rate: 0.005
snapshot_interval : 150
replay_memory : buffer{
memory_size : 120000
sample_size : 100
}
strategy : ornstein_uhlenbeck{
epsilon : 1.0
min_epsilon : 0.0001
epsilon_decay_method: linear
epsilon_decay : 0.000008
epsilon_decay_start: 10
epsilon_decay_per_step: true
theta: (0.6, 1.0, 1.0)
mu: (0.0, 0.0, -1.2)
sigma: (0.3, 0.2, 0.05)
}
actor_optimizer: adam {
learning_rate: 0.001
}
critic_optimizer: adam {
learning_rate: 0.001
}
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent.network;
component TorcsActor {
ports
in Q^{29} state,
out Q(-1:1)^{3} commands;
implementation CNN {
state ->
FullyConnected(units=300) ->
Relu() ->
FullyConnected(units=600) ->
Relu() ->
FullyConnected(units=3) ->
Tanh() ->
commands;
}
}
/* (c) https://github.com/MontiCore/monticore */
configuration TorcsActor {
context : gpu
learning_method : reinforcement
agent_name: "TorcsAgent"
rl_algorithm: ddpg-algorithm
critic: torcs.agent.network.torcsCritic
environment : ros_interface {
state_topic: "/torcs/state"
terminal_state_topic: "/torcs/terminal"
action_topic: "/torcs/step"
reset_topic: "/torcs/reset"
}
reward_function: torcs.agent.network.reward
num_episodes : 3500
discount_factor : 0.99
num_max_steps : 900000
training_interval : 1
start_training_at: 0
evaluation_samples: 1
soft_target_update_rate: 0.001
snapshot_interval : 150
replay_memory : buffer{
memory_size : 120000
sample_size : 32
}
strategy : ornstein_uhlenbeck{
epsilon : 1.0
min_epsilon : 0.0001
epsilon_decay_method: linear
epsilon_decay : 0.000008
epsilon_decay_start: 10
epsilon_decay_per_step: true
theta: (0.6, 1.0, 1.0)
mu: (0.0, 0.0, -1.2)
sigma: (0.3, 0.2, 0.05)
}
actor_optimizer: adam {
learning_rate: 0.0001
}
critic_optimizer: adam {
learning_rate: 0.001
}
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent.network;
component TorcsCritic {
ports
in Q^{29} state,
in Q(-1:1)^{3} action,
out Q(-oo:oo)^{1} qvalues;
implementation CNN {
(state | action)->
Concatenate() ->
FullyConnected(units=300) ->
Relu() ->
FullyConnected(units=600) ->
Relu() ->
FullyConnected(units=1) ->
qvalues;
}
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent.network;
component TorcsCritic {
ports
in Q^{29} state,
in Q(-1:1)^{3} action,
out Q(-oo:oo)^{1} qvalues;
implementation CNN {
(
state ->
FullyConnected(units=300) ->
Relu() ->
FullyConnected(units=600)
|
action ->
FullyConnected(units=600)
)->
Add() ->
FullyConnected(units=600) ->
Relu() ->
FullyConnected(units=1) ->
qvalues;
}
}
/* (c) https://github.com/MontiCore/monticore */
// Copyright (C) 2012 thomas.natschlaeger@gmail.com
//
// This file is part of the ArmaNpy library.
......
cmake_minimum_required(VERSION 3.5)
set(CMAKE_CXX_STANDARD 14)
project(torcs_agent_torcsAgent LANGUAGES CXX)
#set cmake module path
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
# add dependencies
find_package(Armadillo REQUIRED)
set(INCLUDE_DIRS ${INCLUDE_DIRS} ${Armadillo_INCLUDE_DIRS})
set(LIBS ${LIBS} ${Armadillo_LIBRARIES})
# additional commands
set(LIBS ${LIBS} mxnet)
# create static library
include_directories(${INCLUDE_DIRS})
add_library(torcs_agent_torcsAgent torcs_agent_torcsAgent.cpp)
target_include_directories(torcs_agent_torcsAgent PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${INCLUDE_DIRS})
target_link_libraries(torcs_agent_torcsAgent PUBLIC ${LIBS})
set_target_properties(torcs_agent_torcsAgent PROPERTIES LINKER_LANGUAGE CXX)
# export cmake project
export(TARGETS torcs_agent_torcsAgent FILE torcs_agent_torcsAgent.cmake)
# additional commands end
#ifndef CNNBUFFERFILE_H
#define CNNBUFFERFILE_H
#include <stdio.h>
#include <iostream>
#include <fstream>
// Read file to buffer
class BufferFile {
public :
std::string file_path_;
int length_;
char* buffer_;
explicit BufferFile(std::string file_path)
:file_path_(file_path) {
std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary);
if (!ifs) {
std::cerr << "Can't open the file. Please check " << file_path << ". \n";
length_ = 0;
buffer_ = NULL;
return;
}
ifs.seekg(0, std::ios::end);
length_ = ifs.tellg();
ifs.seekg(0, std::ios::beg);
std::cout << file_path.c_str() << " ... "<< length_ << " bytes\n";
buffer_ = new char[sizeof(char) * length_];
ifs.read(buffer_, length_);
ifs.close();
}
int GetLength() {
return length_;
}
char* GetBuffer() {
return buffer_;
}
~BufferFile() {
if (buffer_) {
delete[] buffer_;
buffer_ = NULL;
}
}
};
#endif // CNNBUFFERFILE_H
import mxnet as mx
import logging
import os
from CNNNet_torcs_agent_torcsAgent_actor import Net_0
class CNNCreator_torcs_agent_torcsAgent_actor:
_model_dir_ = "model/torcs.agent.network.TorcsActor/"
_model_prefix_ = "model"
def __init__(self):
self.weight_initializer = mx.init.Normal()
self.networks = {}
def load(self, context):
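# Drop stale '_newest' exports, then load the highest-numbered checkpoint per
# network; returns the earliest such epoch so all parts resume from a common point.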
earliestLastEpoch = None
for i, network in self.networks.items():
lastEpoch = 0
param_file = None
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_" + str(i) + "_newest-0000.params")
except OSError:
pass
try:
os.remove(self._model_dir_ + self._model_prefix_ + "_" + str(i) + "_newest-symbol.json")
except OSError:
pass
if os.path.isdir(self._model_dir_):
for file in os.listdir(self._model_dir_):
if ".params" in file and self._model_prefix_ + "_" + str(i) in file:
epochStr = file.replace(".params","").replace(self._model_prefix_ + "_" + str(i) + "-","")
epoch = int(epochStr)
if epoch > lastEpoch:
lastEpoch = epoch
param_file = file
if param_file is None:
earliestLastEpoch = 0
else:
logging.info("Loading checkpoint: " + param_file)
network.load_parameters(self._model_dir_ + param_file)
if earliestLastEpoch is None or lastEpoch < earliestLastEpoch:
earliestLastEpoch = lastEpoch
return earliestLastEpoch
def construct(self, context, data_mean=None, data_std=None):
self.networks[0] = Net_0(data_mean=data_mean, data_std=data_std)
self.networks[0].collect_params().initialize(self.weight_initializer, ctx=context)
self.networks[0].hybridize()
self.networks[0](mx.nd.zeros((1,29,), ctx=context))
if not os.path.exists(self._model_dir_):
os.makedirs(self._model_dir_)
for i, network in self.networks.items():
network.export(self._model_dir_ + self._model_prefix_ + "_" + str(i), epoch=0)
\ No newline at end of file
import os
import h5py
import mxnet as mx
import logging
import sys
from mxnet import nd
class CNNDataLoader_torcs_agent_torcsAgent_actor:
_input_names_ = ['state']
_output_names_ = ['commands_label']
def __init__(self):
self._data_dir = "data/"
def load_data(self, batch_size):
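# Wraps train.h5/test.h5 in NDArrayIters and computes per-input mean/std for normalization.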
train_h5, test_h5 = self.load_h5_files()
train_data = {}
data_mean = {}
data_std = {}
for input_name in self._input_names_:
train_data[input_name] = train_h5[input_name]
data_mean[input_name] = nd.array(train_h5[input_name][:].mean(axis=0))
data_std[input_name] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
train_label = {}
for output_name in self._output_names_:
train_label[output_name] = train_h5[output_name]
train_iter = mx.io.NDArrayIter(data=train_data,
label=train_label,
batch_size=batch_size)
test_iter = None
if test_h5 is not None:
test_data = {}
for input_name in self._input_names_:
test_data[input_name] = test_h5[input_name]
test_label = {}
for output_name in self._output_names_:
test_label[output_name] = test_h5[output_name]
test_iter = mx.io.NDArrayIter(data=test_data,
label=test_label,
batch_size=batch_size)
return train_iter, test_iter, data_mean, data_std
def load_h5_files(self):
train_h5 = None
test_h5 = None
train_path = self._data_dir + "train.h5"
test_path = self._data_dir + "test.h5"
if os.path.isfile(train_path):
train_h5 = h5py.File(train_path, 'r')
for input_name in self._input_names_:
if input_name not in train_h5:
logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the dataset "
+ "'" + input_name + "'")
sys.exit(1)
for output_name in self._output_names_:
if output_name not in train_h5:
logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the dataset "
+ "'" + output_name + "'")
sys.exit(1)
if os.path.isfile(test_path):
test_h5 = h5py.File(test_path, 'r')
for input_name in self._input_names_:
if input_name not in test_h5:
logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the dataset "
+ "'" + input_name + "'")
sys.exit(1)
for output_name in self._output_names_:
if output_name not in test_h5:
logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the dataset "
+ "'" + output_name + "'")
sys.exit(1)
else:
logging.warning("Couldn't load test set. File '" + os.path.abspath(test_path) + "' does not exist.")
return train_h5, test_h5
else:
logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.")
sys.exit(1)
\ No newline at end of file
import mxnet as mx
import numpy as np
from mxnet import gluon
class OneHot(gluon.HybridBlock):
def __init__(self, size, **kwargs):
super(OneHot, self).__init__(**kwargs)
with self.name_scope():
self.size = size
def hybrid_forward(self, F, x):
return F.one_hot(indices=F.argmax(data=x, axis=1), depth=self.size)
class Softmax(gluon.HybridBlock):
def __init__(self, **kwargs):
super(Softmax, self).__init__(**kwargs)
def hybrid_forward(self, F, x):
return F.softmax(x)
class Split(gluon.HybridBlock):
def __init__(self, num_outputs, axis=1, **kwargs):
super(Split, self).__init__(**kwargs)
with self.name_scope():
self.axis = axis
self.num_outputs = num_outputs
def hybrid_forward(self, F, x):
return F.split(data=x, axis=self.axis, num_outputs=self.num_outputs)
class Concatenate(gluon.HybridBlock):
def __init__(self, dim=1, **kwargs):
super(Concatenate, self).__init__(**kwargs)
with self.name_scope():
self.dim = dim
def hybrid_forward(self, F, *x):
return F.concat(*x, dim=self.dim)
class ZScoreNormalization(gluon.HybridBlock):
def __init__(self, data_mean, data_std, **kwargs):
super(ZScoreNormalization, self).__init__(**kwargs)
with self.name_scope():
self.data_mean = self.params.get('data_mean', shape=data_mean.shape,
init=mx.init.Constant(data_mean.asnumpy().tolist()), differentiable=False)
self.data_std = self.params.get('data_std', shape=data_mean.shape,
init=mx.init.Constant(data_std.asnumpy().tolist()), differentiable=False)
def hybrid_forward(self, F, x, data_mean, data_std):
x = F.broadcast_sub(x, data_mean)
x = F.broadcast_div(x, data_std)
return x
class Padding(gluon.HybridBlock):
def __init__(self, padding, **kwargs):
super(Padding, self).__init__(**kwargs)
with self.name_scope():
self.pad_width = padding
def hybrid_forward(self, F, x):
x = F.pad(data=x,
mode='constant',
pad_width=self.pad_width,
constant_value=0)
return x
class NoNormalization(gluon.HybridBlock):
def __init__(self, **kwargs):
super(NoNormalization, self).__init__(**kwargs)
def hybrid_forward(self, F, x):
return x
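# Actor network generated from TorcsActor: 29 sensor inputs -> FC(300) -> ReLU -> FC(600) -> ReLU -> FC(3) -> tanh.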
class Net_0(gluon.HybridBlock):
def __init__(self, data_mean=None, data_std=None, **kwargs):
super(Net_0, self).__init__(**kwargs)
self.last_layers = {}
with self.name_scope():
if data_mean:
assert(data_std)
self.input_normalization_state_ = ZScoreNormalization(data_mean=data_mean['state_'],
data_std=data_std['state_'])
else:
self.input_normalization_state_ = NoNormalization()
self.fc1_ = gluon.nn.Dense(units=300, use_bias=True, flatten=True)
# fc1_, output shape: {[300,1,1]}
self.relu1_ = gluon.nn.Activation(activation='relu')
self.fc2_ = gluon.nn.Dense(units=600, use_bias=True, flatten=True)
# fc2_, output shape: {[600,1,1]}
self.relu2_ = gluon.nn.Activation(activation='relu')
self.fc3_ = gluon.nn.Dense(units=3, use_bias=True, flatten=True)
# fc3_, output shape: {[3,1,1]}
self.tanh3_ = gluon.nn.Activation(activation='tanh')
def hybrid_forward(self, F, state_):
state_ = self.input_normalization_state_(state_)
fc1_ = self.fc1_(state_)
relu1_ = self.relu1_(fc1_)
fc2_ = self.fc2_(relu1_)
relu2_ = self.relu2_(fc2_)
fc3_ = self.fc3_(relu2_)
tanh3_ = self.tanh3_(fc3_)
commands_ = tanh3_
return commands_
#ifndef CNNPREDICTOR_TORCS_AGENT_TORCSAGENT_ACTOR
#define CNNPREDICTOR_TORCS_AGENT_TORCSAGENT_ACTOR
#include <mxnet/c_predict_api.h>
#include <cassert>
#include <string>
#include <vector>
#include <CNNBufferFile.h>
class CNNPredictor_torcs_agent_torcsAgent_actor_0{
public:
const std::string json_file = "model/torcs.agent.network.TorcsActor/model_0_newest-symbol.json";
const std::string param_file = "model/torcs.agent.network.TorcsActor/model_0_newest-0000.params";
const std::vector<std::string> input_keys = {
"data"
};
const std::vector<std::vector<mx_uint>> input_shapes = {{1, 29}};
const bool use_gpu = false;
PredictorHandle handle;
explicit CNNPredictor_torcs_agent_torcsAgent_actor_0(){
init(json_file, param_file, input_keys, input_shapes, use_gpu);
}
~CNNPredictor_torcs_agent_torcsAgent_actor_0(){
if(handle) MXPredFree(handle);
}
void predict(const std::vector<float> &in_state_,
std::vector<float> &out_commands_){
MXPredSetInput(handle, input_keys[0].c_str(), in_state_.data(), static_cast<mx_uint>(in_state_.size()));
MXPredForward(handle);
mx_uint output_index;
mx_uint *shape = 0;
mx_uint shape_len;
size_t size;
output_index = 0;
MXPredGetOutputShape(handle, output_index, &shape, &shape_len);
size = 1;
for (mx_uint i = 0; i < shape_len; ++i) size *= shape[i];
assert(size == out_commands_.size());
MXPredGetOutput(handle, 0, &(out_commands_[0]), out_commands_.size());
}
void init(const std::string &json_file,
const std::string &param_file,
const std::vector<std::string> &input_keys,
const std::vector<std::vector<mx_uint>> &input_shapes,
const bool &use_gpu){
BufferFile json_data(json_file);
BufferFile param_data(param_file);
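// MXNet C predict API device types: 1 = kCPU, 2 = kGPU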
int dev_type = use_gpu ? 2 : 1;
int dev_id = 0;
if (json_data.GetLength() == 0 ||
param_data.GetLength() == 0) {
std::exit(-1);
}
const mx_uint num_input_nodes = input_keys.size();
const char* input_keys_ptr[num_input_nodes];
for(mx_uint i = 0; i < num_input_nodes; i++){
input_keys_ptr[i] = input_keys[i].c_str();
}
mx_uint shape_data_size = 0;
mx_uint input_shape_indptr[input_shapes.size() + 1];
input_shape_indptr[0] = 0;
for(mx_uint i = 0; i < input_shapes.size(); i++){
shape_data_size += input_shapes[i].size();
input_shape_indptr[i+1] = shape_data_size;
}
mx_uint input_shape_data[shape_data_size];
mx_uint index = 0;
for(mx_uint i = 0; i < input_shapes.size(); i++){
for(mx_uint j = 0; j < input_shapes[i].size(); j++){
input_shape_data[index] = input_shapes[i][j];
index++;
}
}
MXPredCreate(static_cast<const char*>(json_data.GetBuffer()),
static_cast<const char*>(param_data.GetBuffer()),
static_cast<size_t>(param_data.GetLength()),
dev_type,
dev_id,
num_input_nodes,
input_keys_ptr,
input_shape_indptr,
input_shape_data,
&handle);
assert(handle);
}
};
#endif // CNNPREDICTOR_TORCS_AGENT_TORCSAGENT_ACTOR
import mxnet as mx
import logging
import numpy as np
import time
import os
import shutil
from mxnet import gluon, autograd, nd
class CrossEntropyLoss(gluon.loss.Loss):
def __init__(self, axis=-1, sparse_label=True, weight=None, batch_axis=0, **kwargs):
super(CrossEntropyLoss, self).__init__(weight, batch_axis, **kwargs)
self._axis = axis
self._sparse_label = sparse_label
def hybrid_forward(self, F, pred, label, sample_weight=None):
pred = F.log(pred)
if self._sparse_label:
loss = -F.pick(pred, label, axis=self._axis, keepdims=True)
else:
label = gluon.loss._reshape_like(F, label, pred)
loss = -F.sum(pred * label, axis=self._axis, keepdims=True)
loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
return F.mean(loss, axis=self._batch_axis, exclude=True)
class LogCoshLoss(gluon.loss.Loss):
def __init__(self, weight=None, batch_axis=0, **kwargs):
super(LogCoshLoss, self).__init__(weight, batch_axis, **kwargs)
def hybrid_forward(self, F, pred, label, sample_weight=None):
loss = F.log(F.cosh(pred - label))
loss = gluon.loss._apply_weighting(F, loss, self._weight, sample_weight)
return F.mean(loss, axis=self._batch_axis, exclude=True)
class CNNSupervisedTrainer_torcs_agent_torcsAgent_actor:
def __init__(self, data_loader, net_constructor):
self._data_loader = data_loader
self._net_creator = net_constructor
self._networks = {}
def train(self, batch_size=64,
num_epoch=10,
eval_metric='acc',
loss ='softmax_cross_entropy',
loss_params={},
optimizer='adam',
optimizer_params=(('learning_rate', 0.001),),
load_checkpoint=True,
context='gpu',
checkpoint_period=5,
normalize=True):
if context == 'gpu':
mx_context = mx.gpu()
elif context == 'cpu':
mx_context = mx.cpu()
else:
logging.error("Context argument is '" + context + "'. Only 'cpu' and 'gpu are valid arguments'.")
if 'weight_decay' in optimizer_params:
optimizer_params['wd'] = optimizer_params['weight_decay']
del optimizer_params['weight_decay']
if 'learning_rate_decay' in optimizer_params:
min_learning_rate = 1e-08
if 'learning_rate_minimum' in optimizer_params:
min_learning_rate = optimizer_params['learning_rate_minimum']
del optimizer_params['learning_rate_minimum']
optimizer_params['lr_scheduler'] = mx.lr_scheduler.FactorScheduler(
optimizer_params['step_size'],
factor=optimizer_params['learning_rate_decay'],
stop_factor_lr=min_learning_rate)
del optimizer_params['step_size']
del optimizer_params['learning_rate_decay']
train_iter, test_iter, data_mean, data_std = self._data_loader.load_data(batch_size)
if normalize:
self._net_creator.construct(context=mx_context, data_mean=data_mean, data_std=data_std)
else:
self._net_creator.construct(context=mx_context)
begin_epoch = 0
if load_checkpoint:
begin_epoch = self._net_creator.load(mx_context)
else:
if os.path.isdir(self._net_creator._model_dir_):
shutil.rmtree(self._net_creator._model_dir_)
self._networks = self._net_creator.networks
try:
os.makedirs(self._net_creator._model_dir_)
except OSError:
if not os.path.isdir(self._net_creator._model_dir_):
raise
trainers = [mx.gluon.Trainer(network.collect_params(), optimizer, optimizer_params) for network in self._networks.values()]
margin = loss_params['margin'] if 'margin' in loss_params else 1.0
sparseLabel = loss_params['sparse_label'] if 'sparse_label' in loss_params else True
if loss == 'softmax_cross_entropy':
fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else False
loss_function = mx.gluon.loss.SoftmaxCrossEntropyLoss(from_logits=fromLogits, sparse_label=sparseLabel)
elif loss == 'sigmoid_binary_cross_entropy':
loss_function = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss()
elif loss == 'cross_entropy':
loss_function = CrossEntropyLoss(sparse_label=sparseLabel)
elif loss == 'l2':
loss_function = mx.gluon.loss.L2Loss()
elif loss == 'l1':
loss_function = mx.gluon.loss.L1Loss()
elif loss == 'huber':
rho = loss_params['rho'] if 'rho' in loss_params else 1
loss_function = mx.gluon.loss.HuberLoss(rho=rho)
elif loss == 'hinge':
loss_function = mx.gluon.loss.HingeLoss(margin=margin)
elif loss == 'squared_hinge':
loss_function = mx.gluon.loss.SquaredHingeLoss(margin=margin)
elif loss == 'logistic':
labelFormat = loss_params['label_format'] if 'label_format' in loss_params else 'signed'
loss_function = mx.gluon.loss.LogisticLoss(label_format=labelFormat)
elif loss == 'kullback_leibler':
fromLogits = loss_params['from_logits'] if 'from_logits' in loss_params else True
loss_function = mx.gluon.loss.KLDivLoss(from_logits=fromLogits)
elif loss == 'log_cosh':
loss_function = LogCoshLoss()
else:
logging.error("Invalid loss parameter.")
speed_period = 50
tic = None
for epoch in range(begin_epoch, begin_epoch + num_epoch):
train_iter.reset()
for batch_i, batch in enumerate(train_iter):
state_data = batch.data[0].as_in_context(mx_context)
commands_label = batch.label[0].as_in_context(mx_context)
with autograd.record():
commands_output = self._networks[0](state_data)
loss = \
loss_function(commands_output, commands_label)
loss.backward()
for trainer in trainers:
trainer.step(batch_size)
if tic is None:
tic = time.time()
else:
if batch_i % speed_period == 0:
try:
speed = speed_period * batch_size / (time.time() - tic)
except ZeroDivisionError:
speed = float("inf")
logging.info("Epoch[%d] Batch[%d] Speed: %.2f samples/sec" % (epoch, batch_i, speed))
tic = time.time()
tic = None
train_iter.reset()
metric = mx.metric.create(eval_metric)
for batch_i, batch in enumerate(train_iter):
state_data = batch.data[0].as_in_context(mx_context)
labels = [
batch.label[0].as_in_context(mx_context)
]
if True: # Fix indentation
commands_output = self._networks[0](state_data)
predictions = [
mx.nd.argmax(commands_output, axis=1)
]
metric.update(preds=predictions, labels=labels)
train_metric_score = metric.get()[1]
test_iter.reset()
metric = mx.metric.create(eval_metric)
for batch_i, batch in enumerate(test_iter):
state_data = batch.data[0].as_in_context(mx_context)
labels = [
batch.label[0].as_in_context(mx_context)
]
if True: # Fix indentation
commands_output = self._networks[0](state_data)
predictions = [
mx.nd.argmax(commands_output, axis=1)
]
metric.update(preds=predictions, labels=labels)
test_metric_score = metric.get()[1]
logging.info("Epoch[%d] Train: %f, Test: %f" % (epoch, train_metric_score, test_metric_score))
if (epoch - begin_epoch) % checkpoint_period == 0:
for i, network in self._networks.items():
network.save_parameters(self.parameter_path(i) + '-' + str(epoch).zfill(4) + '.params')
for i, network in self._networks.items():
network.save_parameters(self.parameter_path(i) + '-' + str(num_epoch + begin_epoch).zfill(4) + '.params')
network.export(self.parameter_path(i) + '_newest', epoch=0)
def parameter_path(self, index):
return self._net_creator._model_dir_ + self._net_creator._model_prefix_ + '_' + str(index)
\ No newline at end of file
from reinforcement_learning.agent import TwinDelayedDdpgAgent
from reinforcement_learning.util import AgentSignalHandler
from reinforcement_learning.cnnarch_logger import ArchLogger
from reinforcement_learning.CNNCreator_torcs_agent_network_torcsCritic import CNNCreator_torcs_agent_network_torcsCritic
import reinforcement_learning.environment
import CNNCreator_torcs_agent_torcsAgent_actor
import os
import sys
import re
import time
import numpy as np
import mxnet as mx
def resume_session(sessions_dir):
resume_session = False
resume_directory = None
if os.path.isdir(sessions_dir):
regex = re.compile(r'\d\d\d\d-\d\d-\d\d-\d\d-\d\d')
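# Session directories are stamped '%Y-%m-%d-%H-%M-%S'; regex.search matches this prefix.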
dir_content = os.listdir(sessions_dir)
session_files = filter(regex.search, dir_content)
session_files.sort(reverse=True)
for d in session_files:
interrupted_session_dir = os.path.join(sessions_dir, d, '.interrupted_session')
if os.path.isdir(interrupted_session_dir):
resume = raw_input('Interrupted session from {} found. Do you want to resume? (y/n) '.format(d))
if resume == 'y':
resume_session = True
resume_directory = interrupted_session_dir
break
return resume_session, resume_directory
if __name__ == "__main__":
agent_name = 'TorcsAgent'
# Prepare output directory and logger
all_output_dir = os.path.join('model', agent_name)
output_directory = os.path.join(
all_output_dir,
time.strftime('%Y-%m-%d-%H-%M-%S',
time.localtime(time.time())))
ArchLogger.set_output_directory(output_directory)
ArchLogger.set_logger_name(agent_name)
ArchLogger.set_output_level(ArchLogger.INFO)
env_params = {
'ros_node_name': 'torcs_agent_torcsAgent_actorTrainerNode',
'state_topic': '/torcs/state',
'action_topic': '/torcs/step',
'reset_topic': '/torcs/reset',
'terminal_state_topic': '/torcs/terminal',
}
env = reinforcement_learning.environment.RosEnvironment(**env_params)
context = mx.gpu()
actor_creator = CNNCreator_torcs_agent_torcsAgent_actor.CNNCreator_torcs_agent_torcsAgent_actor()
actor_creator.construct(context)
critic_creator = CNNCreator_torcs_agent_network_torcsCritic()
critic_creator.construct(context)
agent_params = {
'environment': env,
'replay_memory_params': {
'method': 'buffer',
'memory_size': 120000,
'sample_size': 100,
'state_dtype': 'float32',
'action_dtype': 'float32',
'rewards_dtype': 'float32'
},
'strategy_params': {
'method':'ornstein_uhlenbeck',
'epsilon': 1,
'min_epsilon': 0.0001,
'epsilon_decay_method': 'linear',
'epsilon_decay': 0.000008,
'epsilon_decay_start': 10,
'epsilon_decay_per_step': True,
'action_low': -1,
'action_high': 1,
'mu': [0, 0, -1.2],
'theta': [0.6, 1, 1],
'sigma': [0.3, 0.2, 0.05],
},
'agent_name': agent_name,
'verbose': True,
'output_directory': output_directory,
'state_dim': (29,),
'action_dim': (3,),
'ctx': 'gpu',
'discount_factor': 0.99,
'training_episodes': 3500,
'train_interval': 1,
'start_training': 0,
'snapshot_interval': 150,
'max_episode_step': 900000,
'evaluation_samples': 1,
'actor': actor_creator.networks[0],
'critic': critic_creator.networks[0],
'soft_target_update_rate': 0.005,
'actor_optimizer': 'adam',
'actor_optimizer_params': {
'learning_rate': 0.001},
'critic_optimizer': 'adam',
'critic_optimizer_params': {
'learning_rate': 0.001},
'policy_noise': 0.2,
'noise_clip': 0.5,
'policy_delay': 2,
}
resume, resume_directory = resume_session(all_output_dir)
if resume:
output_directory, _ = os.path.split(resume_directory)
ArchLogger.set_output_directory(output_directory)
resume_agent_params = {
'session_dir': resume_directory,
'environment': env,
'actor': actor_creator.networks[0],
'critic': critic_creator.networks[0]
}
agent = TwinDelayedDdpgAgent.resume_from_session(**resume_agent_params)
else:
agent = TwinDelayedDdpgAgent(**agent_params)
signal_handler = AgentSignalHandler()
signal_handler.register_agent(agent)
train_successful = agent.train()
if train_successful:
agent.export_best_network(path=actor_creator._model_dir_ + actor_creator._model_prefix_ + '_0_newest', epoch=0)
/* (c) https://github.com/MontiCore/monticore */
#ifndef CNNTRANSLATOR_H
#define CNNTRANSLATOR_H
#include <armadillo>
#include <cassert>
using namespace std;
using namespace arma;
class CNNTranslator{
public:
template<typename T> static void addColToSTDVector(const Col<T> &source, vector<float> &data){
for(size_t i = 0; i < source.n_elem; i++){
data.push_back((float) source(i));
}
}
template<typename T> static void addRowToSTDVector(const subview_row<T> &source, vector<float> &data){
for(size_t i = 0; i < source.n_elem; i++){
data.push_back((float) source(i));
}
}
template<typename T> static void addRowToSTDVector(const Row<T> &source, vector<float> &data){
for(size_t i = 0; i < source.n_elem; i++){
data.push_back((float) source(i));
}
}
template<typename T> static void addMatToSTDVector(const Mat<T> &source, vector<float> &data){
for(size_t i = 0; i < source.n_rows; i++){
addRowToSTDVector(source.row(i), data);
}
}
template<typename T> static vector<float> translate(const Col<T> &source){
size_t size = source.n_elem;
vector<float> data;
data.reserve(size);
addColToSTDVector(source, data);
return data;
}
template<typename T> static vector<float> translate(const Row<T> &source){
size_t size = source.n_elem;
vector<float> data;
data.reserve(size);
addRowToSTDVector(source, data);
return data;
}
template<typename T> static vector<float> translate(const Mat<T> &source){
size_t size = source.n_elem;
vector<float> data;
data.reserve(size);
addMatToSTDVector(source, data);
return data;
}
template<typename T> static vector<float> translate(const Cube<T> &source){
size_t size = source.n_elem;
vector<float> data;
data.reserve(size);
for(size_t i = 0; i < source.n_slices; i++){
addMatToSTDVector(source.slice(i), data);
}
return data;
}
static vec translateToCol(const vector<float> &source, const vector<size_t> &shape){
assert(shape.size() == 1);
vec column(shape[0]);
for(size_t i = 0; i < source.size(); i++){
column(i) = (double) source[i];
}
return column;
}
static mat translateToMat(const vector<float> &source, const vector<size_t> &shape){
assert(shape.size() == 2);
mat matrix(shape[1], shape[0]); //create transposed version of the matrix
int startPos = 0;
int endPos = matrix.n_rows;
const vector<size_t> columnShape = {matrix.n_rows};
for(size_t i = 0; i < matrix.n_cols; i++){
vector<float> colSource(&source[startPos], &source[endPos]);
matrix.col(i) = translateToCol(colSource, columnShape);
startPos = endPos;
endPos += matrix.n_rows;
}
return matrix.t();
}
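// shape = {slices, rows, cols}; the flat source is slice-major with row-major slices.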
static cube translateToCube(const vector<float> &source, const vector<size_t> &shape){
assert(shape.size() == 3);
cube cubeMatrix(shape[1], shape[2], shape[0]);
const int matrixSize = shape[1] * shape[2];
const vector<size_t> matrixShape = {shape[1], shape[2]};
int startPos = 0;
int endPos = matrixSize;
for(size_t i = 0; i < cubeMatrix.n_slices; i++){
vector<float> matrixSource(&source[startPos], &source[endPos]);
cubeMatrix.slice(i) = translateToMat(matrixSource, matrixShape);
startPos = endPos;
endPos += matrixSize;
}
return cubeMatrix;
}
template<typename T> static vector<size_t> getShape(const Col<T> &source){
return {source.n_elem};
}
template<typename T> static vector<size_t> getShape(const Row<T> &source){
return {source.n_elem};
}
template<typename T> static vector<size_t> getShape(const Mat<T> &source){
return {source.n_rows, source.n_cols};
}
template<typename T> static vector<size_t> getShape(const Cube<T> &source){
return {source.n_slices, source.n_rows, source.n_cols};
}
};
#endif
#ifndef HELPERA_H
#define HELPERA_H
#include <iostream>
#include "armadillo"
#include <stdarg.h>
#include <initializer_list>
#include <fstream>
using namespace arma;
#ifndef _FILESTRING_CONVERSION___A
#define _FILESTRING_CONVERSION___A
void toFileString(std::ofstream& myfile, mat A){
myfile << "[";
for (int i = 0; i < A.n_rows; i++){
for (int j = 0; j < A.n_cols; j++){
myfile << A(i,j);
if(j + 1 < A.n_cols){
myfile << ", ";
}
}
if(i + 1 < A.n_rows){
myfile << ";";
}
}
myfile << "]";
}
void toFileString(std::ofstream& myfile, double A){
myfile << A;
}
void toFileString(std::ofstream& myfile, float A){
myfile << A;
}
void toFileString(std::ofstream& myfile, int A){
myfile << A;
}
void toFileString(std::ofstream& myfile, bool A){
myfile << A;
}
bool Is_close(mat& X, mat& Y, double tol)
{
// abs returns a mat type then max checks columns and returns a row_vec
// max used again will return the biggest element in the row_vec
bool close(false);
if(arma::max(arma::max(arma::abs(X-Y))) < tol)
{
close = true;
}
return close;
}
#endif
class HelperA{
public:
static mat getEigenVectors(mat A){
vec eigenValues;
mat eigenVectors;
eig_sym(eigenValues,eigenVectors,A);
return eigenVectors;
}
static vec getEigenValues(mat A){
vec eigenValues;
mat eigenVectors;
eig_sym(eigenValues,eigenVectors,A);
return eigenValues;
}
static mat getKMeansClusters(mat A, int k){
mat clusters;
kmeans(clusters,A.t(),k,random_subset,20,true);
/*printf("cluster centroid calculation done\n");
std::ofstream myfile;
myfile.open("data after cluster.txt");
myfile << A;
myfile.close();
std::ofstream myfile2;
myfile2.open("cluster centroids.txt");
myfile2 << clusters;
myfile2.close();*/
mat indexedData=getKMeansClustersIndexData(A.t(), clusters);
/*std::ofstream myfile3;
myfile3.open("data after index.txt");
myfile3 << indexedData;
myfile3.close();
*/
return indexedData;
}
static mat getKMeansClustersIndexData(mat A, mat centroids){
mat result=mat(A.n_cols, 1);
for(int i=0;i<A.n_cols;++i){
result(i, 0) = getIndexForClusterCentroids(A, i, centroids);
}
return result;
}
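// Returns the 1-based index of the centroid closest to column colIndex of A.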
static int getIndexForClusterCentroids(mat A, int colIndex, mat centroids){
int index=1;
double lowestDistance=getEuclideanDistance(A, colIndex, centroids, 0);
for(int i=1;i<centroids.n_cols;++i){
double curDistance=getEuclideanDistance(A, colIndex, centroids, i);
if(curDistance<lowestDistance){
lowestDistance=curDistance;
index=i+1;
}
}
return index;
}
static double getEuclideanDistance(mat A, int colIndexA, mat B, int colIndexB){
double distance=0;
for(int i=0;i<A.n_rows;++i){
double elementA=A(i,colIndexA);
double elementB=B(i,colIndexB);
double diff=elementA-elementB;
distance+=diff*diff;
}
return sqrt(distance);
}
static mat getSqrtMat(mat A){
cx_mat result=sqrtmat(A);
return real(result);
}
static mat getSqrtMatDiag(mat A){
for(int i=0;i<A.n_rows;++i){
double curVal = A(i,i);
A(i,i) = sqrt(curVal);
}
return A;
}
static mat invertDiagMatrix(mat A){
for(int i=0;i<A.n_rows;++i){
double curVal = A(i,i);
A(i,i) = 1/curVal;
}
return A;
}
};
#endif
# Automatically generated file
#
# - Try to find Armadillo
# Once done this will define
# Armadillo_FOUND - System has Armadillo
# Armadillo_INCLUDE_DIRS - The Armadillo include directories
# Armadillo_LIBRARY_DIRS - The library directories needed to use Armadillo
# Armadillo_LIBRARIES - The libraries needed to use Armadillo
find_path(Armadillo_INCLUDE_DIR
NAMES armadillo
PATH_SUFFIXES "include"
PATHS
HINTS $ENV{Armadillo_HOME}
)
find_library(Armadillo_LIBRARY
NAMES armadillo
PATH_SUFFIXES "lib" "lib64" "lib/x86_64-linux-gnu" "examples/lib_win64" "build" "Release"
PATHS
HINTS $ENV{Armadillo_HOME}
)
include(FindPackageHandleStandardArgs)
# if all listed variables are TRUE
find_package_handle_standard_args(
Armadillo
DEFAULT_MSG
Armadillo_INCLUDE_DIR
Armadillo_LIBRARY
)