Commit 08f5f619 authored by Evgeny Kusmenko

Merge branch 'release-gluon-integration' into 'master'

Update Gluon Generator and CNNTrain and minor test changes

See merge request !29
parents 9e4a0b62 df882e6e
Pipeline #191332 passed with stages
in 9 minutes and 19 seconds
......@@ -9,7 +9,7 @@
<groupId>de.monticore.lang.monticar</groupId>
<artifactId>embedded-montiarc-emadl-generator</artifactId>
<version>0.3.6-SNAPSHOT</version>
<version>0.3.7-SNAPSHOT</version>
<!-- == PROJECT DEPENDENCIES ============================================= -->
......@@ -17,11 +17,11 @@
<!-- .. SE-Libraries .................................................. -->
<emadl.version>0.2.10-SNAPSHOT</emadl.version>
<CNNTrain.version>0.3.6-SNAPSHOT</CNNTrain.version>
<CNNTrain.version>0.3.7-SNAPSHOT</CNNTrain.version>
<cnnarch-generator.version>0.0.4-SNAPSHOT</cnnarch-generator.version>
<cnnarch-mxnet-generator.version>0.2.17-SNAPSHOT</cnnarch-mxnet-generator.version>
<cnnarch-caffe2-generator.version>0.2.13-SNAPSHOT</cnnarch-caffe2-generator.version>
<cnnarch-gluon-generator.version>0.2.8-SNAPSHOT</cnnarch-gluon-generator.version>
<cnnarch-gluon-generator.version>0.2.9-SNAPSHOT</cnnarch-gluon-generator.version>
<cnnarch-tensorflow-generator.version>0.1.0-SNAPSHOT</cnnarch-tensorflow-generator.version>
<embedded-montiarc-math-opt-generator>0.1.4</embedded-montiarc-math-opt-generator>
......
......@@ -4,7 +4,6 @@ package de.monticore.lang.monticar.emadl;
import de.monticore.lang.monticar.emadl.generator.EMADLGeneratorCli;
import de.se_rwth.commons.logging.Finding;
import de.se_rwth.commons.logging.Log;
import org.junit.Ignore;
import org.junit.Test;
import java.nio.file.Paths;
......@@ -12,6 +11,7 @@ import java.util.Arrays;
import java.util.stream.Collectors;
import static junit.framework.TestCase.assertTrue;
import static org.junit.Assume.assumeFalse;
/**
*
......@@ -19,6 +19,7 @@ import static junit.framework.TestCase.assertTrue;
public class IntegrationPythonWrapperTest extends AbstractSymtabTest {
@Test
public void testGluonReinforcementModelRosEnvironment() {
assumeFalse(System.getProperty("os.name").toLowerCase().startsWith("win"));
Log.getFindings().clear();
String[] args = {"-m", "src/test/resources/models/reinforcementModel", "-r", "torcs.agent.TorcsAgent", "-b", "GLUON", "-f", "n", "-c", "n"};
EMADLGeneratorCli.main(args);
......@@ -71,4 +72,51 @@ public class IntegrationPythonWrapperTest extends AbstractSymtabTest {
"./target/generated-sources-emadl/reinforcement_learning/torcs_agent_dqn_reward_executor.py")
.toFile().exists());
}
/**
 * Runs {@link EMADLGeneratorCli} on the {@code torcs_td3} reinforcement model
 * with the GLUON backend, asserts that no error finding was logged, and checks
 * that the listed files under {@code target/generated-sources-emadl} equal the
 * expected files under the {@code torcs_td3} target-code directory.
 */
@Test
public void testTorcsTD3() {
    Log.getFindings().clear();

    final String[] cliArguments = {
        "-m", "src/test/resources/models/reinforcementModel/torcs_td3",
        "-r", "torcs.agent.TorcsAgent",
        "-b", "GLUON",
        "-f", "n",
        "-c", "n"
    };
    EMADLGeneratorCli.main(cliArguments);

    // The generator run must not have produced a single error finding.
    assertTrue(Log.getFindings().stream().noneMatch(Finding::isError));

    checkFilesAreEqual(
        Paths.get("./target/generated-sources-emadl"),
        Paths.get("./src/test/resources/target_code/gluon/reinforcementModel/torcs_td3"),
        Arrays.asList(
            // agent component and actor network
            "CMakeLists.txt",
            "CNNBufferFile.h",
            "torcs_agent_torcsAgent.cpp",
            "torcs_agent_torcsAgent.h",
            "torcs_agent_torcsAgent_actor.h",
            "CNNCreator_torcs_agent_torcsAgent_actor.py",
            "CNNNet_torcs_agent_torcsAgent_actor.py",
            "CNNPredictor_torcs_agent_torcsAgent_actor.h",
            "CNNTrainer_torcs_agent_torcsAgent_actor.py",
            "start_training.sh",
            // reward component
            "reward/CMakeLists.txt",
            "reward/torcs_agent_network_reward.cpp",
            "reward/torcs_agent_network_reward.h",
            // reward executor files under reward/pylib
            "reward/pylib/CMakeLists.txt",
            "reward/pylib/torcs_agent_network_reward_executor.cpp",
            "reward/pylib/torcs_agent_network_reward_executor.h",
            "reward/pylib/torcs_agent_network_reward_executor.i",
            "reward/pylib/armanpy/armanpy.hpp",
            "reward/pylib/armanpy/armanpy.i",
            "reward/pylib/armanpy/armanpy_1d.i",
            "reward/pylib/armanpy/armanpy_2d.i",
            "reward/pylib/armanpy/armanpy_3d.i",
            "reward/pylib/armanpy/numpy.i",
            // reinforcement-learning Python package
            "reinforcement_learning/__init__.py",
            "reinforcement_learning/strategy.py",
            "reinforcement_learning/agent.py",
            "reinforcement_learning/environment.py",
            "reinforcement_learning/replay_memory.py",
            "reinforcement_learning/util.py",
            "reinforcement_learning/cnnarch_logger.py",
            "reinforcement_learning/CNNCreator_torcs_agent_network_torcsCritic.py",
            "reinforcement_learning/CNNNet_torcs_agent_network_torcsCritic.py"
        )
    );
}
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent;
import torcs.agent.network.TorcsActor;
/*
 * Top-level agent component. It wraps a single TorcsActor instance: the
 * 29-element state input is forwarded to actor.state, and the actor's
 * commands output is exposed as the 3-element action output, whose
 * elements are declared with the range -1..1.
 */
component TorcsAgent {
ports
in Q^{29} state,
out Q(-1:1)^{3} action;
instance TorcsActor actor;
connect state -> actor.state;
connect actor.commands -> action;
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent;
conforms to de.monticore.lang.monticar.generator.roscpp.RosToEmamTagSchema;
/*
 * ROS connection tags for the torcsAgent ports:
 *   torcsAgent.state  <-> topic /torcs/state (std_msgs/Float32MultiArray)
 *   torcsAgent.action <-> topic /torcs/step  (std_msgs/Float32MultiArray)
 */
tags TorcsAgent {
tag torcsAgent.state with RosConnection = {topic=(/torcs/state, std_msgs/Float32MultiArray)};
tag torcsAgent.action with RosConnection = {topic=(/torcs/step, std_msgs/Float32MultiArray)};
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent.network;
/*
 * Reward function computed from the 29-element state vector.
 *
 * Inputs read: speedX = state(22) * 300, angle = state(1) * 3.1416 and
 * trackPos = state(21). A negative speedX is clamped to 0.
 *
 * Base reward:
 *   speedX * cos(angle) - speedX * sin(angle) - speedX * abs(trackPos)
 *
 * The reward is overridden with -200 when abs(trackPos) > 1.0, or when any
 * of state(2) .. state(20) has a magnitude above 1.0.
 *
 * The isTerminal input is declared but not read by this implementation.
 *
 * NOTE(review): the factors 300 and 3.1416 presumably undo a normalisation of
 * speed and angle applied by the environment -- confirm against the publisher
 * of the state topic.
 */
component Reward {
ports
in Q^{29} state,
in B isTerminal,
out Q reward;
implementation Math {
Q speedX = state(22) * 300;
Q angle = state(1) * 3.1416;
Q trackPos = state(21);
if speedX < 0
speedX = 0;
end
reward = (speedX * cos(angle)) - (speedX * sin(angle)) - (speedX * abs(trackPos));
if abs(trackPos) > 1.0
reward = -200;
end
for i=2:20
if abs(state(i)) > 1.0
reward = -200;
end
end
}
}
/* (c) https://github.com/MontiCore/monticore */
/*
 * Training configuration for the TorcsActor network using reinforcement
 * learning with the td3-algorithm.
 *
 * - TD3-specific settings: policy_noise, noise_clip and policy_delay.
 * - Critic network: torcs.agent.network.torcsCritic.
 * - Environment: ROS interface using the /torcs/state, /torcs/terminal,
 *   /torcs/step and /torcs/reset topics.
 * - Reward: the torcs.agent.network.reward component.
 * - Replay memory: buffer with 120000 entries, sampled 100 at a time.
 * - Exploration: ornstein_uhlenbeck strategy with linearly decaying epsilon
 *   and one theta/mu/sigma value per action dimension (3 values each).
 * - Optimizers: adam for both actor and critic, learning rate 0.001 each.
 */
configuration TorcsActor {
context : gpu
learning_method : reinforcement
agent_name: "TorcsAgent"
rl_algorithm: td3-algorithm
policy_noise: 0.2
noise_clip: 0.5
policy_delay: 2
critic: torcs.agent.network.torcsCritic
environment : ros_interface {
state_topic: "/torcs/state"
terminal_state_topic: "/torcs/terminal"
action_topic: "/torcs/step"
reset_topic: "/torcs/reset"
}
reward_function: torcs.agent.network.reward
num_episodes : 3500
discount_factor : 0.99
num_max_steps : 900000
training_interval : 1
start_training_at: 0
evaluation_samples: 1
soft_target_update_rate: 0.005
snapshot_interval : 150
replay_memory : buffer{
memory_size : 120000
sample_size : 100
}
strategy : ornstein_uhlenbeck{
epsilon : 1.0
min_epsilon : 0.0001
epsilon_decay_method: linear
epsilon_decay : 0.000008
epsilon_decay_start: 10
epsilon_decay_per_step: true
theta: (0.6, 1.0, 1.0)
mu: (0.0, 0.0, -1.2)
sigma: (0.3, 0.2, 0.05)
}
actor_optimizer: adam {
learning_rate: 0.001
}
critic_optimizer: adam {
learning_rate: 0.001
}
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent.network;
/*
 * Actor network: maps the 29-element state to 3 commands.
 *
 * Layers: FullyConnected(300) -> Relu -> FullyConnected(600) -> Relu ->
 * FullyConnected(3) -> Tanh. The commands port is declared with the
 * range -1..1 per element.
 */
component TorcsActor {
ports
in Q^{29} state,
out Q(-1:1)^{3} commands;
implementation CNN {
state ->
FullyConnected(units=300) ->
Relu() ->
FullyConnected(units=600) ->
Relu() ->
FullyConnected(units=3) ->
Tanh() ->
commands;
}
}
/* (c) https://github.com/MontiCore/monticore */
/*
 * Training configuration for the TorcsActor network using reinforcement
 * learning with the ddpg-algorithm.
 *
 * - Critic network: torcs.agent.network.torcsCritic.
 * - Environment: ROS interface using the /torcs/state, /torcs/terminal,
 *   /torcs/step and /torcs/reset topics.
 * - Reward: the torcs.agent.network.reward component.
 * - Replay memory: buffer with 120000 entries, sampled 32 at a time.
 * - Exploration: ornstein_uhlenbeck strategy with linearly decaying epsilon
 *   and one theta/mu/sigma value per action dimension (3 values each).
 * - Optimizers: adam for both networks; the actor uses learning rate 0.0001,
 *   the critic 0.001.
 */
configuration TorcsActor {
context : gpu
learning_method : reinforcement
agent_name: "TorcsAgent"
rl_algorithm: ddpg-algorithm
critic: torcs.agent.network.torcsCritic
environment : ros_interface {
state_topic: "/torcs/state"
terminal_state_topic: "/torcs/terminal"
action_topic: "/torcs/step"
reset_topic: "/torcs/reset"
}
reward_function: torcs.agent.network.reward
num_episodes : 3500
discount_factor : 0.99
num_max_steps : 900000
training_interval : 1
start_training_at: 0
evaluation_samples: 1
soft_target_update_rate: 0.001
snapshot_interval : 150
replay_memory : buffer{
memory_size : 120000
sample_size : 32
}
strategy : ornstein_uhlenbeck{
epsilon : 1.0
min_epsilon : 0.0001
epsilon_decay_method: linear
epsilon_decay : 0.000008
epsilon_decay_start: 10
epsilon_decay_per_step: true
theta: (0.6, 1.0, 1.0)
mu: (0.0, 0.0, -1.2)
sigma: (0.3, 0.2, 0.05)
}
actor_optimizer: adam {
learning_rate: 0.0001
}
critic_optimizer: adam {
learning_rate: 0.001
}
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent.network;
/*
 * Critic network: maps a (state, action) pair to a single unbounded value.
 *
 * The 29-element state and the 3-element action are joined with
 * Concatenate(), then passed through FullyConnected(300) -> Relu ->
 * FullyConnected(600) -> Relu -> FullyConnected(1) to produce qvalues.
 */
component TorcsCritic {
ports
in Q^{29} state,
in Q(-1:1)^{3} action,
out Q(-oo:oo)^{1} qvalues;
implementation CNN {
(state | action)->
Concatenate() ->
FullyConnected(units=300) ->
Relu() ->
FullyConnected(units=600) ->
Relu() ->
FullyConnected(units=1) ->
qvalues;
}
}
/* (c) https://github.com/MontiCore/monticore */
package torcs.agent.network;
/*
 * Critic network: maps a (state, action) pair to a single unbounded value.
 *
 * Two parallel branches are merged with Add():
 *   state  -> FullyConnected(300) -> Relu -> FullyConnected(600)
 *   action -> FullyConnected(600)
 * The sum is passed through FullyConnected(600) -> Relu ->
 * FullyConnected(1) to produce qvalues.
 */
component TorcsCritic {
ports
in Q^{29} state,
in Q(-1:1)^{3} action,
out Q(-oo:oo)^{1} qvalues;
implementation CNN {
(
state ->
FullyConnected(units=300) ->
Relu() ->
FullyConnected(units=600)
|
action ->
FullyConnected(units=600)
)->
Add() ->
FullyConnected(units=600) ->
Relu() ->
FullyConnected(units=1) ->
qvalues;
}
}
/* (c) https://github.com/MontiCore/monticore */
// Copyright (C) 2012 thomas.natschlaeger@gmail.com
//
// This file is part of the ArmaNpy library.
......
# Build script for the torcs_agent_torcsAgent component library (C++14).
cmake_minimum_required(VERSION 3.5)
set(CMAKE_CXX_STANDARD 14)
project(torcs_agent_torcsAgent LANGUAGES CXX)
# Extend the module search path so find_package() also looks in ./cmake.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
# Dependencies: Armadillo is mandatory. Its include directories and libraries
# are appended to INCLUDE_DIRS and LIBS.
# NOTE(review): the mixed-case Armadillo_INCLUDE_DIRS / Armadillo_LIBRARIES
# variables are presumably defined by a Find module under ./cmake (CMake's
# bundled FindArmadillo defines upper-case ARMADILLO_* names) -- confirm.
find_package(Armadillo REQUIRED)
set(INCLUDE_DIRS ${INCLUDE_DIRS} ${Armadillo_INCLUDE_DIRS})
set(LIBS ${LIBS} ${Armadillo_LIBRARIES})
# Additional commands: link against the mxnet library by name.
set(LIBS ${LIBS} mxnet)
# Create the component library. No STATIC/SHARED keyword is given, so the
# library type follows BUILD_SHARED_LIBS (static when that is unset).
include_directories(${INCLUDE_DIRS})
add_library(torcs_agent_torcsAgent torcs_agent_torcsAgent.cpp)
target_include_directories(torcs_agent_torcsAgent PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${INCLUDE_DIRS})
target_link_libraries(torcs_agent_torcsAgent PUBLIC ${LIBS})
set_target_properties(torcs_agent_torcsAgent PROPERTIES LINKER_LANGUAGE CXX)
# Export the target to torcs_agent_torcsAgent.cmake so that other CMake
# projects can import it from the build tree.
export(TARGETS torcs_agent_torcsAgent FILE torcs_agent_torcsAgent.cmake)
# additional commands end
#ifndef CNNBUFFERFILE_H
#define CNNBUFFERFILE_H
#include <stdio.h>
#include <iostream>
#include <fstream>
// Reads a whole file into a heap buffer owned by this object.
//
// After construction, GetBuffer() points to GetLength() bytes holding the file
// content. If the file cannot be opened, its size cannot be determined or does
// not fit into an int, or the content cannot be read completely, a message is
// written to std::cerr and the object is left empty (length 0, buffer NULL).
//
// The object owns the buffer and releases it in the destructor. Copying is
// disabled because a member-wise copy would make two objects delete the same
// buffer.
class BufferFile {
 public :
    std::string file_path_;
    int length_;
    char* buffer_;

    // Opens file_path in binary mode and loads its full content.
    explicit BufferFile(std::string file_path)
    :file_path_(file_path), length_(0), buffer_(NULL) {
        std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary);
        if (!ifs) {
            std::cerr << "Can't open the file. Please check " << file_path << ". \n";
            return;
        }

        // Determine the file size by seeking to the end.
        ifs.seekg(0, std::ios::end);
        const std::streamoff size = ifs.tellg();
        ifs.seekg(0, std::ios::beg);

        // tellg() yields -1 on failure; the round-trip cast detects sizes that
        // do not fit into the int-typed length_ member.
        if (!ifs || size < 0 ||
            static_cast<std::streamoff>(static_cast<int>(size)) != size) {
            std::cerr << "Can't determine the size of " << file_path << ". \n";
            return;
        }
        length_ = static_cast<int>(size);
        std::cout << file_path.c_str() << " ... "<< length_ << " bytes\n";

        buffer_ = new char[length_];
        ifs.read(buffer_, length_);
        if (ifs.gcount() != static_cast<std::streamsize>(length_)) {
            // Short read: do not hand out a partially filled buffer.
            std::cerr << "Can't read the complete file " << file_path << ". \n";
            delete[] buffer_;
            buffer_ = NULL;
            length_ = 0;
        }
        ifs.close();
    }

    // The buffer has a single owner; see the class comment.
    BufferFile(const BufferFile&) = delete;
    BufferFile& operator=(const BufferFile&) = delete;

    // Number of bytes held in the buffer (0 when loading failed).
    int GetLength() {
        return length_;
    }

    // Pointer to the loaded bytes (NULL when loading failed). The memory stays
    // owned by this object.
    char* GetBuffer() {
        return buffer_;
    }

    ~BufferFile() {
        if (buffer_) {
            delete[] buffer_;
            buffer_ = NULL;
        }
    }
};
#endif // CNNBUFFERFILE_H
import mxnet as mx
import logging
import os
from CNNNet_torcs_agent_torcsAgent_actor import Net_0
class CNNCreator_torcs_agent_torcsAgent_actor:
    """Creates the Gluon network(s) of the actor and restores saved parameters.

    Networks are kept in ``self.networks``, keyed by index. Files are read
    from and written to ``_model_dir_`` using the name prefix
    ``<_model_prefix_>_<index>``.
    """
    _model_dir_ = "model/torcs.agent.network.TorcsActor/"
    _model_prefix_ = "model"

    def __init__(self):
        self.weight_initializer = mx.init.Normal()
        self.networks = {}

    def load(self, context):
        """Load the newest parameter checkpoint of every network.

        For each network the stale ``_newest`` export files are removed
        (best effort), then ``_model_dir_`` is scanned for files named
        ``<prefix>_<index>-<epoch>.params`` and the one with the highest
        epoch greater than 0 is loaded. Files that contain the prefix but
        whose epoch part is not an integer are skipped.

        ``context`` is accepted for interface compatibility; this method
        does not read it.

        Returns:
            ``None`` when there are no networks; ``0`` when at least one
            network had no checkpoint to load; otherwise the smallest of the
            loaded networks' checkpoint epochs.
        """
        earliestLastEpoch = None

        for i, network in self.networks.items():
            lastEpoch = 0
            param_file = None
            network_prefix = self._model_prefix_ + "_" + str(i)

            # Best-effort cleanup: a missing file is not an error.
            for suffix in ("_newest-0000.params", "_newest-symbol.json"):
                try:
                    os.remove(self._model_dir_ + network_prefix + suffix)
                except OSError:
                    pass

            if os.path.isdir(self._model_dir_):
                for file in os.listdir(self._model_dir_):
                    if ".params" in file and network_prefix in file:
                        epochStr = file.replace(".params", "").replace(network_prefix + "-", "")
                        try:
                            epoch = int(epochStr)
                        except ValueError:
                            # The substring match also hits files of other
                            # networks (e.g. "model_10-..." while scanning for
                            # "model_1") and unrelated files; they are not
                            # checkpoints of this network.
                            continue
                        if epoch > lastEpoch:
                            lastEpoch = epoch
                            param_file = file

            if param_file is None:
                earliestLastEpoch = 0
            else:
                logging.info("Loading checkpoint: " + param_file)
                network.load_parameters(self._model_dir_ + param_file)

                if earliestLastEpoch is None or lastEpoch < earliestLastEpoch:
                    earliestLastEpoch = lastEpoch

        return earliestLastEpoch

    def construct(self, context, data_mean=None, data_std=None):
        """Create, initialize and export network 0.

        The network is initialized with ``self.weight_initializer`` on
        ``context``, hybridized, and run once on a zero input of shape
        (1, 29). ``_model_dir_`` is created if needed and every network is
        exported there with epoch 0.
        """
        self.networks[0] = Net_0(data_mean=data_mean, data_std=data_std)
        self.networks[0].collect_params().initialize(self.weight_initializer, ctx=context)
        self.networks[0].hybridize()
        self.networks[0](mx.nd.zeros((1,29,), ctx=context))

        if not os.path.exists(self._model_dir_):
            os.makedirs(self._model_dir_)

        for i, network in self.networks.items():
            network.export(self._model_dir_ + self._model_prefix_ + "_" + str(i), epoch=0)
\ No newline at end of file
import os
import h5py
import mxnet as mx
import logging
import sys
from mxnet import nd
class CNNDataLoader_torcs_agent_torcsAgent_actor:
    """Loads the HDF5 training/test data of the actor network.

    ``train.h5`` (mandatory) and ``test.h5`` (optional) are read from
    ``self._data_dir``. Both must contain the datasets named in
    ``_input_names_`` and ``_output_names_``.
    """
    _input_names_ = ['state']
    _output_names_ = ['commands_label']

    def __init__(self):
        self._data_dir = "data/"

    def load_data(self, batch_size):
        """Build the data iterators and per-input statistics.

        Returns a tuple ``(train_iter, test_iter, data_mean, data_std)``.
        ``test_iter`` is ``None`` when no test file was loaded. ``data_mean``
        and ``data_std`` map each input name to the mean and the standard
        deviation (plus 1e-5) of the training dataset along axis 0.
        """
        train_h5, test_h5 = self.load_h5_files()

        train_data, data_mean, data_std = {}, {}, {}
        for name in self._input_names_:
            train_data[name] = train_h5[name]
            data_mean[name] = nd.array(train_h5[name][:].mean(axis=0))
            data_std[name] = nd.array(train_h5[name][:].std(axis=0) + 1e-5)

        train_label = {}
        for name in self._output_names_:
            train_label[name] = train_h5[name]

        train_iter = mx.io.NDArrayIter(data=train_data,
                                       label=train_label,
                                       batch_size=batch_size)

        if test_h5 is None:
            return train_iter, None, data_mean, data_std

        test_data = {}
        for name in self._input_names_:
            test_data[name] = test_h5[name]

        test_label = {}
        for name in self._output_names_:
            test_label[name] = test_h5[name]

        test_iter = mx.io.NDArrayIter(data=test_data,
                                      label=test_label,
                                      batch_size=batch_size)
        return train_iter, test_iter, data_mean, data_std

    def load_h5_files(self):
        """Open ``train.h5`` and, if present, ``test.h5`` read-only.

        Logs an error and exits the process with status 1 when the training
        file is missing or when an opened file lacks a required dataset. A
        missing test file only produces a warning.

        Returns a tuple ``(train_h5, test_h5)``; ``test_h5`` is ``None`` when
        the test file does not exist.
        """
        train_path = self._data_dir + "train.h5"
        test_path = self._data_dir + "test.h5"

        # Guard clause: nothing can be done without the training file.
        if not os.path.isfile(train_path):
            logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.")
            sys.exit(1)

        train_h5 = h5py.File(train_path, 'r')
        # Inputs are checked before outputs.
        for required in self._input_names_ + self._output_names_:
            if required not in train_h5:
                logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the dataset "
                              + "'" + required + "'")
                sys.exit(1)

        test_h5 = None
        if os.path.isfile(test_path):
            test_h5 = h5py.File(test_path, 'r')
            for required in self._input_names_ + self._output_names_:
                if required not in test_h5:
                    logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the dataset "
                                  + "'" + required + "'")
                    sys.exit(1)
        else:
            logging.warning("Couldn't load test set. File '" + os.path.abspath(test_path) + "' does not exist.")

        return train_h5, test_h5
\ No newline at end of file
import mxnet as mx
import numpy as np
from mxnet import gluon
class OneHot(gluon.HybridBlock):
    """Hybrid block that one-hot encodes the arg-max of its input.

    The arg-max is taken along axis 1; ``size`` is the depth of the
    resulting one-hot encoding.
    """

    def __init__(self, size, **kwargs):
        super(OneHot, self).__init__(**kwargs)
        with self.name_scope():
            self.size = size

    def hybrid_forward(self, F, x):
        winners = F.argmax(data=x, axis=1)
        return F.one_hot(indices=winners, depth=self.size)
class Softmax(gluon.HybridBlock):
    """Hybrid block that applies ``F.softmax`` to its input."""

    def __init__(self, **kwargs):
        super(Softmax, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        result = F.softmax(x)
        return result
class Split(gluon.HybridBlock):
    """Hybrid block that splits its input into ``num_outputs`` parts.

    The split is performed along ``axis`` (default 1) via ``F.split``.
    """

    def __init__(self, num_outputs, axis=1, **kwargs):
        super(Split, self).__init__(**kwargs)
        with self.name_scope():
            self.axis = axis
            self.num_outputs = num_outputs

    def hybrid_forward(self, F, x):
        parts = F.split(data=x, axis=self.axis, num_outputs=self.num_outputs)
        return parts
class Concatenate(gluon.HybridBlock):
    """Hybrid block that concatenates all of its inputs.

    The inputs are joined along dimension ``dim`` (default 1) via
    ``F.concat``.
    """

    def __init__(self, dim=1, **kwargs):
        super(Concatenate, self).__init__(**kwargs)
        with self.name_scope():
            self.dim = dim

    def hybrid_forward(self, F, *x):
        joined = F.concat(*x, dim=self.dim)
        return joined
class ZScoreNormalization(gluon.HybridBlock):
def __init__(self, data_mean, data_std, **kwargs):
super(ZScoreNormalization, self).__init__(**kwargs)
with self.name_scope():
self.data_mean = self.params.get('data_mean', shape=data_mean.shape,
init=mx.init.Constant(data_mean.asnumpy().tolist()), differentiable=False)
self.data_std = self.params.get('data_std', shape=data_mean.shape,
init=mx.init.Constant(data_std.asnumpy().tolist()), differentiable=False)
def hybrid_forward(