Commit f9d4ddaa authored by Evgeny Kusmenko

Merge branch 'sascha' into 'master'

Sascha

See merge request !5
parents 7c7f7226 dd684f29
Pipeline #410924 passed with stage
in 13 minutes and 6 seconds
......@@ -23,8 +23,8 @@ TrainAgent:
- chmod +x install.sh
- ./install.sh
- echo "Training agent.."
- chmod +x train_agent.sh
- ./train_agent.sh
- chmod +x train_agent_pipeline.sh
- ./train_agent_pipeline.sh
EvaluateAgent:
image: registry.git.rwth-aachen.de/monticore/embeddedmontiarc/applications/reinforcement_learning/cartpole/emadl-rl-mxnet:latest
......@@ -34,8 +34,8 @@ EvaluateAgent:
- chmod +x install.sh
- ./install.sh
- echo "Evaluating agent.."
- chmod +x evaluate_agent.sh
- ./evaluate_agent.sh
- chmod +x evaluate_agent_pipeline.sh
- ./evaluate_agent_pipeline.sh
RandomAgent:
image: registry.git.rwth-aachen.de/monticore/embeddedmontiarc/applications/reinforcement_learning/cartpole/emadl-rl-mxnet:latest
......@@ -45,7 +45,8 @@ RandomAgent:
- chmod +x install.sh
- ./install.sh
- echo "Evaluating random agent.."
- ./evaluate_agent.sh -b random
- chmod +x evaluate_agent_pipeline.sh
- ./evaluate_agent_pipeline.sh -b random
RuleBasedAgent:
image: registry.git.rwth-aachen.de/monticore/embeddedmontiarc/applications/reinforcement_learning/cartpole/emadl-rl-mxnet:latest
......@@ -55,4 +56,5 @@ RuleBasedAgent:
- chmod +x install.sh
- ./install.sh
- echo "Evaluating rulebased agent.."
- ./evaluate_agent.sh -b rulebased
- chmod +x evaluate_agent_pipeline.sh
- ./evaluate_agent_pipeline.sh -b rulebased
......@@ -65,6 +65,9 @@ class RosSimConnector(object):
self.__score = 0
self.__scores = []
self.__scores_counter = 0
if not os.path.exists(RosSimConnector.results_dirname):
os.makedirs(RosSimConnector.results_dirname)
rospy.init_node('ForestrySim', anonymous=True)
rate = rospy.Rate(RosSimConnector.ros_update_rate)
......
......@@ -64,6 +64,9 @@ class RosSimConnector(object):
self.__score = 0
self.__scores = []
self.__scores_counter = 0
if not os.path.exists(RosSimConnector.results_dirname):
os.makedirs(RosSimConnector.results_dirname)
rospy.init_node('ForestrySim', anonymous=True)
rate = rospy.Rate(RosSimConnector.ros_update_rate)
......
......@@ -8,11 +8,6 @@ ARCH="multistep"
BENCHMARK="none"
AGENT="multistep"
# Make sure that ROS is not running
pkill roscore
pkill rosmaster
sleep 2
while getopts "sb:" OPTION; do
case $OPTION in
s) ARCH="singlestep"; AGENT="singlestep" ;;
......@@ -34,27 +29,27 @@ cp "${POSTPROCESSOR_BUILD}/forestrl_${ARCH}_postprocessor_master/coordinator/Coo
cp "${AGENT_BUILD}/forestrl_singlestep_agent_master/coordinator/Coordinator_forestrl_singlestep_agent_master" "${BINARY}/agent"
echo "Start ROSCORE..."
roscore &
xterm -title "ROSCORE" -e "roscore; bash" &
sleep 10
echo "Start up environment..."
python bin/dummysim/${ARCH}_launcher.py --quiet --eval &
xterm -title "Forest-Sim" -e "python bin/dummysim/${ARCH}_launcher.py --quiet --eval; bash" &
sleep 2
if [ "$BENCHMARK" = "none" ]; then
echo "Start up preprocessor..."
${BINARY}/preprocessor &
xterm -title "Preprocessor" -e "${BINARY}/preprocessor; bash" &
sleep 2
echo "Start up ${AGENT} agent..."
${BINARY}/agent &
xterm -title "Agent (${AGENT})" -e "${BINARY}/agent; bash" &
sleep 2
else
echo "Start up ${AGENT} agent..."
python bin/benchmark_agents/${BENCHMARK}/${BENCHMARK}_agent.py &
xterm -title "Agent (${AGENT})" -e "python bin/benchmark_agents/${BENCHMARK}/${BENCHMARK}_agent.py; bash" &
sleep 2
fi
echo "Start up postprocessor..."
${BINARY}/postprocessor &
xterm -title "Postprocessor" -e "${BINARY}/postprocessor; bash" &
sleep 2
#!/bin/bash
# (c) https://github.com/MontiCore/monticore
#
# Headless launcher: starts roscore, the dummy simulation in --eval mode and
# the processing chain (preprocessor/agent or a benchmark agent, followed by
# the postprocessor) as background jobs of this shell.
#
# Usage: <script> [-s] [-b random|rulebased]
#   -s          use the singlestep architecture (default: multistep)
#   -b <name>   run a benchmark agent instead of the trained agent

. config.sh

# BINARY is not assigned in this script; it is expected to come from config.sh.
# Abort early if it is missing so that the 'rm -rf' below never runs on an
# empty path.
: "${BINARY:?BINARY is not set (expected to be defined by config.sh)}"

AGENT_BUILD="target/agent/build"
PREPROCESSOR_BUILD="target/preprocessor/build"
POSTPROCESSOR_BUILD="target/postprocessor/build"

ARCH="multistep"
BENCHMARK="none"
AGENT="multistep"

usage() {
    echo "Usage: $0 [-s] [-b random|rulebased]" >&2
}

# Make sure that ROS is not running
pkill roscore
pkill rosmaster
sleep 2

while getopts "sb:" OPTION; do
    case $OPTION in
        s) ARCH="singlestep"; AGENT="singlestep" ;;
        b) BENCHMARK=$OPTARG; AGENT=$OPTARG ;;
        # getopts has already printed a diagnostic for the bad option.
        *) usage; exit 1 ;;
    esac
done

if [ "$BENCHMARK" != "none" ] && [ "$BENCHMARK" != "random" ] && [ "$BENCHMARK" != "rulebased" ]; then
    echo "Invalid benchmark agent '$BENCHMARK'. Valid options are: random, rulebased" >&2
    # Non-zero status so that callers (e.g. a CI job) see the failure.
    exit 1
fi

# Start from a clean directory holding only the executables launched below.
rm -rf "${BINARY}"
mkdir -p "${BINARY}"

# NOTE(review): preprocessor and agent are always taken from the singlestep
# build; only the postprocessor follows ${ARCH}. Presumably intentional -
# confirm.
cp "${PREPROCESSOR_BUILD}/forestrl_singlestep_preprocessor_master/coordinator/Coordinator_forestrl_singlestep_preprocessor_master" "${BINARY}/preprocessor"
cp "${POSTPROCESSOR_BUILD}/forestrl_${ARCH}_postprocessor_master/coordinator/Coordinator_forestrl_${ARCH}_postprocessor_master" "${BINARY}/postprocessor"
cp "${AGENT_BUILD}/forestrl_singlestep_agent_master/coordinator/Coordinator_forestrl_singlestep_agent_master" "${BINARY}/agent"

echo "Start ROSCORE..."
roscore &
sleep 10

echo "Start up environment..."
python "bin/dummysim/${ARCH}_launcher.py" --quiet --eval &
sleep 2

if [ "$BENCHMARK" = "none" ]; then
    echo "Start up preprocessor..."
    "${BINARY}/preprocessor" &
    sleep 2

    echo "Start up ${AGENT} agent..."
    "${BINARY}/agent" &
    sleep 2
else
    echo "Start up ${AGENT} agent..."
    python "bin/benchmark_agents/${BENCHMARK}/${BENCHMARK}_agent.py" &
    sleep 2
fi

echo "Start up postprocessor..."
"${BINARY}/postprocessor" &
sleep 2
/* Training configuration for the agent named "ForestAgent": reinforcement
   learning with the td3-algorithm on the CPU context, using
   forestrl.singlestep.agent.networks.forestCritic as the critic. */
configuration ForestActor {
agent_name: "ForestAgent"
context: cpu
learning_method: reinforcement
rl_algorithm: td3-algorithm
critic: forestrl.singlestep.agent.networks.forestCritic
/* The environment is accessed through ROS; these are the topic names used
   for state, terminal flag, reward, action and reset. */
environment: ros_interface {
state_topic: "/preprocessor/state"
terminal_state_topic: "/sim/terminal"
reward_topic: "/sim/reward"
action_topic: "/postprocessor/action"
reset_topic: "/sim/reset"
}
discount_factor: 0.7
policy_noise: 0.2
noise_clip: 0.5
policy_delay: 2
/* NOTE(review): only one episode is configured while snapshot_interval is
   50 - presumably a short smoke-test setup for the pipeline; confirm. */
num_episodes: 1
start_training_at: 1
num_max_steps: 10000
training_interval: 1
snapshot_interval: 50
evaluation_samples: 5
soft_target_update_rate: 0.005
replay_memory: buffer{
memory_size : 100000
sample_size : 100
}
/* Gaussian strategy with a linearly decaying epsilon (1.0 down to a
   minimum of 0.05). */
strategy : gaussian {
epsilon : 1.0
min_epsilon : 0.05
epsilon_decay_method: linear
epsilon_decay_start: 500
epsilon_decay : 0.005
epsilon_decay_per_step: false
noise_variance : 0.1
}
/* Actor and critic both use sgd, each with its own learning rate. */
actor_optimizer : sgd {
learning_rate : 0.002
}
critic_optimizer : sgd {
learning_rate : 0.0005
}
}
package forestrl.singlestep.agent.networks;
/* Actor network: takes a 488-element state vector and produces 63 output
   values declared in the range -1:1. The state passes through two fully
   connected hidden layers (400 and 300 units, each followed by Relu) and a
   63-unit fully connected layer followed by Tanh. */
component ForestActor {
ports
in Q^{488} state,
out Q(-1:1)^{63} netvalues;
implementation CNN {
state ->
FullyConnected(units=400) ->
Relu() ->
FullyConnected(units=300) ->
Relu() ->
FullyConnected(units=63) ->
/* Tanh matches the declared -1:1 range of the netvalues port. */
Tanh() ->
netvalues;
}
}
package forestrl.singlestep.agent.networks;
/* Critic network: takes the 488-element state and a 63-element action
   (declared range -1:1) and produces a single unbounded value on the qvalue
   port. Both inputs are concatenated and passed through two fully connected
   hidden layers (400 and 300 units, each followed by Relu) and a 1-unit
   fully connected output layer. */
component ForestCritic {
ports
in Q^{488} state,
in Q(-1:1)^{63} action,
out Q(-oo:oo)^{1} qvalue;
implementation CNN {
(state | action) ->
Concatenate() ->
FullyConnected(units=400) ->
Relu() ->
FullyConnected(units=300) ->
Relu() ->
/* No activation after the last layer: the output port is unbounded. */
FullyConnected(units=1) ->
qvalue;
}
}
This source diff could not be displayed because it is too large. You can view the blob instead.
{
"nodes": [
{
"op": "null",
"name": "data",
"inputs": []
},
{
"op": "null",
"name": "net_00_dense0_weight",
"attrs": {
"__dtype__": "0",
"__lr_mult__": "1.0",
"__shape__": "(400, 0)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "null",
"name": "net_00_dense0_bias",
"attrs": {
"__dtype__": "0",
"__init__": "zeros",
"__lr_mult__": "1.0",
"__shape__": "(400,)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "FullyConnected",
"name": "net_00_dense0_fwd",
"attrs": {
"flatten": "True",
"no_bias": "False",
"num_hidden": "400"
},
"inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]]
},
{
"op": "Activation",
"name": "net_00_relu0_fwd",
"attrs": {"act_type": "relu"},
"inputs": [[3, 0, 0]]
},
{
"op": "null",
"name": "net_00_dense1_weight",
"attrs": {
"__dtype__": "0",
"__lr_mult__": "1.0",
"__shape__": "(300, 0)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "null",
"name": "net_00_dense1_bias",
"attrs": {
"__dtype__": "0",
"__init__": "zeros",
"__lr_mult__": "1.0",
"__shape__": "(300,)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "FullyConnected",
"name": "net_00_dense1_fwd",
"attrs": {
"flatten": "True",
"no_bias": "False",
"num_hidden": "300"
},
"inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0]]
},
{
"op": "Activation",
"name": "net_00_relu1_fwd",
"attrs": {"act_type": "relu"},
"inputs": [[7, 0, 0]]
},
{
"op": "null",
"name": "net_00_dense2_weight",
"attrs": {
"__dtype__": "0",
"__lr_mult__": "1.0",
"__shape__": "(63, 0)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "null",
"name": "net_00_dense2_bias",
"attrs": {
"__dtype__": "0",
"__init__": "zeros",
"__lr_mult__": "1.0",
"__shape__": "(63,)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "FullyConnected",
"name": "net_00_dense2_fwd",
"attrs": {
"flatten": "True",
"no_bias": "False",
"num_hidden": "63"
},
"inputs": [[8, 0, 0], [9, 0, 0], [10, 0, 0]]
},
{
"op": "Activation",
"name": "net_00_tanh0_fwd",
"attrs": {"act_type": "tanh"},
"inputs": [[11, 0, 0]]
}
],
"arg_nodes": [0, 1, 2, 5, 6, 9, 10],
"node_row_ptr": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13
],
"heads": [[12, 0, 0]],
"attrs": {"mxnet_version": ["int", 10500]}
}
\ No newline at end of file
{
"nodes": [
{
"op": "null",
"name": "data",
"inputs": []
},
{
"op": "null",
"name": "net_00_dense0_weight",
"attrs": {
"__dtype__": "0",
"__lr_mult__": "1.0",
"__shape__": "(400, 0)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "null",
"name": "net_00_dense0_bias",
"attrs": {
"__dtype__": "0",
"__init__": "zeros",
"__lr_mult__": "1.0",
"__shape__": "(400,)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "FullyConnected",
"name": "net_00_dense0_fwd",
"attrs": {
"flatten": "True",
"no_bias": "False",
"num_hidden": "400"
},
"inputs": [[0, 0, 0], [1, 0, 0], [2, 0, 0]]
},
{
"op": "Activation",
"name": "net_00_relu0_fwd",
"attrs": {"act_type": "relu"},
"inputs": [[3, 0, 0]]
},
{
"op": "null",
"name": "net_00_dense1_weight",
"attrs": {
"__dtype__": "0",
"__lr_mult__": "1.0",
"__shape__": "(300, 0)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "null",
"name": "net_00_dense1_bias",
"attrs": {
"__dtype__": "0",
"__init__": "zeros",
"__lr_mult__": "1.0",
"__shape__": "(300,)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "FullyConnected",
"name": "net_00_dense1_fwd",
"attrs": {
"flatten": "True",
"no_bias": "False",
"num_hidden": "300"
},
"inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0]]
},
{
"op": "Activation",
"name": "net_00_relu1_fwd",
"attrs": {"act_type": "relu"},
"inputs": [[7, 0, 0]]
},
{
"op": "null",
"name": "net_00_dense2_weight",
"attrs": {
"__dtype__": "0",
"__lr_mult__": "1.0",
"__shape__": "(63, 0)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "null",
"name": "net_00_dense2_bias",
"attrs": {
"__dtype__": "0",
"__init__": "zeros",
"__lr_mult__": "1.0",
"__shape__": "(63,)",
"__storage_type__": "0",
"__wd_mult__": "1.0"
},
"inputs": []
},
{
"op": "FullyConnected",
"name": "net_00_dense2_fwd",
"attrs": {
"flatten": "True",
"no_bias": "False",
"num_hidden": "63"
},
"inputs": [[8, 0, 0], [9, 0, 0], [10, 0, 0]]
},
{
"op": "Activation",
"name": "net_00_tanh0_fwd",
"attrs": {"act_type": "tanh"},
"inputs": [[11, 0, 0]]
}
],
"arg_nodes": [0, 1, 2, 5, 6, 9, 10],