Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Open sidebar
monticore
EmbeddedMontiArc
generators
EMADL2CPP
Commits
d91b80d0
Commit
d91b80d0
authored
May 10, 2019
by
Nicola Gatto
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add generation test for gym environment reinforcement model
parent
aa7af2ba
Changes
25
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
25 changed files
with
1778 additions
and
22 deletions
+1778
-22
src/test/java/de/monticore/lang/monticar/emadl/GenerationTest.java
...java/de/monticore/lang/monticar/emadl/GenerationTest.java
+26
-1
src/test/resources/models/reinforcementModel/cartpole/agent/CartPoleDQN.cnnt
...models/reinforcementModel/cartpole/agent/CartPoleDQN.cnnt
+10
-13
src/test/resources/models/reinforcementModel/cartpole/agent/CartPoleDQN.emadl
...odels/reinforcementModel/cartpole/agent/CartPoleDQN.emadl
+3
-3
src/test/resources/models/reinforcementModel/cartpole/agent/Reward.emadl
...ces/models/reinforcementModel/cartpole/agent/Reward.emadl
+3
-3
src/test/resources/models/reinforcementModel/cartpole/policy/Greedy.emadl
...es/models/reinforcementModel/cartpole/policy/Greedy.emadl
+2
-2
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CMakeLists.txt
...get_code/gluon/reinforcementModel/cartpole/CMakeLists.txt
+27
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNBufferFile.h
...et_code/gluon/reinforcementModel/cartpole/CNNBufferFile.h
+51
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNCreator_cartpole_master_dqn.py
...forcementModel/cartpole/CNNCreator_cartpole_master_dqn.py
+56
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNNet_cartpole_master_dqn.py
...reinforcementModel/cartpole/CNNNet_cartpole_master_dqn.py
+103
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNPredictor_cartpole_master_dqn.h
...orcementModel/cartpole/CNNPredictor_cartpole_master_dqn.h
+104
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNTrainer_cartpole_master_dqn.py
...forcementModel/cartpole/CNNTrainer_cartpole_master_dqn.py
+59
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNTranslator.h
...et_code/gluon/reinforcementModel/cartpole/CNNTranslator.h
+127
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/HelperA.h
...s/target_code/gluon/reinforcementModel/cartpole/HelperA.h
+141
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/cartpole_master.cpp
...ode/gluon/reinforcementModel/cartpole/cartpole_master.cpp
+1
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/cartpole_master.h
..._code/gluon/reinforcementModel/cartpole/cartpole_master.h
+32
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/cartpole_master_dqn.h
...e/gluon/reinforcementModel/cartpole/cartpole_master_dqn.h
+31
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/cartpole_master_policy.h
...luon/reinforcementModel/cartpole/cartpole_master_policy.h
+30
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/cmake/FindArmadillo.cmake
...uon/reinforcementModel/cartpole/cmake/FindArmadillo.cmake
+38
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/__init__.py
...orcementModel/cartpole/reinforcement_learning/__init__.py
+0
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/action_policy.py
...entModel/cartpole/reinforcement_learning/action_policy.py
+73
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/agent.py
...inforcementModel/cartpole/reinforcement_learning/agent.py
+503
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/environment.py
...ementModel/cartpole/reinforcement_learning/environment.py
+67
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/replay_memory.py
...entModel/cartpole/reinforcement_learning/replay_memory.py
+155
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/util.py
...einforcementModel/cartpole/reinforcement_learning/util.py
+134
-0
src/test/resources/target_code/gluon/reinforcementModel/cartpole/start_training.sh
..._code/gluon/reinforcementModel/cartpole/start_training.sh
+2
-0
No files found.
src/test/java/de/monticore/lang/monticar/emadl/GenerationTest.java
View file @
d91b80d0
...
...
@@ -190,11 +190,36 @@ public class GenerationTest extends AbstractSymtabTest {
}
@Test
public
void
testGluonReinforcementModel
()
{
public
void
testGluonReinforcementModel
GymEnvironment
()
{
Log
.
getFindings
().
clear
();
String
[]
args
=
{
"-m"
,
"src/test/resources/models/reinforcementModel"
,
"-r"
,
"cartpole.Master"
,
"-b"
,
"GLUON"
,
"-f"
,
"n"
,
"-c"
,
"n"
};
EMADLGeneratorCli
.
main
(
args
);
assertTrue
(
Log
.
getFindings
().
stream
().
filter
(
Finding:
:
isError
).
collect
(
Collectors
.
toList
()).
isEmpty
());
checkFilesAreEqual
(
Paths
.
get
(
"./target/generated-sources-emadl"
),
Paths
.
get
(
"./src/test/resources/target_code/gluon/reinforcementModel/cartpole"
),
Arrays
.
asList
(
"cartpole_master.cpp"
,
"cartpole_master.h"
,
"cartpole_master_dqn.h"
,
"cartpole_master_policy.h"
,
"CMakeLists.txt"
,
"CNNBufferFile.h"
,
"CNNCreator_cartpole_master_dqn.py"
,
"CNNNet_cartpole_master_dqn.py"
,
"CNNPredictor_cartpole_master_dqn.h"
,
"CNNTrainer_cartpole_master_dqn.py"
,
"CNNTranslator.h"
,
"HelperA.h"
,
"start_training.sh"
,
"reinforcement_learning/__init__.py"
,
"reinforcement_learning/action_policy.py"
,
"reinforcement_learning/agent.py"
,
"reinforcement_learning/environment.py"
,
"reinforcement_learning/replay_memory.py"
,
"reinforcement_learning/util.py"
)
);
}
@Test
...
...
src/test/resources/models/reinforcementModel/cartpole/agent/CartPoleDQN.cnnt
View file @
d91b80d0
configuration CartPoleDQN {
learning_method : reinforcement
environment : gym { name : "CartPole-v0" }
context : cpu
agent_name : "reinforcement_agent"
reward_function : cartpole.agent.reward.reward
learning_method : reinforcement
environment : gym {name : "CartPole-v0"}
num_episodes : 1
00
0
num_episodes : 1
6
0
target_score : 185.5
discount_factor : 0.999
num_max_steps : 50
0
num_max_steps :
2
50
training_interval : 1
use_fix_target_network : true
target_network_update_interval : 200
snapshot_interval :
5
0
snapshot_interval :
2
0
use_double_dqn :
tru
e
use_double_dqn :
fals
e
loss :
huber_loss
loss :
euclidean
replay_memory : buffer{
memory_size :
2
0000
memory_size :
1
0000
sample_size : 32
}
...
...
@@ -31,7 +28,7 @@ configuration CartPoleDQN {
epsilon : 1.0
min_epsilon : 0.01
epsilon_decay_method: linear
epsilon_decay : 0.
0
01
epsilon_decay : 0.01
}
optimizer : rmsprop{
...
...
src/test/resources/models/reinforcementModel/cartpole/agent/CartPoleDQN.emadl
View file @
d91b80d0
...
...
@@ -7,10 +7,10 @@ component CartPoleDQN {
implementation
CNN
{
state
->
FullyConnected
(
units
=
256
)
->
Relu
()
->
FullyConnected
(
units
=
128
)
->
Relu
()
->
Tanh
()
->
FullyConnected
(
units
=
256
)
->
Tanh
()
->
FullyConnected
(
units
=
2
)
->
qvalues
}
...
...
src/test/resources/models/reinforcementModel/cartpole/agent/
reward/
Reward.emadl
→
src/test/resources/models/reinforcementModel/cartpole/agent/Reward.emadl
View file @
d91b80d0
package
cartpole
.
agent
.
reward
;
package
cartpole
.
agent
;
component
Reward
{
ports
in
Q
^{
4
}
state
,
in
B
isTerminal
,
out
Q
reward
;
implementation
Math
{
Q
rew
=
state
(
1
);
reward
=
rew
;
reward
=
2
;
}
}
\ No newline at end of file
src/test/resources/models/reinforcementModel/cartpole/policy/Greedy.emadl
View file @
d91b80d0
...
...
@@ -6,11 +6,11 @@ component Greedy {
out
Z
action
;
implementation
Math
{
Q
maxQValue
=
values
(
0
);
Q
maxQValue
=
values
(
1
);
Z
maxValueAction
=
0
;
for
i
=
1
:
2
if
maxQValue
>
values
(
i
)
if
values
(
i
)
>
maxQValue
maxQValue
=
values
(
i
);
maxValueAction
=
i
-
1
;
end
...
...
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CMakeLists.txt
0 → 100644
View file @
d91b80d0
cmake_minimum_required(VERSION 3.5)
set(CMAKE_CXX_STANDARD 14)

project(cartpole_master LANGUAGES CXX)

# Make the Find*.cmake modules shipped in ./cmake visible to find_package().
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

# Dependencies: collect their include directories and libraries.
find_package(Armadillo REQUIRED)
list(APPEND INCLUDE_DIRS ${Armadillo_INCLUDE_DIRS})
list(APPEND LIBS ${Armadillo_LIBRARIES})

# additional commands
list(APPEND LIBS mxnet)

# Library target built from the generated component source.
include_directories(${INCLUDE_DIRS})
add_library(cartpole_master cartpole_master.cpp)
target_include_directories(cartpole_master
    PUBLIC
        ${CMAKE_CURRENT_SOURCE_DIR}
        ${INCLUDE_DIRS}
)
target_link_libraries(cartpole_master PUBLIC ${LIBS})
set_target_properties(cartpole_master PROPERTIES LINKER_LANGUAGE CXX)

# Export the target so other CMake projects can include cartpole_master.cmake.
export(TARGETS cartpole_master FILE cartpole_master.cmake)

# additional commands end
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNBufferFile.h
0 → 100644
View file @
d91b80d0
#ifndef CNNBUFFERFILE_H
#define CNNBUFFERFILE_H
#include <stdio.h>
#include <iostream>
#include <fstream>
/**
 * Reads a whole file into a heap buffer when constructed.
 *
 * On any failure (file cannot be opened, its size cannot be determined or
 * does not fit into an int, or the read comes up short) the object is left
 * empty: GetLength() == 0 and GetBuffer() == nullptr. Callers are expected
 * to check GetLength() before using the buffer.
 *
 * The buffer is NOT NUL-terminated; always pair it with GetLength().
 *
 * The object owns buffer_ and releases it in the destructor, so copying is
 * disabled (an implicit copy would free the same buffer twice).
 */
class BufferFile {
public:
    std::string file_path_;  // path that was passed to the constructor
    int length_;             // number of bytes stored in buffer_ (0 on failure)
    char* buffer_;           // owned file contents, nullptr on failure

    /**
     * Opens file_path in binary mode and loads its complete contents.
     * Prints a diagnostic to std::cerr on failure and the byte count to
     * std::cout on success.
     */
    explicit BufferFile(std::string file_path)
        : file_path_(file_path), length_(0), buffer_(nullptr) {
        std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary);
        if (!ifs) {
            std::cerr << "Can't open the file. Please check " << file_path << ".\n";
            return;
        }

        // Determine the file size by seeking to the end.
        ifs.seekg(0, std::ios::end);
        const std::streamoff file_size = ifs.tellg();
        ifs.seekg(0, std::ios::beg);

        // tellg() yields -1 on failure; also reject sizes that would not
        // survive the conversion to the int exposed through GetLength().
        const int narrowed_size = static_cast<int>(file_size);
        if (!ifs || file_size < 0 ||
            static_cast<std::streamoff>(narrowed_size) != file_size) {
            std::cerr << "Can't read the file. Please check " << file_path << ".\n";
            return;
        }

        length_ = narrowed_size;
        std::cout << file_path.c_str() << " ... " << length_ << " bytes\n";

        buffer_ = new char[length_];
        ifs.read(buffer_, length_);
        if (!ifs) {
            // Short read: do not expose a partially filled buffer as valid.
            std::cerr << "Can't read the file. Please check " << file_path << ".\n";
            delete[] buffer_;
            buffer_ = nullptr;
            length_ = 0;
        }
        // ifs is closed by its destructor.
    }

    // Owning raw pointer: forbid copies instead of double-freeing.
    BufferFile(const BufferFile&) = delete;
    BufferFile& operator=(const BufferFile&) = delete;

    /** Returns the number of bytes loaded, 0 if loading failed. */
    int GetLength() const {
        return length_;
    }

    /** Returns the loaded bytes (not NUL-terminated), nullptr if loading failed. */
    char* GetBuffer() const {
        return buffer_;
    }

    ~BufferFile() {
        delete[] buffer_;  // delete[] on nullptr is a no-op
        buffer_ = nullptr;
    }
};
#endif // CNNBUFFERFILE_H
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNCreator_cartpole_master_dqn.py
0 → 100644
View file @
d91b80d0
import
mxnet
as
mx
import
logging
import
os
from
CNNNet_cartpole_master_dqn
import
Net
class CNNCreator_cartpole_master_dqn:
    """Builds the CartPole DQN network and restores checkpoints from disk.

    Checkpoints are looked up in ``_model_dir_`` under the naming scheme
    ``<_model_prefix_>-<epoch>.params``.
    """

    _model_dir_ = "model/cartpole.agent.CartPoleDQN/"
    _model_prefix_ = "model"
    _input_shapes_ = [(4,)]

    def __init__(self):
        self.weight_initializer = mx.init.Normal()
        # Set by construct(); load() requires it to be set.
        self.net = None

    def get_input_shapes(self):
        """Return the list of input shapes (without the batch dimension)."""
        return self._input_shapes_

    def load(self, context):
        """Load the checkpoint with the highest epoch number greater than 0.

        Any previously exported ``<prefix>_newest`` files are removed first.
        Files that do not follow ``<prefix>-<epoch>.params`` (for example
        ``model_best-0007.params``) are ignored instead of aborting the load
        with a ValueError.

        :param context: accepted for interface compatibility; not used here.
        :return: epoch number of the loaded checkpoint, or 0 if none was loaded.
        """
        last_epoch = 0
        param_file = None

        # Best effort clean-up of stale "_newest" exports; missing files are fine.
        for stale_suffix in ("_newest-0000.params", "_newest-symbol.json"):
            try:
                os.remove(self._model_dir_ + self._model_prefix_ + stale_suffix)
            except OSError:
                pass

        if os.path.isdir(self._model_dir_):
            name_prefix = self._model_prefix_ + "-"
            name_suffix = ".params"
            for file_name in os.listdir(self._model_dir_):
                if not (file_name.startswith(name_prefix)
                        and file_name.endswith(name_suffix)):
                    continue
                epoch_str = file_name[len(name_prefix):-len(name_suffix)]
                try:
                    epoch = int(epoch_str)
                except ValueError:
                    # Not an epoch-numbered checkpoint; skip it.
                    continue
                # Epoch 0 is never selected because last_epoch starts at 0.
                if epoch > last_epoch:
                    last_epoch = epoch
                    param_file = file_name

        if param_file is None:
            return 0

        logging.info("Loading checkpoint: " + param_file)
        self.net.load_parameters(self._model_dir_ + param_file)
        return last_epoch

    def construct(self, context, data_mean=None, data_std=None):
        """Create, initialize and export the network.

        The net is initialized on ``context``, hybridized, run once on a zero
        batch of size 1 and then exported as epoch 0 into ``_model_dir_``
        (which is created if it does not exist).
        """
        self.net = Net(data_mean=data_mean, data_std=data_std)
        self.net.collect_params().initialize(self.weight_initializer, ctx=context)
        self.net.hybridize()
        # One forward pass on dummy data before export.
        self.net(mx.nd.zeros((1,) + self._input_shapes_[0], ctx=context))

        if not os.path.exists(self._model_dir_):
            os.makedirs(self._model_dir_)

        self.net.export(self._model_dir_ + self._model_prefix_, epoch=0)
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNNet_cartpole_master_dqn.py
0 → 100644
View file @
d91b80d0
import
mxnet
as
mx
import
numpy
as
np
from
mxnet
import
gluon
class Softmax(gluon.HybridBlock):
    """Hybrid block that applies ``F.softmax`` to its input."""

    def __init__(self, **kwargs):
        super(Softmax, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        activated = F.softmax(x)
        return activated
class Split(gluon.HybridBlock):
    """Hybrid block that splits its input into ``num_outputs`` parts along ``axis``."""

    def __init__(self, num_outputs, axis=1, **kwargs):
        super(Split, self).__init__(**kwargs)
        with self.name_scope():
            self.axis = axis
            self.num_outputs = num_outputs

    def hybrid_forward(self, F, x):
        parts = F.split(data=x, axis=self.axis, num_outputs=self.num_outputs)
        return parts
class Concatenate(gluon.HybridBlock):
    """Hybrid block that concatenates all of its inputs along dimension ``dim``."""

    def __init__(self, dim=1, **kwargs):
        super(Concatenate, self).__init__(**kwargs)
        with self.name_scope():
            self.dim = dim

    def hybrid_forward(self, F, *x):
        joined = F.concat(*x, dim=self.dim)
        return joined
class ZScoreNormalization(gluon.HybridBlock):
    """Hybrid block computing ``(x - data_mean) / data_std`` with broadcasting.

    Mean and standard deviation are stored as non-differentiable parameters
    initialized from the arrays passed to the constructor.
    """

    def __init__(self, data_mean, data_std, **kwargs):
        super(ZScoreNormalization, self).__init__(**kwargs)
        with self.name_scope():
            mean_init = mx.init.Constant(data_mean.asnumpy().tolist())
            self.data_mean = self.params.get(
                'data_mean',
                shape=data_mean.shape,
                init=mean_init,
                differentiable=False)

            # NOTE(review): the std parameter is declared with data_mean's
            # shape; presumably both arrays always share a shape - confirm.
            std_init = mx.init.Constant(data_std.asnumpy().tolist())
            self.data_std = self.params.get(
                'data_std',
                shape=data_mean.shape,
                init=std_init,
                differentiable=False)

    def hybrid_forward(self, F, x, data_mean, data_std):
        centered = F.broadcast_sub(x, data_mean)
        scaled = F.broadcast_div(centered, data_std)
        return scaled
class Padding(gluon.HybridBlock):
    """Hybrid block that pads its input with zeros (constant mode).

    ``padding`` is forwarded unchanged as ``pad_width`` to ``F.pad``.
    """

    def __init__(self, padding, **kwargs):
        super(Padding, self).__init__(**kwargs)
        with self.name_scope():
            self.pad_width = padding

    def hybrid_forward(self, F, x):
        padded = F.pad(
            data=x,
            mode='constant',
            pad_width=self.pad_width,
            constant_value=0)
        return padded
class NoNormalization(gluon.HybridBlock):
    """Identity block used when no input normalization statistics are given."""

    def __init__(self, **kwargs):
        super(NoNormalization, self).__init__(**kwargs)

    def hybrid_forward(self, F, x):
        # Pass the input through untouched.
        return x
class Net(gluon.HybridBlock):
    """Q-network: Dense(128) -> tanh -> Dense(256) -> tanh -> Dense(2).

    The input is z-score normalized when ``data_mean`` is given (``data_std``
    is then required as well); otherwise it is passed through unchanged.
    """

    def __init__(self, data_mean=None, data_std=None, **kwargs):
        super(Net, self).__init__(**kwargs)
        with self.name_scope():
            if data_mean is None:
                self.input_normalization = NoNormalization()
            else:
                assert data_std is not None
                self.input_normalization = ZScoreNormalization(
                    data_mean=data_mean, data_std=data_std)

            # fc1_, output shape: {[128,1,1]}
            self.fc1_ = gluon.nn.Dense(units=128, use_bias=True)
            self.tanh1_ = gluon.nn.Activation(activation='tanh')

            # fc2_, output shape: {[256,1,1]}
            self.fc2_ = gluon.nn.Dense(units=256, use_bias=True)
            self.tanh2_ = gluon.nn.Activation(activation='tanh')

            # fc3_, output shape: {[2,1,1]}
            self.fc3_ = gluon.nn.Dense(units=2, use_bias=True)

        # The final Dense layer has no activation applied after it.
        self.last_layer = 'linear'

    def hybrid_forward(self, F, x):
        state = self.input_normalization(x)
        hidden1 = self.tanh1_(self.fc1_(state))
        hidden2 = self.tanh2_(self.fc2_(hidden1))
        qvalues = self.fc3_(hidden2)
        return qvalues
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNPredictor_cartpole_master_dqn.h
0 → 100644
View file @
d91b80d0
#ifndef CNNPREDICTOR_CARTPOLE_MASTER_DQN
#define CNNPREDICTOR_CARTPOLE_MASTER_DQN
#include <mxnet/c_predict_api.h>
#include <cassert>
#include <string>
#include <vector>
#include <CNNBufferFile.h>
class
CNNPredictor_cartpole_master_dqn
{
public:
const
std
::
string
json_file
=
"model/cartpole.agent.CartPoleDQN/model_newest-symbol.json"
;
const
std
::
string
param_file
=
"model/cartpole.agent.CartPoleDQN/model_newest-0000.params"
;
//const std::vector<std::string> input_keys = {"data"};
const
std
::
vector
<
std
::
string
>
input_keys
=
{
"state"
};
const
std
::
vector
<
std
::
vector
<
mx_uint
>>
input_shapes
=
{{
1
,
4
}};
const
bool
use_gpu
=
false
;
PredictorHandle
handle
;
explicit
CNNPredictor_cartpole_master_dqn
(){
init
(
json_file
,
param_file
,
input_keys
,
input_shapes
,
use_gpu
);
}
~
CNNPredictor_cartpole_master_dqn
(){
if
(
handle
)
MXPredFree
(
handle
);
}
void
predict
(
const
std
::
vector
<
float
>
&
state
,
std
::
vector
<
float
>
&
qvalues
){
MXPredSetInput
(
handle
,
"data"
,
state
.
data
(),
static_cast
<
mx_uint
>
(
state
.
size
()));
MXPredForward
(
handle
);
mx_uint
output_index
;
mx_uint
*
shape
=
0
;
mx_uint
shape_len
;
size_t
size
;
output_index
=
0
;
MXPredGetOutputShape
(
handle
,
output_index
,
&
shape
,
&
shape_len
);
size
=
1
;
for
(
mx_uint
i
=
0
;
i
<
shape_len
;
++
i
)
size
*=
shape
[
i
];
assert
(
size
==
qvalues
.
size
());
MXPredGetOutput
(
handle
,
0
,
&
(
qvalues
[
0
]),
qvalues
.
size
());
}
void
init
(
const
std
::
string
&
json_file
,
const
std
::
string
&
param_file
,
const
std
::
vector
<
std
::
string
>
&
input_keys
,
const
std
::
vector
<
std
::
vector
<
mx_uint
>>
&
input_shapes
,
const
bool
&
use_gpu
){
BufferFile
json_data
(
json_file
);
BufferFile
param_data
(
param_file
);
int
dev_type
=
use_gpu
?
2
:
1
;
int
dev_id
=
0
;
if
(
json_data
.
GetLength
()
==
0
||
param_data
.
GetLength
()
==
0
)
{
std
::
exit
(
-
1
);
}
const
mx_uint
num_input_nodes
=
input_keys
.
size
();
const
char
*
input_key
[
1
]
=
{
"data"
};
const
char
**
input_keys_ptr
=
input_key
;
mx_uint
shape_data_size
=
0
;
mx_uint
input_shape_indptr
[
input_shapes
.
size
()
+
1
];
input_shape_indptr
[
0
]
=
0
;
for
(
mx_uint
i
=
0
;
i
<
input_shapes
.
size
();
i
++
){
input_shape_indptr
[
i
+
1
]
=
input_shapes
[
i
].
size
();
shape_data_size
+=
input_shapes
[
i
].
size
();
}
mx_uint
input_shape_data
[
shape_data_size
];
mx_uint
index
=
0
;
for
(
mx_uint
i
=
0
;
i
<
input_shapes
.
size
();
i
++
){
for
(
mx_uint
j
=
0
;
j
<
input_shapes
[
i
].
size
();
j
++
){
input_shape_data
[
index
]
=
input_shapes
[
i
][
j
];
index
++
;
}
}
MXPredCreate
(
static_cast
<
const
char
*>
(
json_data
.
GetBuffer
()),
static_cast
<
const
char
*>
(
param_data
.
GetBuffer
()),
static_cast
<
size_t
>
(
param_data
.
GetLength
()),
dev_type
,
dev_id
,
num_input_nodes
,
input_keys_ptr
,
input_shape_indptr
,
input_shape_data
,
&
handle
);
assert
(
handle
);
}
};
#endif // CNNPREDICTOR_CARTPOLE_MASTER_DQN
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNTrainer_cartpole_master_dqn.py
0 → 100644
View file @
d91b80d0
from
reinforcement_learning.agent
import
DqnAgent
import
reinforcement_learning.environment
import
CNNCreator_cartpole_master_dqn
import
logging
import
mxnet
as
mx
if __name__ == "__main__":
    # Environment and compute context used for training.
    env = reinforcement_learning.environment.GymEnvironment('CartPole-v0')
    context = mx.cpu()

    # Build the Q-network.
    net_creator = CNNCreator_cartpole_master_dqn.CNNCreator_cartpole_master_dqn()
    net_creator.construct(context)

    # Replay memory configuration.
    replay_memory_params = dict(
        method='buffer',
        memory_size=10000,
        sample_size=32,
        state_dtype='float32',
        action_dtype='uint8',
        rewards_dtype='float32',
    )

    # Exploration policy configuration.
    policy_params = dict(
        method='epsgreedy',
        epsilon=1,
        min_epsilon=0.01,
        epsilon_decay_method='linear',
        epsilon_decay=0.01,
    )

    # All remaining agent settings, passed as keyword arguments.
    agent_params = dict(
        network=net_creator.net,
        environment=env,
        replay_memory_params=replay_memory_params,
        policy_params=policy_params,
        state_dim=net_creator.get_input_shapes()[0],
        ctx='cpu',
        discount_factor=0.999,
        loss_function='euclidean',
        optimizer='rmsprop',
        optimizer_params={
            'learning_rate': 0.001
        },
        training_episodes=160,
        train_interval=1,
        use_fix_target=True,
        target_update_interval=200,
        double_dqn=False,
        snapshot_interval=20,
        agent_name='cartpole_master_dqn',
        max_episode_step=250,
        output_directory='model',
        verbose=True,
        live_plot=True,
        make_logfile=True,
        target_score=185.5,
    )
    agent = DqnAgent(**agent_params)

    # NOTE(review): the result of train() is stored but not checked before
    # the network is saved - confirm that saving unconditionally is intended.
    train_successfull = agent.train()

    newest_model_path = net_creator._model_dir_ + net_creator._model_prefix_ + '_newest'
    agent.save_best_network(newest_model_path, epoch=0)
\ No newline at end of file
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNTranslator.h
0 → 100644
View file @
d91b80d0
#ifndef CNNTRANSLATOR_H
#define CNNTRANSLATOR_H
#include <armadillo>
#include <cassert>
using
namespace
std
;
using
namespace
arma
;
class
CNNTranslator
{
public:
template
<
typename
T
>
static
void
addColToSTDVector
(
const
Col
<
T
>
&
source
,
vector
<
float
>
&
data
){
for
(
size_t
i
=
0
;
i
<
source
.
n_elem
;
i
++
){