monticore / EmbeddedMontiArc / generators / EMADL2CPP / Commits

Commit 3d680f2e
Authored Aug 12, 2019 by Sebastian N.
Merge (parents: b85b4bce, d3c1bc00)

39 changed files with 2119 additions and 576 deletions (+2119 / -576)
pom.xml (+1 / -1)
src/main/java/de/monticore/lang/monticar/emadl/generator/Backend.java (+1 / -0)
src/main/java/de/monticore/lang/monticar/emadl/generator/EMADLGenerator.java (+41 / -10)
src/main/java/de/monticore/lang/monticar/emadl/generator/reinforcementlearning/RewardFunctionCppGenerator.java (+29 / -9)
src/test/java/de/monticore/lang/monticar/emadl/GenerationTest.java (+2 / -2)
src/test/resources/models/reinforcementModel/cartpole/agent/CartPoleDQN.cnnt (+1 / -1)
src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarCritic.cnna (+0 / -12)
src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarCritic.emadl (+24 / -0)
src/test/resources/models/reinforcementModel/torcs/agent/dqn/TorcsDQN.cnnt (+1 / -1)
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNDataLoader_cartpole_master_dqn.py (+58 / -22)
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNNet_cartpole_master_dqn.py (+0 / -1)
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNTrainer_cartpole_master_dqn.py (+4 / -4)
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/agent.py (+405 / -36)
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/strategy.py (+53 / -7)
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/util.py (+55 / -31)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNCreator_mountaincar_master_actor.py (+41 / -38)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNDataLoader_mountaincar_master_actor.py (+58 / -22)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNNet_mountaincar_master_actor.py (+23 / -10)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNPredictor_mountaincar_master_actor.h (+14 / -11)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNTrainer_mountaincar_master_actor.py (+7 / -7)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/mountaincar_master_actor.h (+2 / -2)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/CNNCreator_MountaincarCritic.py (+0 / -56)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/CNNCreator_mountaincar_agent_mountaincarCritic.py (+59 / -0)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/CNNNet_mountaincar_agent_mountaincarCritic.py (+29 / -15)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/agent.py (+405 / -36)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/strategy.py (+53 / -7)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/util.py (+55 / -31)
src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNCreator_torcs_agent_torcsAgent_dqn.py (+41 / -38)
src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNDataLoader_torcs_agent_torcsAgent_dqn.py (+58 / -22)
src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNNet_torcs_agent_torcsAgent_dqn.py (+23 / -10)
src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNPredictor_torcs_agent_torcsAgent_dqn.h (+14 / -11)
src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNTrainer_torcs_agent_torcsAgent_dqn.py (+5 / -5)
src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/_torcs_agent_dqn_reward_executor.so (+0 / -0)
src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/agent.py (+405 / -36)
src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/environment.py (+15 / -10)
src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/strategy.py (+53 / -7)
src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/torcs_agent_dqn_reward_executor.py (+27 / -32)
src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/util.py (+55 / -31)
src/test/resources/target_code/gluon/reinforcementModel/torcs/torcs_agent_torcsAgent_dqn.h (+2 / -2)
pom.xml (View file @ 3d680f2e)

@@ -8,7 +8,7 @@
   <groupId>de.monticore.lang.monticar</groupId>
   <artifactId>embedded-montiarc-emadl-generator</artifactId>
-  <version>0.3.3-SNAPSHOT</version>
+  <version>0.3.4-SNAPSHOT</version>

   <!-- == PROJECT DEPENDENCIES ============================================= -->
src/main/java/de/monticore/lang/monticar/emadl/generator/Backend.java (View file @ 3d680f2e)

@@ -9,6 +9,7 @@ import de.monticore.lang.monticar.cnnarch.mxnetgenerator.CNNArch2MxNet;
 import de.monticore.lang.monticar.cnnarch.caffe2generator.CNNArch2Caffe2;
 import de.monticore.lang.monticar.cnnarch.mxnetgenerator.CNNTrain2MxNet;
 import de.monticore.lang.monticar.cnnarch.caffe2generator.CNNTrain2Caffe2;
+import de.monticore.lang.monticar.emadl.generator.reinforcementlearning.RewardFunctionCppGenerator;

 import java.util.Optional;
src/main/java/de/monticore/lang/monticar/emadl/generator/EMADLGenerator.java (View file @ 3d680f2e)

@@ -34,6 +34,8 @@ import de.monticore.lang.monticar.cnnarch._symboltable.ArchitectureSymbol;
 import de.monticore.lang.monticar.cnnarch._symboltable.SerialCompositeElementSymbol;
 import de.monticore.lang.monticar.cnnarch.gluongenerator.CNNTrain2Gluon;
+import de.monticore.lang.monticar.cnnarch.gluongenerator.annotations.ArchitectureAdapter;
+import de.monticore.lang.monticar.cnntrain._cocos.CNNTrainCoCoChecker;
 import de.monticore.lang.monticar.cnntrain._cocos.CNNTrainCocos;
 import de.monticore.lang.monticar.cnntrain._symboltable.ConfigurationSymbol;
 import de.monticore.lang.monticar.emadl._cocos.EMADLCocos;
 import de.monticore.lang.monticar.generator.FileContent;

@@ -115,6 +117,18 @@ public class EMADLGenerator {
         processedArchitecture = new HashMap<>();
         setModelsPath(modelPath);
         TaggingResolver symtab = EMADLAbstractSymtab.createSymTabAndTaggingResolver(getModelsPath());
+        EMAComponentInstanceSymbol instance = resolveComponentInstanceSymbol(qualifiedName, symtab);
+
+        generateFiles(symtab, instance, symtab, pythonPath, forced);
+
+        if (doCompile) {
+            compile();
+        }
+        processedArchitecture = null;
+    }
+
+    private EMAComponentInstanceSymbol resolveComponentInstanceSymbol(String qualifiedName, TaggingResolver symtab) {
         EMAComponentSymbol component = symtab.<EMAComponentSymbol>resolve(qualifiedName, EMAComponentSymbol.KIND).orElse(null);
         List<String> splitName = Splitters.DOT.splitToList(qualifiedName);

@@ -126,15 +140,7 @@ public class EMADLGenerator {
             System.exit(1);
         }

-        EMAComponentInstanceSymbol instance = component.getEnclosingScope().<EMAComponentInstanceSymbol>resolve(instanceName, EMAComponentInstanceSymbol.KIND).get();
-
-        generateFiles(symtab, instance, symtab, pythonPath, forced);
-
-        if (doCompile) {
-            compile();
-        }
-        processedArchitecture = null;
+        return component.getEnclosingScope().<EMAComponentInstanceSymbol>resolve(instanceName, EMAComponentInstanceSymbol.KIND).get();
     }

     public void compile() throws IOException {

@@ -530,7 +536,32 @@ public class EMADLGenerator {
         final String fullConfigName = String.join(".", names);
         ArchitectureSymbol correspondingArchitecture = this.processedArchitecture.get(fullConfigName);
         assert correspondingArchitecture != null : "No architecture found for train " + fullConfigName + " configuration!";
-        configuration.setTrainedArchitecture(new ArchitectureAdapter(correspondingArchitecture));
+        configuration.setTrainedArchitecture(new ArchitectureAdapter(correspondingArchitecture.getName(), correspondingArchitecture));
         CNNTrainCocos.checkTrainedArchitectureCoCos(configuration);
+
+        // Resolve critic network if critic is present
+        if (configuration.getCriticName().isPresent()) {
+            String fullCriticName = configuration.getCriticName().get();
+            int indexOfFirstNameCharacter = fullCriticName.lastIndexOf('.') + 1;
+            fullCriticName = fullCriticName.substring(0, indexOfFirstNameCharacter)
+                + fullCriticName.substring(indexOfFirstNameCharacter, indexOfFirstNameCharacter + 1).toUpperCase()
+                + fullCriticName.substring(indexOfFirstNameCharacter + 1);
+
+            TaggingResolver symtab = EMADLAbstractSymtab.createSymTabAndTaggingResolver(getModelsPath());
+            EMAComponentInstanceSymbol instanceSymbol = resolveComponentInstanceSymbol(fullCriticName, symtab);
+            EMADLCocos.checkAll(instanceSymbol);
+
+            Optional<ArchitectureSymbol> critic = instanceSymbol.getSpannedScope().resolve("", ArchitectureSymbol.KIND);
+            if (!critic.isPresent()) {
+                Log.error("During the resolving of critic component: Critic component " + fullCriticName
+                    + " does not have a CNN implementation but is required to have one");
+                System.exit(-1);
+            }
+
+            critic.get().setComponentName(fullCriticName);
+            configuration.setCriticNetwork(new ArchitectureAdapter(fullCriticName, critic.get()));
+            CNNTrainCocos.checkCriticCocos(configuration);
+        }

         cnnTrainGenerator.setInstanceName(componentInstance.getFullName().replaceAll("\\.", "_"));
         Map<String, String> fileContentMap = cnnTrainGenerator.generateStrings(configuration);
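For reference, the critic resolution above uppercases the first character of the critic's simple name while leaving the package prefix untouched (e.g. mountaincar.agent.mountaincarCritic becomes mountaincar.agent.MountaincarCritic, matching the new .emadl component below). A minimal Python sketch of the same string transformation; the helper name is ours, for illustration only:

    def capitalize_simple_name(qualified_name):
        # Index of the first character after the last '.' (start of the simple name).
        idx = qualified_name.rfind('.') + 1
        return qualified_name[:idx] + qualified_name[idx].upper() + qualified_name[idx + 1:]

    assert capitalize_simple_name("mountaincar.agent.mountaincarCritic") \
        == "mountaincar.agent.MountaincarCritic"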
src/main/java/de/monticore/lang/monticar/emadl/generator/RewardFunctionCppGenerator.java → src/main/java/de/monticore/lang/monticar/emadl/generator/reinforcementlearning/RewardFunctionCppGenerator.java (View file @ 3d680f2e)

-package de.monticore.lang.monticar.emadl.generator;
+package de.monticore.lang.monticar.emadl.generator.reinforcementlearning;

 import de.monticore.lang.embeddedmontiarc.embeddedmontiarc._symboltable.instanceStructure.EMAComponentInstanceSymbol;
 import de.monticore.lang.monticar.cnnarch.gluongenerator.reinforcement.RewardFunctionSourceGenerator;
+import de.monticore.lang.monticar.emadl.generator.EMADLAbstractSymtab;
 import de.monticore.lang.monticar.generator.cpp.GeneratorEMAMOpt2CPP;
 import de.monticore.lang.tagging._symboltable.TaggingResolver;
 import de.se_rwth.commons.logging.Log;

@@ -9,30 +10,49 @@ import de.se_rwth.commons.logging.Log;
 import java.io.IOException;
 import java.util.Optional;

 public class RewardFunctionCppGenerator implements RewardFunctionSourceGenerator {

     public RewardFunctionCppGenerator() {
     }

-    @Override
-    public void generate(String modelPath, String rootModel, String targetPath) {
-        GeneratorEMAMOpt2CPP generator = new GeneratorEMAMOpt2CPP();
-        generator.useArmadilloBackend();
-
-        TaggingResolver taggingResolver = EMADLAbstractSymtab.createSymTabAndTaggingResolver(modelPath);
-
+    @Override
+    public EMAComponentInstanceSymbol resolveSymbol(TaggingResolver taggingResolver, String rootModel) {
         Optional<EMAComponentInstanceSymbol> instanceSymbol = taggingResolver
             .<EMAComponentInstanceSymbol>resolve(rootModel, EMAComponentInstanceSymbol.KIND);

         if (!instanceSymbol.isPresent()) {
             Log.error("Generation of reward function is not possible: Cannot resolve component instance "
                 + rootModel);
         }
+        return instanceSymbol.get();
+    }

+    @Override
+    public void generate(EMAComponentInstanceSymbol componentInstanceSymbol, TaggingResolver taggingResolver,
+                         String targetPath) {
+        GeneratorEMAMOpt2CPP generator = new GeneratorEMAMOpt2CPP();
+        generator.useArmadilloBackend();
         generator.setGenerationTargetPath(targetPath);

         try {
-            generator.generate(instanceSymbol.get(), taggingResolver);
+            generator.generate(componentInstanceSymbol, taggingResolver);
         } catch (IOException e) {
             Log.error("Generation of reward function is not possible: " + e.getMessage());
         }
     }

+    @Override
+    public void generate(String modelPath, String rootModel, String targetPath) {
+        TaggingResolver taggingResolver = createTaggingResolver(modelPath);
+        EMAComponentInstanceSymbol instanceSymbol = resolveSymbol(taggingResolver, rootModel);
+        generate(instanceSymbol, taggingResolver, targetPath);
+    }
+
+    @Override
+    public TaggingResolver createTaggingResolver(final String modelPath) {
+        return EMADLAbstractSymtab.createSymTabAndTaggingResolver(modelPath);
+    }
 }
src/test/java/de/monticore/lang/monticar/emadl/GenerationTest.java (View file @ 3d680f2e)

@@ -275,8 +275,8 @@ public class GenerationTest extends AbstractSymtabTest {
     "HelperA.h",
     "start_training.sh",
     "reinforcement_learning/__init__.py",
-    "reinforcement_learning/CNNCreator_MountaincarCritic.py",
-    "reinforcement_learning/CNNNet_MountaincarCritic.py",
+    "reinforcement_learning/CNNCreator_mountaincar_agent_mountaincarCritic.py",
+    "reinforcement_learning/CNNNet_mountaincar_agent_mountaincarCritic.py",
     "reinforcement_learning/strategy.py",
     "reinforcement_learning/agent.py",
     "reinforcement_learning/environment.py",
src/test/resources/models/reinforcementModel/cartpole/agent/CartPoleDQN.cnnt (View file @ 3d680f2e)

@@ -17,7 +17,7 @@ configuration CartPoleDQN {
     use_double_dqn : false
-    loss : euclidean
+    loss : huber
     replay_memory : buffer{
         memory_size : 10000
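Both DQN test configurations switch the loss from euclidean (L2) to huber, which is quadratic near zero but linear for large errors, so outlier TD errors no longer dominate the gradient. A minimal NumPy sketch of the difference (delta=1.0 is illustrative; Gluon's HuberLoss uses a comparable rho parameter):

    import numpy as np

    def l2_loss(err):
        return 0.5 * err ** 2

    def huber_loss(err, delta=1.0):
        quadratic = 0.5 * err ** 2
        linear = delta * (np.abs(err) - 0.5 * delta)
        return np.where(np.abs(err) <= delta, quadratic, linear)

    err = np.array([0.1, 1.0, 10.0])
    print(l2_loss(err))     # [ 0.005  0.5   50.   ]
    print(huber_loss(err))  # [ 0.005  0.5    9.5  ]  large errors grow only linearly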
src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarCritic.cnna (deleted, 100644 → 0; View file @ b85b4bce)

-implementation Critic(state, action) {
-    (state ->
-        FullyConnected(units=400) ->
-        Relu() ->
-        FullyConnected(units=300)
-    |
-        action ->
-        FullyConnected(units=300)
-    ) ->
-    Add() ->
-    Relu();
-}
\ No newline at end of file
src/test/resources/models/reinforcementModel/mountaincar/agent/MountaincarCritic.emadl (new file, 0 → 100644; View file @ 3d680f2e)

+package mountaincar.agent;
+
+component MountaincarCritic {
+    ports
+        in Q^{2} state,
+        in Q(-1:1)^{1} action,
+        out Q(-oo:oo)^{1} qvalues;
+
+    implementation CNN {
+        (
+            state ->
+            FullyConnected(units=400) ->
+            Relu() ->
+            FullyConnected(units=300)
+        |
+            action ->
+            FullyConnected(units=300)
+        ) ->
+        Add() ->
+        Relu() ->
+        FullyConnected(units=1) ->
+        qvalues;
+    }
+}
\ No newline at end of file
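The new .emadl file replaces the deleted .cnna implementation with a full component: typed ports (a two-dimensional state, a bounded one-dimensional action, an unbounded scalar Q-value) and a CNN implementation that merges the state and action branches with Add() before a final FullyConnected(units=1). A rough Gluon equivalent of that architecture, assuming the port dimensions above (a sketch, not the generated CNNNet file):

    import mxnet as mx
    from mxnet import gluon

    class CriticSketch(gluon.HybridBlock):
        def __init__(self, **kwargs):
            super(CriticSketch, self).__init__(**kwargs)
            with self.name_scope():
                self.state_fc1 = gluon.nn.Dense(400)  # state -> FullyConnected(units=400)
                self.state_fc2 = gluon.nn.Dense(300)  # -> Relu() -> FullyConnected(units=300)
                self.action_fc = gluon.nn.Dense(300)  # action -> FullyConnected(units=300)
                self.q_out = gluon.nn.Dense(1)        # -> FullyConnected(units=1) -> qvalues

        def hybrid_forward(self, F, state, action):
            state_branch = self.state_fc2(F.relu(self.state_fc1(state)))
            action_branch = self.action_fc(action)
            # Add() -> Relu() -> FullyConnected(units=1)
            return self.q_out(F.relu(state_branch + action_branch))

    net = CriticSketch()
    net.initialize()
    qvalues = net(mx.nd.zeros((32, 2)), mx.nd.zeros((32, 1)))  # batch of 32 -> shape (32, 1)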
src/test/resources/models/reinforcementModel/torcs/agent/dqn/TorcsDQN.cnnt (View file @ 3d680f2e)

@@ -23,7 +23,7 @@ configuration TorcsDQN {
     use_double_dqn : true
-    loss : euclidean
+    loss : huber
     replay_memory : buffer{
         memory_size : 1000000
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNDataLoader_cartpole_master_dqn.py (View file @ 3d680f2e)

@@ -3,8 +3,9 @@ import h5py
 import mxnet as mx
 import logging
 import sys
+from mxnet import nd

-class cartpole_master_dqnDataLoader:
+class CNNDataLoader_cartpole_master_dqn:
     _input_names_ = ['state']
     _output_names_ = ['qvalues_label']

@@ -14,21 +15,38 @@ class cartpole_master_dqnDataLoader:
     def load_data(self, batch_size):
         train_h5, test_h5 = self.load_h5_files()

-        data_mean = train_h5[self._input_names_[0]][:].mean(axis=0)
-        data_std = train_h5[self._input_names_[0]][:].std(axis=0) + 1e-5
+        train_data = {}
+        data_mean = {}
+        data_std = {}
+        for input_name in self._input_names_:
+            train_data[input_name] = train_h5[input_name]
+            data_mean[input_name] = nd.array(train_h5[input_name][:].mean(axis=0))
+            data_std[input_name] = nd.array(train_h5[input_name][:].std(axis=0) + 1e-5)
+
+        train_label = {}
+        for output_name in self._output_names_:
+            train_label[output_name] = train_h5[output_name]

-        train_iter = mx.io.NDArrayIter(train_h5[self._input_names_[0]],
-                                       train_h5[self._output_names_[0]],
-                                       batch_size=batch_size,
-                                       data_name=self._input_names_[0],
-                                       label_name=self._output_names_[0])
+        train_iter = mx.io.NDArrayIter(data=train_data,
+                                       label=train_label,
+                                       batch_size=batch_size)

         test_iter = None

         if test_h5 != None:
-            test_iter = mx.io.NDArrayIter(test_h5[self._input_names_[0]],
-                                          test_h5[self._output_names_[0]],
-                                          batch_size=batch_size,
-                                          data_name=self._input_names_[0],
-                                          label_name=self._output_names_[0])
+            test_data = {}
+            for input_name in self._input_names_:
+                test_data[input_name] = test_h5[input_name]
+
+            test_label = {}
+            for output_name in self._output_names_:
+                test_label[output_name] = test_h5[output_name]
+
+            test_iter = mx.io.NDArrayIter(data=test_data,
+                                          label=test_label,
+                                          batch_size=batch_size)

         return train_iter, test_iter, data_mean, data_std

     def load_h5_files(self):

@@ -36,21 +54,39 @@ class cartpole_master_dqnDataLoader:
         test_h5 = None
         train_path = self._data_dir + "train.h5"
         test_path = self._data_dir + "test.h5"

         if os.path.isfile(train_path):
             train_h5 = h5py.File(train_path, 'r')

-            if not (self._input_names_[0] in train_h5 and self._output_names_[0] in train_h5):
-                logging.error("The HDF5 file '" + os.path.abspath(train_path) + "' has to contain the datasets: "
-                              + "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
-                sys.exit(1)
-
-            test_iter = None
+            for input_name in self._input_names_:
+                if not input_name in train_h5:
+                    logging.error("The HDF5 file '" + os.path.abspath(train_path)
+                                  + "' has to contain the dataset " + "'" + input_name + "'")
+                    sys.exit(1)
+
+            for output_name in self._output_names_:
+                if not output_name in train_h5:
+                    logging.error("The HDF5 file '" + os.path.abspath(train_path)
+                                  + "' has to contain the dataset " + "'" + output_name + "'")
+                    sys.exit(1)

             if os.path.isfile(test_path):
                 test_h5 = h5py.File(test_path, 'r')

-                if not (self._input_names_[0] in test_h5 and self._output_names_[0] in test_h5):
-                    logging.error("The HDF5 file '" + os.path.abspath(test_path) + "' has to contain the datasets: "
-                                  + "'" + self._input_names_[0] + "', '" + self._output_names_[0] + "'")
-                    sys.exit(1)
+                for input_name in self._input_names_:
+                    if not input_name in test_h5:
+                        logging.error("The HDF5 file '" + os.path.abspath(test_path)
+                                      + "' has to contain the dataset " + "'" + input_name + "'")
+                        sys.exit(1)
+
+                for output_name in self._output_names_:
+                    if not output_name in test_h5:
+                        logging.error("The HDF5 file '" + os.path.abspath(test_path)
+                                      + "' has to contain the dataset " + "'" + output_name + "'")
+                        sys.exit(1)
             else:
                 logging.warning("Couldn't load test set. File '" + os.path.abspath(test_path) + "' does not exist.")

             return train_h5, test_h5
         else:
             logging.error("Data loading failure. File '" + os.path.abspath(train_path) + "' does not exist.")
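The regenerated loader keys training data and labels by port name instead of hard-coding a single input and output, so components with several ports reuse the same code path. A condensed sketch of the pattern, under the same assumption that the HDF5 datasets are named after the ports (make_iter is our name, for illustration):

    import h5py
    import mxnet as mx

    def make_iter(h5_path, input_names, output_names, batch_size):
        h5_file = h5py.File(h5_path, 'r')
        data = {name: h5_file[name] for name in input_names}    # one entry per input port
        label = {name: h5_file[name] for name in output_names}  # one entry per output port
        return mx.io.NDArrayIter(data=data, label=label, batch_size=batch_size)

    # train_iter = make_iter("train.h5", ['state'], ['qvalues_label'], batch_size=32)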
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNNet_cartpole_master_dqn.py (View file @ 3d680f2e)

@@ -101,7 +101,6 @@ class Net_0(gluon.HybridBlock):
             self.fc3_ = gluon.nn.Dense(units=2, use_bias=True)
             # fc3_, output shape: {[2,1,1]}

-            self.last_layers['qvalues'] = 'linear'

     def hybrid_forward(self, F, state):
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNTrainer_cartpole_master_dqn.py (View file @ 3d680f2e)

@@ -56,7 +56,7 @@ if __name__ == "__main__":
         'memory_size': 10000,
         'sample_size': 32,
         'state_dtype': 'float32',
-        'action_dtype': 'float32',
+        'action_dtype': 'uint8',
         'rewards_dtype': 'float32'
     },
     'strategy_params': {

@@ -78,10 +78,10 @@ if __name__ == "__main__":
     'snapshot_interval': 20,
     'max_episode_step': 250,
     'target_score': 185.5,
-    'qnet': qnet_creator.net,
+    'qnet': qnet_creator.networks[0],
     'use_fix_target': True,
     'target_update_interval': 200,
-    'loss_function': 'euclidean',
+    'loss_function': 'huber',
     'optimizer': 'rmsprop',
     'optimizer_params': {
         'learning_rate': 0.001},

@@ -108,4 +108,4 @@ if __name__ == "__main__":
     train_successful = agent.train()
     if train_successful:
-        agent.save_best_network(qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
+        agent.export_best_network(path=qnet_creator._model_dir_ + qnet_creator._model_prefix_ + '_0_newest', epoch=0)
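Two of these trainer changes are visible API shifts: the creator now exposes a networks list instead of a single net attribute, and the best network is exported rather than saved. The action_dtype change to uint8 also shrinks the replay memory, since cartpole's two discrete actions fit in one byte. A small NumPy sketch of that footprint difference, assuming a NumPy-backed replay buffer (the buffer layout is our assumption; it is not shown in this commit):

    import numpy as np

    memory_size = 10000
    actions_f32 = np.zeros(memory_size, dtype='float32')  # old: 4 bytes per stored action
    actions_u8 = np.zeros(memory_size, dtype='uint8')     # new: 1 byte per stored action
    print(actions_f32.nbytes, actions_u8.nbytes)          # 40000 10000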
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/agent.py (View file @ 3d680f2e)

(This diff is collapsed.)
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/strategy.py (View file @ 3d680f2e)

@@ -13,18 +13,21 @@ class StrategyBuilder(object):
                 epsilon_decay_method='no',
                 epsilon_decay=0.0,
                 epsilon_decay_start=0,
+                epsilon_decay_per_step=False,
                 action_dim=None,
                 action_low=None,
                 action_high=None,
                 mu=0.0,
                 theta=0.5,
-                sigma=0.3
+                sigma=0.3,
+                noise_variance=0.1
     ):
         if epsilon_decay_method == 'linear':
             decay = LinearDecay(eps_decay=epsilon_decay, min_eps=min_epsilon,
-                                decay_start=epsilon_decay_start)
+                                decay_start=epsilon_decay_start,
+                                decay_per_step=epsilon_decay_per_step)
         else:
             decay = NoDecay()

@@ -44,6 +47,13 @@ class StrategyBuilder(object):
             return OrnsteinUhlenbeckStrategy(action_dim, action_low, action_high,
                                              epsilon, mu, theta, sigma, decay)
+        elif method == 'gaussian':
+            assert action_dim is not None
+            assert action_low is not None
+            assert action_high is not None
+            assert noise_variance is not None
+            return GaussianNoiseStrategy(action_dim, action_low, action_high,
+                                         epsilon, noise_variance, decay)
         else:
             assert action_dim is not None
             assert len(action_dim) == 1

@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):
 class LinearDecay(BaseDecay):
-    def __init__(self, eps_decay, min_eps=0, decay_start=0):
+    def __init__(self, eps_decay, min_eps=0, decay_start=0, decay_per_step=False):
         super(LinearDecay, self).__init__()
         self.eps_decay = eps_decay
         self.min_eps = min_eps
         self.decay_start = decay_start
+        self.decay_per_step = decay_per_step
+        self.last_episode = -1

-    def decay(self, cur_eps, episode):
-        if episode < self.decay_start:
-            return cur_eps
+    def do_decay(self, episode):
+        if self.decay_per_step:
+            do = (episode >= self.decay_start)
+        else:
+            do = ((self.last_episode != episode) and (episode >= self.decay_start))
+            self.last_episode = episode
+        return do
+
+    def decay(self, cur_eps, episode):
+        if self.do_decay(episode):
+            return max(cur_eps - self.eps_decay, self.min_eps)
+        else:
+            return cur_eps

 class BaseStrategy(object):

@@ -168,5 +188,31 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
     def select_action(self, values):
         noise = self._evolve_state()
-        action = values + (self.cur_eps * noise)
+        action = (1.0 - self.cur_eps) * values + (self.cur_eps * noise)
         return np.clip(action, self._action_low, self._action_high)

+
+class GaussianNoiseStrategy(BaseStrategy):
+    def __init__(self, action_dim, action_low, action_high,
+                 eps, noise_variance, decay=NoDecay()):
+        super(GaussianNoiseStrategy, self).__init__(decay)
+        self.eps = eps
+        self.cur_eps = eps
+
+        self._action_dim = action_dim
+        self._action_low = action_low
+        self._action_high = action_high
+
+        self._noise_variance = noise_variance
+
+    def select_action(self, values):
+        noise = np.random.normal(loc=0.0, scale=self._noise_variance, size=self._action_dim)
+        action = values + self.cur_eps * noise
+        return np.clip(action, self._action_low, self._action_high)
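Beyond the new decay_per_step switch for LinearDecay, the strategy module gains a GaussianNoiseStrategy for continuous action spaces: it adds eps-scaled Gaussian noise to the actor output and clips to the action bounds, with the same decay schedule as the other strategies. A usage sketch against the reconstructed class above (all parameter values are illustrative):

    import numpy as np

    strategy = GaussianNoiseStrategy(action_dim=(1,), action_low=-1.0, action_high=1.0,
                                     eps=1.0, noise_variance=0.1)
    values = np.array([0.3])                 # raw actor output
    action = strategy.select_action(values)  # values + cur_eps * N(0, 0.1), clipped to [-1, 1]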
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/util.py (View file @ 3d680f2e)

@@ -11,8 +11,8 @@ import cnnarch_logger
 LOSS_FUNCTIONS = {
     'l1': gluon.loss.L1Loss(),
-    'euclidean': gluon.loss.L2Loss(),
-    'huber_loss': gluon.loss.HuberLoss(),
+    'l2': gluon.loss.L2Loss(),
+    'huber': gluon.loss.HuberLoss(),
     'softmax_cross_entropy': gluon.loss.SoftmaxCrossEntropyLoss(),
     'sigmoid_cross_entropy': gluon.loss.SigmoidBinaryCrossEntropyLoss()}

@@ -127,13 +127,15 @@ class TrainingStats(object):
         else:
             return self._all_total_rewards[0]

-    def save(self, path):
-        np.save(os.path.join(path, 'total_rewards'), self._all_total_rewards)
-        np.save(os.path.join(path, 'eps'), self._all_eps)
-        np.save(os.path.join(path, 'time'), self._all_time)
+    def save(self, path, episode=None):
+        if episode is None:
+            episode = self._max_episodes
+        np.save(os.path.join(path, 'total_rewards'), self._all_total_rewards[:episode])
+        np.save(os.path.join(path, 'eps'), self._all_eps[:episode])
+        np.save(os.path.join(path, 'time'), self._all_time[:episode])
         np.save(os.path.join(path, 'mean_reward'),
-                self._all_mean_reward_last_100_episodes)
+                self._all_mean_reward_last_100_episodes[:episode])

     def _log_episode(self, episode, start_time, training_steps, eps, reward):
         self.add_eps(episode, eps)

@@ -170,33 +172,43 @@ class DqnTrainingStats(TrainingStats):
         self._logger.info(info)
         return avg_reward

-    def save_stats(self, path):
+    def save_stats(self, path, episode=None):
+        if episode is None:
+            episode = self._max_episodes
+
+        all_total_rewards = self._all_total_rewards[:episode]
+        all_avg_loss = self._all_avg_loss[:episode]
+        all_eps = self._all_eps[:episode]
+        all_mean_reward_last_100_episodes = self._all_mean_reward_last_100_episodes[:episode]
+
         fig = plt.figure(figsize=(20, 20))

         sub_rewards = fig.add_subplot(221)
         sub_rewards.set_title('Total Rewards per episode')
-        sub_rewards.plot(np.arange(self._max_episodes), self._all_total_rewards)
+        sub_rewards.plot(np.arange(episode), all_total_rewards)

         sub_loss = fig.add_subplot(222)
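With the renamed keys, the loss names accepted by the trainers ('huber', 'l2', ...) map one-to-one onto Gluon loss objects. A usage sketch against the LOSS_FUNCTIONS dict above (HuberLoss defaults to rho=1, so a large error falls in the linear regime):

    from mxnet import nd

    loss_fn = LOSS_FUNCTIONS['huber']                  # gluon.loss.HuberLoss()
    loss = loss_fn(nd.array([0.0]), nd.array([10.0]))  # |pred - label| = 10 > rho
    print(loss.asscalar())                             # 9.5, i.e. |err| - 0.5 * rho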