Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
monticore
EmbeddedMontiArc
generators
EMADL2CPP
Commits
869ec892
Commit
869ec892
authored
Jul 17, 2019
by
Nicola Gatto
Browse files
Adapt tests to new templates
parent
1fb97868
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
src/test/resources/target_code/gluon/reinforcementModel/cartpole/CNNTrainer_cartpole_master_dqn.py
View file @
869ec892
...
...
@@ -78,10 +78,10 @@ if __name__ == "__main__":
'snapshot_interval'
:
20
,
'max_episode_step'
:
250
,
'target_score'
:
185.5
,
'qnet'
:
qnet_creator
.
net
,
'qnet'
:
qnet_creator
.
net
works
[
0
]
,
'use_fix_target'
:
True
,
'target_update_interval'
:
200
,
'loss'
:
'huber'
,
'loss
_function
'
:
'huber'
,
'optimizer'
:
'rmsprop'
,
'optimizer_params'
:
{
'learning_rate'
:
0.001
},
...
...
@@ -108,4 +108,4 @@ if __name__ == "__main__":
train_successful
=
agent
.
train
()
if
train_successful
:
agent
.
save
_best_network
(
qnet_creator
.
_model_dir_
+
qnet_creator
.
_model_prefix_
+
'_0_newest'
,
epoch
=
0
)
agent
.
export
_best_network
(
path
=
qnet_creator
.
_model_dir_
+
qnet_creator
.
_model_prefix_
+
'_0_newest'
,
epoch
=
0
)
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/agent.py
View file @
869ec892
This diff is collapsed.
Click to expand it.
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/strategy.py
View file @
869ec892
...
...
@@ -13,18 +13,21 @@ class StrategyBuilder(object):
epsilon_decay_method
=
'no'
,
epsilon_decay
=
0.0
,
epsilon_decay_start
=
0
,
epsilon_decay_per_step
=
False
,
action_dim
=
None
,
action_low
=
None
,
action_high
=
None
,
mu
=
0.0
,
theta
=
0.5
,
sigma
=
0.3
sigma
=
0.3
,
noise_variance
=
0.1
):
if
epsilon_decay_method
==
'linear'
:
decay
=
LinearDecay
(
eps_decay
=
epsilon_decay
,
min_eps
=
min_epsilon
,
decay_start
=
epsilon_decay_start
)
decay_start
=
epsilon_decay_start
,
decay_per_step
=
epsilon_decay_per_step
)
else
:
decay
=
NoDecay
()
...
...
@@ -44,6 +47,13 @@ class StrategyBuilder(object):
return
OrnsteinUhlenbeckStrategy
(
action_dim
,
action_low
,
action_high
,
epsilon
,
mu
,
theta
,
sigma
,
decay
)
elif
method
==
'gaussian'
:
assert
action_dim
is
not
None
assert
action_low
is
not
None
assert
action_high
is
not
None
assert
noise_variance
is
not
None
return
GaussianNoiseStrategy
(
action_dim
,
action_low
,
action_high
,
epsilon
,
noise_variance
,
decay
)
else
:
assert
action_dim
is
not
None
assert
len
(
action_dim
)
==
1
...
...
@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):
class
LinearDecay
(
BaseDecay
):
def
__init__
(
self
,
eps_decay
,
min_eps
=
0
,
decay_start
=
0
):
def
__init__
(
self
,
eps_decay
,
min_eps
=
0
,
decay_start
=
0
,
decay_per_step
=
False
):
super
(
LinearDecay
,
self
).
__init__
()
self
.
eps_decay
=
eps_decay
self
.
min_eps
=
min_eps
self
.
decay_start
=
decay_start
self
.
decay_per_step
=
decay_per_step
self
.
last_episode
=
-
1
def
decay
(
self
,
cur_eps
,
episode
):
if
episode
<
self
.
decay_
start
:
return
cur_eps
def
do_
decay
(
self
,
episode
):
if
self
.
decay_
per_step
:
do
=
(
episode
>=
self
.
decay_start
)
else
:
do
=
((
self
.
last_episode
!=
episode
)
and
(
episode
>=
self
.
decay_start
))
self
.
last_episode
=
episode
return
do
def
decay
(
self
,
cur_eps
,
episode
):
if
self
.
do_decay
(
episode
):
return
max
(
cur_eps
-
self
.
eps_decay
,
self
.
min_eps
)
else
:
return
cur_eps
class
BaseStrategy
(
object
):
...
...
@@ -170,3 +190,29 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
noise
=
self
.
_evolve_state
()
action
=
(
1.0
-
self
.
cur_eps
)
*
values
+
(
self
.
cur_eps
*
noise
)
return
np
.
clip
(
action
,
self
.
_action_low
,
self
.
_action_high
)
class
GaussianNoiseStrategy
(
BaseStrategy
):
def
__init__
(
self
,
action_dim
,
action_low
,
action_high
,
eps
,
noise_variance
,
decay
=
NoDecay
()
):
super
(
GaussianNoiseStrategy
,
self
).
__init__
(
decay
)
self
.
eps
=
eps
self
.
cur_eps
=
eps
self
.
_action_dim
=
action_dim
self
.
_action_low
=
action_low
self
.
_action_high
=
action_high
self
.
_noise_variance
=
noise_variance
def
select_action
(
self
,
values
):
noise
=
np
.
random
.
normal
(
loc
=
0.0
,
scale
=
self
.
_noise_variance
,
size
=
self
.
_action_dim
)
action
=
values
+
self
.
cur_eps
*
noise
return
np
.
clip
(
action
,
self
.
_action_low
,
self
.
_action_high
)
src/test/resources/target_code/gluon/reinforcementModel/cartpole/reinforcement_learning/util.py
View file @
869ec892
...
...
@@ -11,8 +11,8 @@ import cnnarch_logger
LOSS_FUNCTIONS
=
{
'l1'
:
gluon
.
loss
.
L1Loss
(),
'
euclidean
'
:
gluon
.
loss
.
L2Loss
(),
'huber
_loss
'
:
gluon
.
loss
.
HuberLoss
(),
'
l2
'
:
gluon
.
loss
.
L2Loss
(),
'huber'
:
gluon
.
loss
.
HuberLoss
(),
'softmax_cross_entropy'
:
gluon
.
loss
.
SoftmaxCrossEntropyLoss
(),
'sigmoid_cross_entropy'
:
gluon
.
loss
.
SigmoidBinaryCrossEntropyLoss
()}
...
...
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/CNNTrainer_mountaincar_master_actor.py
View file @
869ec892
...
...
@@ -85,8 +85,8 @@ if __name__ == "__main__":
'train_interval'
:
1
,
'snapshot_interval'
:
20
,
'max_episode_step'
:
1000
,
'actor'
:
actor_creator
.
net
,
'critic'
:
critic_creator
.
net
,
'actor'
:
actor_creator
.
net
works
[
0
]
,
'critic'
:
critic_creator
.
net
works
[
0
]
,
'actor_optimizer'
:
'adam'
,
'actor_optimizer_params'
:
{
'learning_rate'
:
1.0E-4
},
...
...
@@ -116,4 +116,4 @@ if __name__ == "__main__":
train_successful
=
agent
.
train
()
if
train_successful
:
agent
.
save
_best_network
(
actor_creator
.
_model_dir_
+
actor_creator
.
_model_prefix_
+
'_0_newest'
,
epoch
=
0
)
agent
.
export
_best_network
(
path
=
actor_creator
.
_model_dir_
+
actor_creator
.
_model_prefix_
+
'_0_newest'
,
epoch
=
0
)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/agent.py
View file @
869ec892
This diff is collapsed.
Click to expand it.
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/strategy.py
View file @
869ec892
...
...
@@ -13,18 +13,21 @@ class StrategyBuilder(object):
epsilon_decay_method
=
'no'
,
epsilon_decay
=
0.0
,
epsilon_decay_start
=
0
,
epsilon_decay_per_step
=
False
,
action_dim
=
None
,
action_low
=
None
,
action_high
=
None
,
mu
=
0.0
,
theta
=
0.5
,
sigma
=
0.3
sigma
=
0.3
,
noise_variance
=
0.1
):
if
epsilon_decay_method
==
'linear'
:
decay
=
LinearDecay
(
eps_decay
=
epsilon_decay
,
min_eps
=
min_epsilon
,
decay_start
=
epsilon_decay_start
)
decay_start
=
epsilon_decay_start
,
decay_per_step
=
epsilon_decay_per_step
)
else
:
decay
=
NoDecay
()
...
...
@@ -44,6 +47,13 @@ class StrategyBuilder(object):
return
OrnsteinUhlenbeckStrategy
(
action_dim
,
action_low
,
action_high
,
epsilon
,
mu
,
theta
,
sigma
,
decay
)
elif
method
==
'gaussian'
:
assert
action_dim
is
not
None
assert
action_low
is
not
None
assert
action_high
is
not
None
assert
noise_variance
is
not
None
return
GaussianNoiseStrategy
(
action_dim
,
action_low
,
action_high
,
epsilon
,
noise_variance
,
decay
)
else
:
assert
action_dim
is
not
None
assert
len
(
action_dim
)
==
1
...
...
@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):
class
LinearDecay
(
BaseDecay
):
def
__init__
(
self
,
eps_decay
,
min_eps
=
0
,
decay_start
=
0
):
def
__init__
(
self
,
eps_decay
,
min_eps
=
0
,
decay_start
=
0
,
decay_per_step
=
False
):
super
(
LinearDecay
,
self
).
__init__
()
self
.
eps_decay
=
eps_decay
self
.
min_eps
=
min_eps
self
.
decay_start
=
decay_start
self
.
decay_per_step
=
decay_per_step
self
.
last_episode
=
-
1
def
decay
(
self
,
cur_eps
,
episode
):
if
episode
<
self
.
decay_
start
:
return
cur_eps
def
do_
decay
(
self
,
episode
):
if
self
.
decay_
per_step
:
do
=
(
episode
>=
self
.
decay_start
)
else
:
do
=
((
self
.
last_episode
!=
episode
)
and
(
episode
>=
self
.
decay_start
))
self
.
last_episode
=
episode
return
do
def
decay
(
self
,
cur_eps
,
episode
):
if
self
.
do_decay
(
episode
):
return
max
(
cur_eps
-
self
.
eps_decay
,
self
.
min_eps
)
else
:
return
cur_eps
class
BaseStrategy
(
object
):
...
...
@@ -170,3 +190,29 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
noise
=
self
.
_evolve_state
()
action
=
(
1.0
-
self
.
cur_eps
)
*
values
+
(
self
.
cur_eps
*
noise
)
return
np
.
clip
(
action
,
self
.
_action_low
,
self
.
_action_high
)
class
GaussianNoiseStrategy
(
BaseStrategy
):
def
__init__
(
self
,
action_dim
,
action_low
,
action_high
,
eps
,
noise_variance
,
decay
=
NoDecay
()
):
super
(
GaussianNoiseStrategy
,
self
).
__init__
(
decay
)
self
.
eps
=
eps
self
.
cur_eps
=
eps
self
.
_action_dim
=
action_dim
self
.
_action_low
=
action_low
self
.
_action_high
=
action_high
self
.
_noise_variance
=
noise_variance
def
select_action
(
self
,
values
):
noise
=
np
.
random
.
normal
(
loc
=
0.0
,
scale
=
self
.
_noise_variance
,
size
=
self
.
_action_dim
)
action
=
values
+
self
.
cur_eps
*
noise
return
np
.
clip
(
action
,
self
.
_action_low
,
self
.
_action_high
)
src/test/resources/target_code/gluon/reinforcementModel/mountaincar/reinforcement_learning/util.py
View file @
869ec892
...
...
@@ -11,8 +11,8 @@ import cnnarch_logger
LOSS_FUNCTIONS
=
{
'l1'
:
gluon
.
loss
.
L1Loss
(),
'
euclidean
'
:
gluon
.
loss
.
L2Loss
(),
'huber
_loss
'
:
gluon
.
loss
.
HuberLoss
(),
'
l2
'
:
gluon
.
loss
.
L2Loss
(),
'huber'
:
gluon
.
loss
.
HuberLoss
(),
'softmax_cross_entropy'
:
gluon
.
loss
.
SoftmaxCrossEntropyLoss
(),
'sigmoid_cross_entropy'
:
gluon
.
loss
.
SigmoidBinaryCrossEntropyLoss
()}
...
...
src/test/resources/target_code/gluon/reinforcementModel/torcs/CNNTrainer_torcs_agent_torcsAgent_dqn.py
View file @
869ec892
...
...
@@ -84,10 +84,10 @@ if __name__ == "__main__":
'train_interval'
:
1
,
'snapshot_interval'
:
1000
,
'max_episode_step'
:
999999999
,
'qnet'
:
qnet_creator
.
net
,
'qnet'
:
qnet_creator
.
net
works
[
0
]
,
'use_fix_target'
:
True
,
'target_update_interval'
:
500
,
'loss'
:
'huber'
,
'loss
_function
'
:
'huber'
,
'optimizer'
:
'rmsprop'
,
'optimizer_params'
:
{
'learning_rate'
:
0.001
},
...
...
@@ -114,4 +114,4 @@ if __name__ == "__main__":
train_successful
=
agent
.
train
()
if
train_successful
:
agent
.
save
_best_network
(
qnet_creator
.
_model_dir_
+
qnet_creator
.
_model_prefix_
+
'_0_newest'
,
epoch
=
0
)
agent
.
export
_best_network
(
path
=
qnet_creator
.
_model_dir_
+
qnet_creator
.
_model_prefix_
+
'_0_newest'
,
epoch
=
0
)
src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/agent.py
View file @
869ec892
This diff is collapsed.
Click to expand it.
src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/environment.py
View file @
869ec892
...
...
@@ -84,7 +84,6 @@ class RosEnvironment(Environment):
def
reset
(
self
):
self
.
__in_reset
=
True
time
.
sleep
(
0.5
)
reset_message
=
Bool
()
reset_message
.
data
=
True
self
.
__waiting_for_state_update
=
True
...
...
@@ -110,7 +109,8 @@ class RosEnvironment(Environment):
next_state
=
self
.
__last_received_state
terminal
=
self
.
__last_received_terminal
reward
=
self
.
__calc_reward
(
next_state
,
terminal
)
rospy
.
logdebug
(
'Calculated reward: {}'
.
format
(
reward
))
logger
.
debug
(
'Transition: ({}, {}, {}, {})'
.
format
(
action
,
reward
,
next_state
,
terminal
))
return
next_state
,
reward
,
terminal
,
0
...
...
@@ -129,20 +129,19 @@ class RosEnvironment(Environment):
else
:
rospy
.
logerr
(
"Timeout 3 times in a row: Terminate application"
)
exit
()
time
.
sleep
(
1
00
/
10
00
)
time
.
sleep
(
1
/
5
00
)
def
close
(
self
):
rospy
.
signal_shutdown
(
'Program ended!'
)
def
__state_callback
(
self
,
data
):
self
.
__last_received_state
=
np
.
array
(
data
.
data
,
dtype
=
'float32'
).
reshape
((
5
,))
rospy
.
log
debug
(
'Received state: {}'
.
format
(
self
.
__last_received_state
))
logger
.
debug
(
'Received state: {}'
.
format
(
self
.
__last_received_state
))
self
.
__waiting_for_state_update
=
False
def
__terminal_state_callback
(
self
,
data
):
self
.
__last_received_terminal
=
data
.
data
rospy
.
logdebug
(
'Received terminal flag: {}'
.
format
(
self
.
__last_received_terminal
))
logger
.
debug
(
'Received terminal: {}'
.
format
(
self
.
__last_received_terminal
))
self
.
__last_received_terminal
=
np
.
bool
(
data
.
data
)
logger
.
debug
(
'Received terminal flag: {}'
.
format
(
self
.
__last_received_terminal
))
self
.
__waiting_for_terminal_update
=
False
def
__calc_reward
(
self
,
state
,
terminal
):
...
...
src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/strategy.py
View file @
869ec892
...
...
@@ -13,18 +13,21 @@ class StrategyBuilder(object):
epsilon_decay_method
=
'no'
,
epsilon_decay
=
0.0
,
epsilon_decay_start
=
0
,
epsilon_decay_per_step
=
False
,
action_dim
=
None
,
action_low
=
None
,
action_high
=
None
,
mu
=
0.0
,
theta
=
0.5
,
sigma
=
0.3
sigma
=
0.3
,
noise_variance
=
0.1
):
if
epsilon_decay_method
==
'linear'
:
decay
=
LinearDecay
(
eps_decay
=
epsilon_decay
,
min_eps
=
min_epsilon
,
decay_start
=
epsilon_decay_start
)
decay_start
=
epsilon_decay_start
,
decay_per_step
=
epsilon_decay_per_step
)
else
:
decay
=
NoDecay
()
...
...
@@ -44,6 +47,13 @@ class StrategyBuilder(object):
return
OrnsteinUhlenbeckStrategy
(
action_dim
,
action_low
,
action_high
,
epsilon
,
mu
,
theta
,
sigma
,
decay
)
elif
method
==
'gaussian'
:
assert
action_dim
is
not
None
assert
action_low
is
not
None
assert
action_high
is
not
None
assert
noise_variance
is
not
None
return
GaussianNoiseStrategy
(
action_dim
,
action_low
,
action_high
,
epsilon
,
noise_variance
,
decay
)
else
:
assert
action_dim
is
not
None
assert
len
(
action_dim
)
==
1
...
...
@@ -70,17 +80,27 @@ class NoDecay(BaseDecay):
class
LinearDecay
(
BaseDecay
):
def
__init__
(
self
,
eps_decay
,
min_eps
=
0
,
decay_start
=
0
):
def
__init__
(
self
,
eps_decay
,
min_eps
=
0
,
decay_start
=
0
,
decay_per_step
=
False
):
super
(
LinearDecay
,
self
).
__init__
()
self
.
eps_decay
=
eps_decay
self
.
min_eps
=
min_eps
self
.
decay_start
=
decay_start
self
.
decay_per_step
=
decay_per_step
self
.
last_episode
=
-
1
def
decay
(
self
,
cur_eps
,
episode
):
if
episode
<
self
.
decay_
start
:
return
cur_eps
def
do_
decay
(
self
,
episode
):
if
self
.
decay_
per_step
:
do
=
(
episode
>=
self
.
decay_start
)
else
:
do
=
((
self
.
last_episode
!=
episode
)
and
(
episode
>=
self
.
decay_start
))
self
.
last_episode
=
episode
return
do
def
decay
(
self
,
cur_eps
,
episode
):
if
self
.
do_decay
(
episode
):
return
max
(
cur_eps
-
self
.
eps_decay
,
self
.
min_eps
)
else
:
return
cur_eps
class
BaseStrategy
(
object
):
...
...
@@ -170,3 +190,29 @@ class OrnsteinUhlenbeckStrategy(BaseStrategy):
noise
=
self
.
_evolve_state
()
action
=
(
1.0
-
self
.
cur_eps
)
*
values
+
(
self
.
cur_eps
*
noise
)
return
np
.
clip
(
action
,
self
.
_action_low
,
self
.
_action_high
)
class
GaussianNoiseStrategy
(
BaseStrategy
):
def
__init__
(
self
,
action_dim
,
action_low
,
action_high
,
eps
,
noise_variance
,
decay
=
NoDecay
()
):
super
(
GaussianNoiseStrategy
,
self
).
__init__
(
decay
)
self
.
eps
=
eps
self
.
cur_eps
=
eps
self
.
_action_dim
=
action_dim
self
.
_action_low
=
action_low
self
.
_action_high
=
action_high
self
.
_noise_variance
=
noise_variance
def
select_action
(
self
,
values
):
noise
=
np
.
random
.
normal
(
loc
=
0.0
,
scale
=
self
.
_noise_variance
,
size
=
self
.
_action_dim
)
action
=
values
+
self
.
cur_eps
*
noise
return
np
.
clip
(
action
,
self
.
_action_low
,
self
.
_action_high
)
src/test/resources/target_code/gluon/reinforcementModel/torcs/reinforcement_learning/util.py
View file @
869ec892
...
...
@@ -11,8 +11,8 @@ import cnnarch_logger
LOSS_FUNCTIONS
=
{
'l1'
:
gluon
.
loss
.
L1Loss
(),
'
euclidean
'
:
gluon
.
loss
.
L2Loss
(),
'huber
_loss
'
:
gluon
.
loss
.
HuberLoss
(),
'
l2
'
:
gluon
.
loss
.
L2Loss
(),
'huber'
:
gluon
.
loss
.
HuberLoss
(),
'softmax_cross_entropy'
:
gluon
.
loss
.
SoftmaxCrossEntropyLoss
(),
'sigmoid_cross_entropy'
:
gluon
.
loss
.
SigmoidBinaryCrossEntropyLoss
()}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment