Commit 2a2abdc5 authored by Nicola Gatto
Browse files

Add parameter for gaussian strategy

parent e20ab619
......@@ -176,11 +176,14 @@ grammar CNNTrain extends de.monticore.lang.monticar.Common2, de.monticore.Number
// Parameter entries accepted inside an "ornstein_uhlenbeck" strategy block.
interface StrategyOrnsteinUhlenbeckEntry extends Entry;
// Strategy value selected by the keyword "ornstein_uhlenbeck"; the braced parameter list is optional.
StrategyOrnsteinUhlenbeckValue implements StrategyValue = name:"ornstein_uhlenbeck" ("{" params:StrategyOrnsteinUhlenbeckEntry* "}")?;
// Parameter entries accepted inside a "gaussian" strategy block.
interface StrategyGaussianEntry extends Entry;
// Strategy value selected by the keyword "gaussian"; the braced parameter list is optional.
StrategyGaussianValue implements StrategyValue = name:"gaussian" ("{" params:StrategyGaussianEntry* "}")?;
// Parameters that implement only StrategyOrnsteinUhlenbeckEntry; each takes a DoubleVectorValue.
StrategyOUMu implements StrategyOrnsteinUhlenbeckEntry = name: "mu" ":" value:DoubleVectorValue;
StrategyOUTheta implements StrategyOrnsteinUhlenbeckEntry = name: "theta" ":" value:DoubleVectorValue;
StrategyOUSigma implements StrategyOrnsteinUhlenbeckEntry = name: "sigma" ":" value:DoubleVectorValue;
// NOTE(review): GeneralStrategyEntry is declared twice below. This looks like the
// before/after pair of one change (the second form additionally extends
// StrategyGaussianEntry) — confirm that only the second declaration remains in the grammar.
interface GeneralStrategyEntry extends StrategyEpsGreedyEntry, StrategyOrnsteinUhlenbeckEntry;
interface GeneralStrategyEntry extends StrategyEpsGreedyEntry, StrategyOrnsteinUhlenbeckEntry, StrategyGaussianEntry;
// Epsilon parameters; as GeneralStrategyEntry implementations they are accepted
// wherever any of the entry interfaces extended by GeneralStrategyEntry is expected.
GreedyEpsilonEntry implements GeneralStrategyEntry = name:"epsilon" ":" value:NumberValue;
MinEpsilonEntry implements GeneralStrategyEntry = name:"min_epsilon" ":" value:NumberValue;
......
......@@ -30,6 +30,7 @@ import java.util.Set;
public class CheckContinuousRLAlgorithmUsesContinuousStrategy implements CNNTrainASTConfigurationCoCo{
/** Names of the strategies that this check treats as continuous. */
private static final Set<String> CONTINUOUS_STRATEGIES =
        ImmutableSet.of("ornstein_uhlenbeck", "gaussian");
@Override
......
......@@ -42,6 +42,7 @@ public class AllCoCoTest extends AbstractCoCoTest{
checkValid("valid_tests", "ReinforcementConfig");
checkValid("valid_tests", "ReinforcementConfig2");
checkValid("valid_tests", "DdpgConfig");
checkValid("valid_tests", "TD3Config");
checkValid("valid_tests", "ReinforcementWithRosReward");
}
......
// Reinforcement-learning configuration with separate actor and critic optimizers
// and a "gaussian" exploration strategy.
// NOTE(review): the configuration is named TD3Config but selects
// rl_algorithm ddpg-algorithm — confirm this is intended.
configuration TD3Config {
learning_method : reinforcement
rl_algorithm : ddpg-algorithm
critic : path.to.component
environment : gym { name:"CartPole-v1" }
soft_target_update_rate: 0.001
// Optimizer settings for the actor.
actor_optimizer : adam{
learning_rate : 0.0001
learning_rate_minimum : 0.00005
learning_rate_decay : 0.9
learning_rate_policy : step
}
// Optimizer settings for the critic.
critic_optimizer : rmsprop{
learning_rate : 0.001
learning_rate_minimum : 0.0001
learning_rate_decay : 0.5
learning_rate_policy : step
}
// Gaussian strategy configured only with epsilon-related parameters.
strategy : gaussian {
epsilon: 1.0
min_epsilon: 0.001
epsilon_decay_method: linear
epsilon_decay : 0.0001
epsilon_decay_start: 50
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment