TorcsActor.cnnt
/* (c) https://github.com/MontiCore/monticore */
configuration TorcsActor {
    context: gpu

    learning_method : reinforcement

    agent_name: "TorcsAgent"

    rl_algorithm: td3-algorithm

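    /* TD3 target policy smoothing: Gaussian noise with standard deviation policy_noise, clipped to +/- noise_clip,
       is added to the target actor's actions when forming critic targets; the actor is updated only every
       policy_delay critic updates. */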
    policy_noise: 0.2
    noise_clip: 0.5
    policy_delay: 2

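    /* Critic network component used to estimate Q-values for state-action pairs. */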
    critic: torcs.agent.network.torcsCritic

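    /* ROS topics for receiving states and terminal flags and for publishing actions and reset requests. */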
    environment: ros_interface {
        state_topic: "/torcs/state"
        terminal_state_topic: "/torcs/terminal"
        action_topic: "/torcs/step"
        reset_topic: "/torcs/reset"
    }

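    /* Component that computes the scalar reward from the observed state. */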
    reward_function: torcs.agent.network.reward

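    /* Training schedule: discount factor 0.99, with a training update after every environment step, starting from the first step. */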
    num_episodes: 3500
    discount_factor: 0.99
    num_max_steps: 900000
    training_interval: 1
    start_training_at: 0

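    /* Target networks are updated by Polyak averaging with rate 0.005; one episode is run per evaluation. */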
    evaluation_samples: 1
    soft_target_update_rate: 0.005

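    /* Interval at which snapshots of the trained agent are saved. */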
    snapshot_interval: 150

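    /* Experience replay buffer holding up to 120000 transitions, with 100 samples drawn per training step. */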
    replay_memory: buffer {
        memory_size: 120000
        sample_size: 100
    }

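    /* Exploration noise from an Ornstein-Uhlenbeck process; theta, mu, and sigma are specified per action dimension,
       and the epsilon scaling factor decays linearly per step down to min_epsilon. */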
    strategy: ornstein_uhlenbeck {
        epsilon: 1.0
        min_epsilon: 0.0001
        epsilon_decay_method: linear
        epsilon_decay: 0.000008
        epsilon_decay_start: 10
        epsilon_decay_per_step: true
        theta: (0.6, 1.0, 1.0)
        mu: (0.0, 0.0, -1.2)
        sigma: (0.3, 0.2, 0.05)
    }

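    /* Separate Adam optimizers for the actor and the critic, both with learning rate 0.001. */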
    actor_optimizer: adam {
        learning_rate: 0.001
    }

    critic_optimizer: adam {
        learning_rate: 0.001
    }
}