/* CartPoleDQN.cnnt: training configuration for the CartPoleDQN network. */
configuration CartPoleDQN {
    context : cpu

    /* Reinforcement learning against the gym environment "CartPole-v0". */
    learning_method : reinforcement
    environment : gym {name : "CartPole-v0"}

    /* Episode budget, score target and per-episode step cap.
       NOTE(review): presumably training stops early once target_score is
       reached; confirm against the generated trainer. */
    num_episodes : 160
    target_score : 185.5
    discount_factor : 0.999
    num_max_steps : 250
    training_interval : 1

    /* A separate target network is enabled and refreshed every 200 units
       (presumably training steps; TODO confirm the unit). */
    use_fix_target_network : true
    target_network_update_interval : 200

    snapshot_interval : 20

    use_double_dqn : false

    loss : euclidean

    /* Replay memory: capacity of 10000 entries, 32 entries drawn per sample. */
    replay_memory : buffer{
        memory_size : 10000
        sample_size : 32
    }

    /* Epsilon-greedy strategy: epsilon starts at 1.0 and decays linearly by
       0.01 down to a floor of 0.01 (decay cadence is not visible here; verify). */
    strategy : epsgreedy{
        epsilon : 1.0
        min_epsilon : 0.01
        epsilon_decay_method : linear
        epsilon_decay : 0.01
    }

    optimizer : rmsprop{
        learning_rate : 0.001
    }
}