configuration TorcsDQN {
    context : cpu
    learning_method : reinforcement

    environment : ros_interface {
        state_topic : "preprocessor_state"
        terminal_state_topic : "preprocessor_is_terminal"
        action_topic : "postprocessor_action"
        reset_topic : "torcs_reset"
    }

    reward_function : torcs.agent.dqn.reward

    num_episodes : 20000
    discount_factor : 0.999
    num_max_steps : 999999999
    training_interval : 1

    use_fix_target_network : true
    target_network_update_interval : 500

    snapshot_interval : 1000
    use_double_dqn : true
    loss : euclidean

    replay_memory : buffer {
        memory_size : 1000000
        sample_size : 32
    }

    strategy : epsgreedy {
        epsilon : 1.0
        min_epsilon : 0.01
        epsilon_decay_method : linear
        epsilon_decay : 0.0001
    }

    optimizer : rmsprop {
        learning_rate : 0.001
    }
}
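
The exploration and target-network settings correspond to standard DQN mechanics: epsilon decays linearly from 1.0 toward a floor of 0.01 in steps of 0.0001, the fixed target network is refreshed every 500 training steps, and double DQN selects the bootstrap action with the online network but evaluates it with the target network. The following Python sketch illustrates these two pieces; it is a minimal illustration under those assumptions, not the trainer generated from this configuration, and the names decayed_epsilon and double_dqn_targets are hypothetical.

    # Minimal sketch of the epsilon schedule and double-DQN target implied by
    # the configuration above. Names are hypothetical; the generated trainer
    # may implement these details differently.
    import numpy as np

    EPSILON = 1.0           # strategy.epsilon
    MIN_EPSILON = 0.01      # strategy.min_epsilon
    EPSILON_DECAY = 0.0001  # strategy.epsilon_decay (linear method)
    DISCOUNT = 0.999        # discount_factor
    TARGET_UPDATE = 500     # target_network_update_interval

    def decayed_epsilon(decay_step: int) -> float:
        # Linear decay: subtract EPSILON_DECAY per decay step, floored at
        # MIN_EPSILON.
        return max(MIN_EPSILON, EPSILON - EPSILON_DECAY * decay_step)

    def double_dqn_targets(rewards, next_q_online, next_q_target, terminal):
        # Double DQN: the online network picks the next action (argmax), the
        # fixed target network evaluates it; terminal transitions (terminal
        # is 1.0) bootstrap nothing.
        best_actions = np.argmax(next_q_online, axis=1)  # shape (batch,)
        next_values = next_q_target[np.arange(len(rewards)), best_actions]
        return rewards + DISCOUNT * (1.0 - terminal) * next_values

    # The target network itself would be refreshed every TARGET_UPDATE
    # training steps, e.g. by copying the online network's weights.

With these values, epsilon reaches its floor after (1.0 - 0.01) / 0.0001 = 9900 decay steps; whether the trainer applies one decay step per environment step or per episode is determined by the generated code and is not visible in the configuration itself.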