hpm tuning
Compare changes
+ 28
− 12
# Fraction of total_timesteps it takes to go from start_e to end_e
# (0.05 means 5% of total_timesteps).
hypp.exploration_fraction = 0.05
# Timestep at which learning starts; before that, the replay buffer is
# being filled.
hypp.start_learning = 5000