Skip to content
Snippets Groups Projects

hpm tuning

Merged Siva Renganathan requested to merge siva/RLAssign-24:main into main
1 file
+ 28
12
Compare changes
  • Side-by-side
  • Inline
%% Cell type:code id:0d58f9e7 tags:
%% Cell type:code id:0d58f9e7 tags:
``` python
``` python
import os
import os
import copy
import copy
 
%% Output
 
The autoreload extension is already loaded. To reload it, use:
 
%reload_ext autoreload
%% Cell type:code id:45b9b8fb tags:
%% Cell type:code id:45b9b8fb tags:
%% Cell type:code id:b80e1114 tags:
%% Cell type:code id:b80e1114 tags:
def __init__(self, env):
def __init__(self, env):
super().__init__()
super().__init__()
self.q_network = nn.Sequential(
self.q_network = nn.Sequential(
nn.Linear(np.array(env.single_observation_space.shape).prod(), 128),
nn.Linear(np.array(env.single_observation_space.shape).prod(), 256),
nn.LeakyReLU(),
nn.LeakyReLU(),
nn.Linear(128, 128),
nn.Linear(256, 256),
nn.ReLU(),
nn.ReLU(),
nn.Linear(128, env.single_action_space.n),
nn.Linear(256, env.single_action_space.n),
)
)
self.target_network = copy.deepcopy(self.q_network)
self.target_network = copy.deepcopy(self.q_network)
%% Cell type:code id:53f8844a tags:
%% Cell type:code id:53f8844a tags:
# agent training specific parameters and hyperparameters
# agent training specific parameters and hyperparameters
hypp.total_timesteps = 100000 # the training duration in number of time steps
hypp.total_timesteps = 100000 # the training duration in number of time steps
hypp.learning_rate = 3e-4 # the learning rate for the optimizer
hypp.learning_rate = 1e-4 # the learning rate for the optimizer
hypp.gamma = 0.99 # decay factor of future rewards
hypp.gamma = 0.99 # decay factor of future rewards
hypp.buffer_size = 5000 # the size of the replay memory buffer
hypp.buffer_size = 50000 # the size of the replay memory buffer
hypp.target_network_frequency = 500 # the frequency of synchronization with target network
hypp.target_network_frequency = 1000 # the frequency of synchronization with target network
hypp.batch_size = 128 # number of samples taken from the replay buffer for one step
hypp.batch_size = 32 # number of samples taken from the replay buffer for one step
hypp.start_e = 1 # probability of exploration (epsilon) at timestep 0
hypp.start_e = 1.0 # probability of exploration (epsilon) at timestep 0
hypp.end_e = 0.05 # minimal probability of exploration (epsilon)
hypp.end_e = 0.1 # minimal probability of exploration (epsilon)
hypp.exploration_fraction = 0.05 # the fraction of total_timesteps it takes to go from start_e to end_e
hypp.exploration_fraction = 0.0002 # the fraction of total_timesteps it takes to go from start_e to end_e
hypp.start_learning = 5000 # the timestep the learning starts (before that the replay buffer is filled)
hypp.start_learning = 1000 # the timestep the learning starts (before that the replay buffer is filled)
hypp.train_frequency = 10 # the frequency of training
hypp.train_frequency = 1 # the frequency of training
%% Cell type:markdown id:d0155331-bfae-478c-a71c-4413613f2dd5 tags:
%% Cell type:markdown id:d0155331-bfae-478c-a71c-4413613f2dd5 tags:
%% Cell type:code id:0efaad3d-04b1-41f7-8596-a529a2773ad9 tags:
%% Cell type:code id:0efaad3d-04b1-41f7-8596-a529a2773ad9 tags:
%% Cell type:code id:2892bcc9-7e04-444f-9e9b-000d58503569 tags:
%% Cell type:code id:2892bcc9-7e04-444f-9e9b-000d58503569 tags:
%% Cell type:code id:6b8ccd37-8e65-444b-9e0d-0465bae02926 tags:
%% Cell type:code id:6b8ccd37-8e65-444b-9e0d-0465bae02926 tags:
%% Cell type:code id:52deb542-1e49-417e-91d3-65ddc32cb2fe tags:
%% Cell type:code id:52deb542-1e49-417e-91d3-65ddc32cb2fe tags:
%% Cell type:code id:3af83dcd-1f86-4309-848b-4756a61b83fc tags:
%% Cell type:code id:3af83dcd-1f86-4309-848b-4756a61b83fc tags:
%% Cell type:code id:0da39b1c tags:
%% Cell type:code id:0da39b1c tags:
``` python
``` python
# ------------------ RUN INIT - DO NOT EDIT ---------------------- #
# ------------------ RUN INIT - DO NOT EDIT ---------------------- #
# reinit run_name
# reinit run_name
 
%% Output
%% Cell type:code id:c49f4c2b tags:
%% Cell type:code id:c49f4c2b tags:
``` python
``` python
agent_name = exp.run_name
agent_name = exp.run_name
agent_exp_type = exp.exp_type # both are needed to identify the agent location
agent_exp_type = exp.exp_type # both are needed to identify the agent location
 
%% Output
 
<IPython.core.display.Video object>
%% Cell type:code id:008addbe tags:
%% Cell type:code id:008addbe tags:
``` python
``` python
eval_params = edict() # eval_params - evaluation settings for trained agent
eval_params = edict() # eval_params - evaluation settings for trained agent
eval_params.run_name00 = exp.run_name
eval_params.run_name00 = exp.run_name
 
%% Output
 
Loading