Commit 578a3833 authored by Tobias Seibel

minor bug fixes

parent ce99d71b
@@ -2,3 +2,5 @@
*/__pycache__
*/trained_ddpm
root
+experiments
+trainer/__pycache__
\ No newline at end of file
@@ -62,7 +62,7 @@ class UnconditionalDataset(Dataset):
        return len(self.df)

    def __getitem__(self,idx):
-        path = self.df.iloc[idx].Filepaths
+        path = self.df.iloc[idx].Filepath
        img = Image.open(path)
        return self.transform(img),0
...
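For context on the one-character fix above: the dataset's DataFrame exposes a `Filepath` column, so the old attribute access `.Filepaths` raised an `AttributeError` on the first `__getitem__` call. A minimal reproduction (the DataFrame construction here is assumed for illustration):

``` python
import pandas as pd

df = pd.DataFrame({"Filepath": ["img_0001.png", "img_0002.png"]})
row = df.iloc[0]
print(row.Filepath)  # 'img_0001.png' -- matches the real column name
# row.Filepaths      # AttributeError: 'Series' object has no attribute 'Filepaths'
```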
%% Cell type:code id: tags:
``` python
from trainer.train import *
from dataloader.load import *
from models.Framework import *
from models.all_unets import *
import torch
from torch import nn
```
%% Cell type:markdown id: tags:
# Prepare experiment
1. Choose hyperparameter settings
2. Run the notebook on the local machine to generate the experiment folder with the JSON files containing the settings
3. scp the experiment folder to the HPC
4. Run the pipeline by adding the following to the batch file (a hypothetical sketch of the dispatch these commands imply follows this list):
- Train Model: &emsp;&emsp;&emsp;&emsp;&emsp; `python main.py train "<absolute path of experiment folder in hpc>"`
- Sample Images: &emsp;&emsp;&emsp; `python main.py sample "<absolute path of experiment folder in hpc>"`
- Evaluate Model: &emsp;&emsp;&emsp; `python main.py evaluate "<absolute path of experiment folder in hpc>"`
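The three commands imply that `main.py` dispatches on its first argument. The sketch below is hypothetical (`main.py` itself is not part of this diff) and shows only the argument handling these calls assume:

``` python
import sys

def main() -> None:
    # Expected invocation: python main.py <train|sample|evaluate> "<experiment folder>"
    if len(sys.argv) != 3:
        raise SystemExit("usage: python main.py <train|sample|evaluate> <experiment folder>")
    mode, experiment_dir = sys.argv[1], sys.argv[2]
    if mode not in ("train", "sample", "evaluate"):
        raise SystemExit(f"unknown mode {mode!r}")
    # The real main.py presumably loads the JSON settings from
    # <experiment folder>/settings and runs ddpm_trainer / ddpm_sampler /
    # ddpm_evaluator (names taken from meta_setting below).
    print(f"would run {mode} on {experiment_dir}")

if __name__ == "__main__":
    main()
```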
%% Cell type:code id: tags:
``` python
import torch

####
# Settings
####

# Dataset path
datapath = "/work/lect0100/lhq_256"

# Experiment setup
-run_name = 'batch_timesteps' # WANDB and experiment folder name!
+run_name = 'main_test0' # WANDB and experiment folder name!
checkpoint = None #'model_epoch_8.pth' # Name of checkpoint pth file or None
-experiment_path = '/work/lect0100/experiments_gonzalo/' + run_name + '/'
+experiment_path = "/work/lect0100/main_experiment/" + run_name + '/'

# Path to save generated experiment folder on local machine
-local_path = "/Users/gonzalo/Desktop/" + run_name + '/settings'
+local_path = "experiments/" + run_name + '/settings'

# Diffusion Model Settings
-diffusion_steps = 200
+diffusion_steps = 500
-image_size = 64
+image_size = 128
channels = 3

# Training
batchsize = 32
-epochs = 30
+epochs = 20
-store_iter = 1
+store_iter = 5
-eval_iter = 500
+eval_iter = 2
learning_rate = 0.0001
optimizername = "torch.optim.AdamW"
optimizer_params = None
verbose = True
# checkpoint = None # (if no checkpoint, training starts from random weights)

# Sampling
-sample_size = 10
+sample_size = 20
intermediate = False # True if you want to sample one image and all its intermediate latents

# Evaluating
...

###
# Advanced Settings Dictionaries
###
meta_setting = dict(modelname = "UNet_Res",
                    dataset = "UnconditionalDataset",
                    framework = "DDPM",
                    trainloop_function = "ddpm_trainer",
                    sampling_function = 'ddpm_sampler',
                    evaluation_function = 'ddpm_evaluator',
                    batchsize = batchsize
                    )

dataset_setting = dict(fpath = datapath,
                       img_size = image_size,
                       frac = 0.8,
                       skip_first_n = 0,
                       ext = ".png",
                       transform = True
                       )

model_setting = dict(n_channels=64,
                     fctr = [1,2,4,4,8],
                     time_dim=256,
-                    attention = True,
                     )
"""
outdated
model_setting = dict( channels_in=channels,
                      channels_out=channels,
                      activation='relu', # activation function. Options: {'relu', 'leakyrelu', 'selu', 'gelu', 'silu'/'swish'}
                      weight_init='he', # weight initialization. Options: {'he', 'torch'}
                      projection_features=64, # number of image features after first convolution layer
                      time_dim=batchsize, # don't change!!!
                      time_channels=diffusion_steps, # number of time channels #TODO same as diffusion steps?
                      num_stages=4, # number of stages in contracting/expansive path
                      stage_list=None, # specify number of features produced by stages
                      num_blocks=1, # number of ConvResBlock in each contracting/expansive path
                      num_groupnorm_groups=32, # number of groups used in Group Normalization inside a ConvResBlock
                      dropout=0.1, # drop-out to be applied inside a ConvResBlock
                      attention_list=None, # specify MHA pattern across stages
                      num_attention_heads=1,
                      )
"""
framework_setting = dict(
    diffusion_steps = diffusion_steps, # don't change!!
    out_shape = (channels, image_size, image_size), # don't change!!
    noise_schedule = 'linear',
    beta_1 = 1e-4,
    beta_T = 0.02,
    alpha_bar_lower_bound = 0.9,
    var_schedule = 'same',
    kl_loss = 'simplified',
    recon_loss = 'nll',
    )
training_setting = dict(
    epochs = epochs,
    store_iter = store_iter,
    eval_iter = eval_iter,
    optimizer_class = optimizername,
    optimizer_params = optimizer_params,
    #optimizer_params = dict(lr=learning_rate), # don't change!
    learning_rate = learning_rate,
    run_name = run_name,
    checkpoint = checkpoint,
    experiment_path = experiment_path,
    verbose = verbose,
-   T_max = 5*10000, # cosine lr param
+   T_max = 0.8*90000/32*150, # cosine lr param: steps per epoch (len(train_ds)/batchsize) * total epochs
-   eta_min = 1e-5, # cosine lr param
+   eta_min = 1e-10, # cosine lr param
    )
sampling_setting = dict(
    checkpoint = checkpoint,
    experiment_path = experiment_path,
    batch_size = sample_size,
    intermediate = intermediate
    )
# TODO
evaluation_setting = dict(
    checkpoint = checkpoint,
    experiment_path = experiment_path,
    )
```
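Two of the changed settings are worth unpacking. The new `T_max` is the total number of optimizer steps for the cosine schedule, presumably consumed by `torch.optim.lr_scheduler.CosineAnnealingLR`: with the 0.8 train split of the ~90k-image LHQ set and batch size 32, `0.8 * 90000 / 32 = 2250` steps per epoch, times 150 epochs, gives `T_max = 337500` (note this spans more epochs than the `epochs = 20` of a single run, so one run traverses only part of the cosine). A minimal sketch with a stand-in model:

``` python
import torch

model = torch.nn.Linear(8, 8)  # stand-in for the U-Net
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=int(0.8 * 90000 / 32 * 150),  # = 337500 steps, as in training_setting
    eta_min=1e-10,                      # floor of the annealed learning rate
)
optimizer.step()
scheduler.step()  # one scheduler step per optimizer step
```

`framework_setting` names the standard DDPM linear noise schedule; a sketch of the textbook construction under those values (the repo's `DDPM` framework class may differ in details):

``` python
diffusion_steps, beta_1, beta_T = 500, 1e-4, 0.02
betas = torch.linspace(beta_1, beta_T, diffusion_steps)  # 'linear' schedule
alphas = 1.0 - betas
alpha_bar = torch.cumprod(alphas, dim=0)  # bar(alpha)_t, drives the closed-form forward noising
```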
%% Cell type:code id: tags:
``` python
import os
import json

f = local_path
if os.path.exists(f):
    print("path already exists, pick a new name!")
    print("break")
else:
    print("create folder")
    #os.mkdir(f)
    os.makedirs(f, exist_ok=True)
    print("folder created ")

    with open(f+"/meta_setting.json","w+") as fp:
        json.dump(meta_setting,fp)
    with open(f+"/dataset_setting.json","w+") as fp:
        json.dump(dataset_setting,fp)
    with open(f+"/model_setting.json","w+") as fp:
        json.dump(model_setting,fp)
    with open(f+"/framework_setting.json","w+") as fp:
        json.dump(framework_setting,fp)
    with open(f+"/training_setting.json","w+") as fp:
        json.dump(training_setting,fp)
    with open(f+"/sampling_setting.json","w+") as fp:
        json.dump(sampling_setting,fp)
    with open(f+"/evaluation_setting.json","w+") as fp:
        json.dump(evaluation_setting,fp)
    print("stored json files in folder")

print(meta_setting)
print(dataset_setting)
print(model_setting)
print(framework_setting)
print(training_setting)
print(sampling_setting)
print(evaluation_setting)
```
%% Output
create folder
folder created
stored json files in folder
-{'modelname': 'UNet_Unconditional_Diffusion_Bottleneck_Variant', 'dataset': 'UnconditionalDataset', 'framework': 'DDPM', 'trainloop_function': 'ddpm_trainer', 'sampling_function': 'ddpm_sampler', 'evaluation_function': 'ddpm_evaluator', 'batchsize': 32}
+{'modelname': 'UNet_Res', 'dataset': 'UnconditionalDataset', 'framework': 'DDPM', 'trainloop_function': 'ddpm_trainer', 'sampling_function': 'ddpm_sampler', 'evaluation_function': 'ddpm_evaluator', 'batchsize': 32}
-{'fpath': '/work/lect0100/lhq_256', 'img_size': 64, 'frac': 0.8, 'skip_first_n': 0, 'ext': '.png', 'transform': True}
+{'fpath': '/work/lect0100/lhq_256', 'img_size': 128, 'frac': 0.8, 'skip_first_n': 0, 'ext': '.png', 'transform': True}
-{'channels_in': 3, 'channels_out': 3, 'activation': 'relu', 'weight_init': 'he', 'projection_features': 64, 'time_dim': 32, 'time_channels': 200, 'num_stages': 4, 'stage_list': None, 'num_blocks': 1, 'num_groupnorm_groups': 32, 'dropout': 0.1, 'attention_list': None, 'num_attention_heads': 1}
+{'n_channels': 64, 'fctr': [1, 2, 4, 4, 8], 'time_dim': 256}
-{'diffusion_steps': 200, 'out_shape': (3, 64, 64), 'noise_schedule': 'linear', 'beta_1': 0.0001, 'beta_T': 0.02, 'alpha_bar_lower_bound': 0.9, 'var_schedule': 'same', 'kl_loss': 'simplified', 'recon_loss': 'nll'}
+{'diffusion_steps': 500, 'out_shape': (3, 128, 128), 'noise_schedule': 'linear', 'beta_1': 0.0001, 'beta_T': 0.02, 'alpha_bar_lower_bound': 0.9, 'var_schedule': 'same', 'kl_loss': 'simplified', 'recon_loss': 'nll'}
-{'epochs': 30, 'store_iter': 1, 'eval_iter': 500, 'optimizer_class': 'torch.optim.AdamW', 'optimizer_params': None, 'learning_rate': 0.0001, 'run_name': 'batch_timesteps', 'checkpoint': None, 'experiment_path': '/work/lect0100/experiments_gonzalo/batch_timesteps/', 'verbose': True}
+{'epochs': 10, 'store_iter': 2, 'eval_iter': 2, 'optimizer_class': 'torch.optim.AdamW', 'optimizer_params': None, 'learning_rate': 0.0001, 'run_name': 'main_testing', 'checkpoint': None, 'experiment_path': '/work/lect0100/tobi/main_test/main_testing/', 'verbose': True, 'T_max': 9000000, 'eta_min': 1e-10}
-{'checkpoint': None, 'experiment_path': '/work/lect0100/experiments_gonzalo/batch_timesteps/', 'batch_size': 10, 'intermediate': False}
+{'checkpoint': None, 'experiment_path': '/work/lect0100/tobi/main_test/main_testing/', 'batch_size': 10, 'intermediate': False}
-{'checkpoint': None, 'experiment_path': '/work/lect0100/experiments_gonzalo/batch_timesteps/'}
+{'checkpoint': None, 'experiment_path': '/work/lect0100/tobi/main_test/main_testing/'}
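The reading side is not part of this diff; below is a hypothetical sketch of how `main.py` could reload the settings written above (the function name and layout are assumptions; note that `json.dump` has already turned the `out_shape` tuple into a JSON list, so the loader gets `[3, 128, 128]` back):

``` python
import json
import os

def load_settings(settings_dir):
    """Reload the seven *_setting.json files written by the cell above."""
    settings = {}
    for name in ["meta", "dataset", "model", "framework",
                 "training", "sampling", "evaluation"]:
        with open(os.path.join(settings_dir, f"{name}_setting.json")) as fp:
            settings[name] = json.load(fp)
    return settings
```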
-%% Cell type:code id: tags:
-``` python
-```
%% Cell type:code id: tags:
``` python
```
...
This diff is collapsed.
@@ -174,7 +174,7 @@ def ddpm_trainer(model,
    ema = ModelEmaV2(model, decay=decay, device = model.device)

    # Using W&B
-    with wandb.init(project='test-project', name=run_name, entity='gonzalomartingarcia0', id=run_name, resume=True) as run:
+    with wandb.init(project='Unconditional Landscapes', name=run_name, entity='deep-lab-', id=run_name, resume=True) as run:

        # Log some info
        run.config.learning_rate = learning_rate
...
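Only the W&B project and entity change here, but the pattern is worth a note: passing a fixed `id` together with `resume=True` means a job restarted from a checkpoint should continue logging into the same W&B run rather than opening a new one. A minimal sketch of that pattern (the project name is a placeholder):

``` python
import wandb

run_name = "main_test0"  # the trainer passes its run_name as both name and id
with wandb.init(project="my-project", name=run_name, id=run_name, resume=True) as run:
    run.config.learning_rate = 1e-4
    run.log({"train_loss": 0.123})
```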