diff --git a/.gitignore b/.gitignore index 6c311be704b9b912702a7ad4f6c67f7367e03fa4..0ffc9575a2d120e1cfad342dedb833397cdb99bc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ */__pycache__ */trained_ddpm root +experiments +trainer/__pycache__ \ No newline at end of file diff --git a/dataloader/load.py b/dataloader/load.py index 93d033b4f1d6ff30ae0bc8abde1aebcaaa894c49..4737e3e72c0da46629aa524c921d70415c232566 100644 --- a/dataloader/load.py +++ b/dataloader/load.py @@ -62,7 +62,7 @@ class UnconditionalDataset(Dataset): return len(self.df) def __getitem__(self,idx): - path = self.df.iloc[idx].Filepaths + path = self.df.iloc[idx].Filepath img = Image.open(path) return self.transform(img),0 diff --git a/experiment_creator.ipynb b/experiment_creator.ipynb index 17963ce4ccd8bc3c2e41dc95cea6aca9c9c783ea..9d24f221cce771cfe10d3377d092f8639540d409 100644 --- a/experiment_creator.ipynb +++ b/experiment_creator.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "metadata": { "scrolled": true }, @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -47,23 +47,23 @@ "datapath = \"/work/lect0100/lhq_256\"\n", "\n", "# Experiment setup\n", - "run_name = 'batch_timesteps' # WANDB and experiment folder Name!\n", + "run_name = 'main_test0' # WANDB and experiment folder Name!\n", "checkpoint = None #'model_epoch_8.pth' # Name of checkpoint pth file or None \n", - "experiment_path = '/work/lect0100/experiments_gonzalo/'+ run_name +'/'\n", + "experiment_path = \"/work/lect0100/main_experiment/\" + run_name +'/'\n", "\n", "# Path to save generated experiment folder on local machine\n", - "local_path =\"/Users/gonzalo/Desktop/\" + run_name + '/settings'\n", + "local_path =\"experiments/\" + run_name + '/settings'\n", "\n", "# Diffusion Model Settings\n", - "diffusion_steps = 200\n", - "image_size = 64\n", + "diffusion_steps = 500\n", + 
"image_size = 128\n", "channels = 3\n", "\n", "# Training\n", "batchsize = 32\n", - "epochs = 30\n", - "store_iter = 1\n", - "eval_iter = 500\n", + "epochs = 20\n", + "store_iter = 5\n", + "eval_iter = 2\n", "learning_rate = 0.0001\n", "optimizername = \"torch.optim.AdamW\"\n", "optimizer_params = None\n", @@ -71,7 +71,7 @@ "# checkpoint = None #(If no checkpoint training, ie. random weights)\n", "\n", "# Sampling \n", - "sample_size = 10\n", + "sample_size = 20\n", "intermediate = False # True if you want to sample one image and all ist intermediate latents\n", "\n", "\n", @@ -103,6 +103,7 @@ "model_setting = dict( n_channels=64,\n", " fctr = [1,2,4,4,8],\n", " time_dim=256,\n", + " attention = True,\n", " )\n", "\"\"\"\n", "outdated\n", @@ -145,8 +146,8 @@ " checkpoint= checkpoint,\n", " experiment_path = experiment_path,\n", " verbose = verbose,\n", - " T_max = 5*10000, # cosine lr param\n", - " eta_min= 1e-5, # cosine lr param\n", + " T_max = 0.8*90000/32*150, # cosine lr param len(train_ds)/batchsize * total epochs to 0 \n", + " eta_min= 1e-10, # cosine lr param\n", " )\n", "sampling_setting = dict( \n", " checkpoint = checkpoint, \n", @@ -163,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -173,13 +174,13 @@ "create folder\n", "folder created \n", "stored json files in folder\n", - "{'modelname': 'UNet_Unconditional_Diffusion_Bottleneck_Variant', 'dataset': 'UnconditionalDataset', 'framework': 'DDPM', 'trainloop_function': 'ddpm_trainer', 'sampling_function': 'ddpm_sampler', 'evaluation_function': 'ddpm_evaluator', 'batchsize': 32}\n", - "{'fpath': '/work/lect0100/lhq_256', 'img_size': 64, 'frac': 0.8, 'skip_first_n': 0, 'ext': '.png', 'transform': True}\n", - "{'channels_in': 3, 'channels_out': 3, 'activation': 'relu', 'weight_init': 'he', 'projection_features': 64, 'time_dim': 32, 'time_channels': 200, 'num_stages': 4, 'stage_list': None, 'num_blocks': 1, 'num_groupnorm_groups': 32, 
'dropout': 0.1, 'attention_list': None, 'num_attention_heads': 1}\n", - "{'diffusion_steps': 200, 'out_shape': (3, 64, 64), 'noise_schedule': 'linear', 'beta_1': 0.0001, 'beta_T': 0.02, 'alpha_bar_lower_bound': 0.9, 'var_schedule': 'same', 'kl_loss': 'simplified', 'recon_loss': 'nll'}\n", - "{'epochs': 30, 'store_iter': 1, 'eval_iter': 500, 'optimizer_class': 'torch.optim.AdamW', 'optimizer_params': None, 'learning_rate': 0.0001, 'run_name': 'batch_timesteps', 'checkpoint': None, 'experiment_path': '/work/lect0100/experiments_gonzalo/batch_timesteps/', 'verbose': True}\n", - "{'checkpoint': None, 'experiment_path': '/work/lect0100/experiments_gonzalo/batch_timesteps/', 'batch_size': 10, 'intermediate': False}\n", - "{'checkpoint': None, 'experiment_path': '/work/lect0100/experiments_gonzalo/batch_timesteps/'}\n" + "{'modelname': 'UNet_Res', 'dataset': 'UnconditionalDataset', 'framework': 'DDPM', 'trainloop_function': 'ddpm_trainer', 'sampling_function': 'ddpm_sampler', 'evaluation_function': 'ddpm_evaluator', 'batchsize': 32}\n", + "{'fpath': '/work/lect0100/lhq_256', 'img_size': 128, 'frac': 0.8, 'skip_first_n': 0, 'ext': '.png', 'transform': True}\n", + "{'n_channels': 64, 'fctr': [1, 2, 4, 4, 8], 'time_dim': 256}\n", + "{'diffusion_steps': 500, 'out_shape': (3, 128, 128), 'noise_schedule': 'linear', 'beta_1': 0.0001, 'beta_T': 0.02, 'alpha_bar_lower_bound': 0.9, 'var_schedule': 'same', 'kl_loss': 'simplified', 'recon_loss': 'nll'}\n", + "{'epochs': 10, 'store_iter': 2, 'eval_iter': 2, 'optimizer_class': 'torch.optim.AdamW', 'optimizer_params': None, 'learning_rate': 0.0001, 'run_name': 'main_testing', 'checkpoint': None, 'experiment_path': '/work/lect0100/tobi/main_test/main_testing/', 'verbose': True, 'T_max': 9000000, 'eta_min': 1e-10}\n", + "{'checkpoint': None, 'experiment_path': '/work/lect0100/tobi/main_test/main_testing/', 'batch_size': 10, 'intermediate': False}\n", + "{'checkpoint': None, 'experiment_path': 
'/work/lect0100/tobi/main_test/main_testing/'}\n" ] } ], @@ -227,13 +228,6 @@ " " ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, diff --git a/playground.ipynb b/playground.ipynb index 16d2dfac2baba377cc529970900bc5bb0ef10d71..d7d3ea922ad8e2bba9a29fbfe4f98fff54859e0c 100644 --- a/playground.ipynb +++ b/playground.ipynb @@ -11,6 +11,44 @@ "from models.model import VanillaDiffusion\n" ] }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0009572050015330874\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np \n", + "\n", + "eta_min = 0.00001\n", + "eta_max = 0.001\n", + "T_max = 0.8*90000/32*20 # len(trainset) = 0.8*90000; T_max = len(trainset)/batch_size*epochs <- eta_min is reached in the last epoch. 
\n", + "\n", + "\n", + "x = np.arange(T_max)\n", + "y = eta_min+ 0.5*(eta_max-eta_min)*(1+np.cos(np.pi*x/T_max))\n", + "plt.plot(x,y)\n", + "print(y[6000]) # one epoch has about 2.25k steps -> in the third epoch this will be the learning rate \n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -20,8 +58,22 @@ } ], "metadata": { + "kernelspec": { + "display_name": "env", + "language": "python", + "name": "env" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" }, "orig_nbformat": 4 }, diff --git a/trainer/train.py b/trainer/train.py index 80431b0c02342e1dfba41a4b47cb9afc13fa87ce..0d78a6b90745c9ce287d2d4282547229c7725cd9 100644 --- a/trainer/train.py +++ b/trainer/train.py @@ -174,7 +174,7 @@ def ddpm_trainer(model, ema = ModelEmaV2(model, decay=decay, device = model.device) # Using W&B - with wandb.init(project='test-project', name=run_name, entity='gonzalomartingarcia0', id=run_name, resume=True) as run: + with wandb.init(project='Unconditional Landscapes', name=run_name, entity='deep-lab-', id=run_name, resume=True) as run: # Log some info run.config.learning_rate = learning_rate