More intuitive creator notebook. Experiment folders have the name of the wandb...

More intuitive creator notebook. Experiment folders have the name of the wandb run; samples are associated with the model epoch they were generated from. Other small changes

More intuitive creator notebook. Experiment folders have the name of the wandb...
89776614 · Gonzalo Martin Garcia · 8f2613b1 · 89776614 · 89776614 · 89776614
Commit 89776614 authored Jun 15, 2023 by Gonzalo Martin Garcia
--- a/evaluation/evaluate.py
+++ b/evaluation/evaluate.py
@@ -3,15 +3,15 @@ from evaluation.sample import ddpm_sampler
 def ddpm_evaluator(model, 
                  device,
                  dataloader,
-                  checkpoint_path,
+                  checkpoint,
                  experiment_path
                  ):
  '''
-  Takes a trained diffusion model from 'checkpoint_path' and evaluates its performance on the test 
+  Takes a trained diffusion model from 'checkpoint' and evaluates its performance on the test 
  dataset 'dataloader'. ....
    
  model:           Properly initialized DDPM model
-  checkpoint_path: Path to the checkpoint of the trained diffusion model weights and biases
+  checkpoint:      Name of the saved .pth file containing the trained weights and biases
  experiment_path: Path to the experiment folder where the evaluation results will be stored  
  dataloader:      Loads the test dataset
  TODO ...

--- a/evaluation/sample.py
+++ b/evaluation/sample.py
 import os
 import torch
 from torchvision import transforms
+import re

-def ddpm_sampler(model, checkpoint_path, experiment_path, device, intermediate=False, batch_size=15):
+def ddpm_sampler(model, checkpoint, experiment_path, device, intermediate=False, batch_size=15):
    '''
-    Samples a tensor of 'batch_size' images from a trained diffusion model on 'checkpoint_path'. The generated 
-    images are stored in the directory 'experiment_path/samples/sample_{j}. Where j is an index  
+    Samples a tensor of 'batch_size' images from a trained diffusion model with 'checkpoint'. The generated 
+    images are stored in the directory 'experiment_path/samples/epoch_{e}/sample_{j}', where e is the epoch 
+    of the model which we are sampling from and j is an index separating images from each call of the 
+    sampling function for the given model.
  
    model:           Diffusion model
-    checkpoint_path: Path to the .pth checkpoint containing the trained weights and biases
+    checkpoint:      Name of the saved .pth file containing the trained weights and biases
    experiment_path: Path to the experiment directory where the samples will be saved under the directory 'samples'
    batch_size:      The number of images to sample
    intermediate:    Bool value. If False the sampling function will draw a batch of images, else it will just 
@@ -17,6 +20,7 @@ def ddpm_sampler(model, checkpoint_path, experiment_path, device, intermediate=F

    # load model
    try:
+        checkpoint_path = f'{experiment_path}trained_ddpm/{checkpoint}'
        checkpoint = torch.load(checkpoint_path)
        # load weights and biases of the U-Net
        net_state_dict = checkpoint['model']
@@ -25,15 +29,26 @@ def ddpm_sampler(model, checkpoint_path, experiment_path, device, intermediate=F
    except Exception as e:
        print("Error loading checkpoint. Exception:", e)

-    # create samples directory (if first time sampling images)
-    output_dir = f'{experiment_path}/samples/'
+    # create samples directory for the complete experiment (if first time sampling images)
+    output_dir = f'{experiment_path}samples/'
+    #output_dir = os.path.join(experiment_path,'/samples/')
    os.makedirs(output_dir, exist_ok=True)

-    # create the sample directory for this sampling run    
-    sample_dir_list = [d for d in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, d))]
+    # create sample directory for the current version of the trained model (epoch parsed from the checkpoint filename)
+    model_name = os.path.basename(checkpoint_path)
+    epoch = re.findall(r'\d+', model_name)
+    if epoch:
+        e = int(epoch[0])
+    else:
+        raise ValueError(f"No digit found in the filename: {model_name}")
+    model_dir = os.path.join(output_dir,f'epoch_{e}')
+    os.makedirs(model_dir, exist_ok=True)
+
+    # create the sample directory for this sampling run for the current version of the model    
+    sample_dir_list = [d for d in os.listdir(model_dir) if os.path.isdir(os.path.join(model_dir, d))]
    indx_list = [int(d.split('_')[1]) for d in sample_dir_list if d.startswith('sample_')]
    j = max(indx_list, default=-1) + 1
-    sample_dir = os.path.join(output_dir, f'sample_{j}')
+    sample_dir = os.path.join(model_dir, f'sample_{j}')
    os.makedirs(sample_dir, exist_ok=True)

    # transform

--- a/experiment_creator.ipynb
+++ b/experiment_creator.ipynb
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
@@ -25,14 +25,14 @@
    "2. Run notebook on local machine to generate experiment folder with the JSON files containing the settings\n",
    "3. scp experiment folder to the HPC\n",
    "4. Run Pipeline by adding following to batch file:\n",
-    "- Train Model: &emsp;&emsp;&emsp;&emsp;&emsp; `python main.py train \"<absolute path of folder in hpc>\"`\n",
-    "- Generate Images: &emsp;&emsp; `python main.py generate \"<absolute path of folder in hpc>\"`\n",
-    "- Evaluate Model: &emsp;&emsp;&emsp; `python main.py evaluate \"<absolute path of folder in hpc>\"`"
+    "- Train Model: &emsp;&emsp;&emsp;&emsp;&emsp; `python main.py train \"<absolute path of experiment folder in hpc>\"`\n",
+    "- Sample Images: &emsp;&emsp;&emsp; `python main.py sample \"<absolute path of experiment folder in hpc>\"`\n",
+    "- Evaluate Model: &emsp;&emsp;&emsp; `python main.py evaluate \"<absolute path of experiment folder in hpc>\"`"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -46,33 +46,29 @@
    "datapath = \"/work/lect0100/lhq_256\"\n",
    "\n",
    "# Experiment setup\n",
-    "experiment_name = \"exp_0\"\n",
-    "model_name = 'model_epoch_20.pth'\n",
-    "checkpoint_path = '/work/lect0100/experiments_gonzalo/'+ experiment_name + '/trained_ddpm/' + model_name\n",
-    "experiment_path = '/work/lect0100/experiments_gonzalo/'+ experiment_name +'/'\n",
-    "\n",
+    "run_name = 'big_diffusers_fin' # WANDB and experiment folder Name!\n",
+    "checkpoint = 'model_epoch_8.pth' # Name of checkpoint pth file or None \n",
+    "experiment_path = '/work/lect0100/experiments_gonzalo/'+ run_name +'/'\n",
    "\n",
    "# Path to save generated experiment folder on local machine\n",
-    "local_path =\"/Users/gonzalo/Desktop/experiments/\" + experiment_name + '/'\n",
+    "local_path =\"/Users/gonzalo/Desktop/\" + run_name + '/settings'\n",
    "\n",
    "# Diffusion Model Settings\n",
    "diffusion_steps = 200\n",
-    "image_size = 64\n",
+    "image_size = 128\n",
    "channels = 3\n",
    "\n",
-    "\n",
    "# Training\n",
-    "batchsize = 64\n",
-    "epochs = 5\n",
-    "store_iter = 6\n",
-    "eval_iter = 0\n",
+    "batchsize = 8\n",
+    "epochs = 30\n",
+    "store_iter = 3\n",
+    "eval_iter = 500\n",
    "learning_rate = 0.0001\n",
    "lr_schedule = False\n",
    "optimizername = \"torch.optim.AdamW\"\n",
    "optimizer_params = None\n",
-    "run_name = 'diffusers_recon_20'# id for WANDB\n",
    "verbose = True\n",
-    "# checkpoint_path = None #(If no checkpoint training, ie. random weights)\n",
+    "# checkpoint = None #(If no checkpoint training, ie. random weights)\n",
    "\n",
    "# Sampling \n",
    "sample_size = 10\n",
@@ -126,7 +122,7 @@
    "                 beta_T = 0.02,\n",
    "                 alpha_bar_lower_bound = 0.9,\n",
    "                 var_schedule = 'same', \n",
-    "                 kl_loss = 'weighted', \n",
+    "                 kl_loss = 'simplified', \n",
    "                 recon_loss = 'nll',\n",
    "                 )\n",
    "training_setting = dict(\n",
@@ -139,25 +135,26 @@
    "                learning_rate = learning_rate,\n",
    "                lr_schedule = lr_schedule,\n",
    "                run_name=run_name,\n",
-    "                checkpoint_path= checkpoint_path,\n",
+    "                checkpoint= checkpoint,\n",
+    "                experiment_path = experiment_path,\n",
    "                verbose = verbose,\n",
    "                )\n",
    "sampling_setting = dict( \n",
-    "                checkpoint_path = checkpoint_path , \n",
+    "                checkpoint = checkpoint, \n",
    "                experiment_path = experiment_path, \n",
    "                batch_size = sample_size,\n",
    "                intermediate = intermediate\n",
    "                )\n",
    "# TODO\n",
    "evaluation_setting = dict(\n",
-    "                    checkpoint_path = checkpoint_path,\n",
+    "                    checkpoint = checkpoint,\n",
    "                    experiment_path = experiment_path,\n",
    "                    )                  "
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
@@ -167,13 +164,13 @@
      "create folder\n",
      "folder created \n",
      "stored json files in folder\n",
-      "{'modelname': 'UNet_Unconditional_Diffusion_Bottleneck_Variant', 'dataset': 'UnconditionalDataset', 'framework': 'DDPM', 'trainloop_function': 'ddpm_trainer', 'sampling_function': 'ddpm_sampler', 'evaluation_function': 'ddpm_evaluator', 'batchsize': 64}\n",
-      "{'fpath': '/work/lect0100/lhq_256', 'img_size': 64, 'frac': 0.8, 'skip_first_n': 0, 'ext': '.png', 'transform': True}\n",
-      "{'channels_in': 3, 'channels_out': 3, 'activation': 'relu', 'weight_init': 'he', 'projection_features': 64, 'time_dim': 64, 'time_channels': 200, 'num_stages': 4, 'stage_list': None, 'num_blocks': 1, 'num_groupnorm_groups': 32, 'dropout': 0.1, 'attention_list': None, 'num_attention_heads': 1}\n",
-      "{'diffusion_steps': 200, 'out_shape': (3, 64, 64), 'noise_schedule': 'linear', 'beta_1': 0.0001, 'beta_T': 0.02, 'alpha_bar_lower_bound': 0.9, 'var_schedule': 'same', 'kl_loss': 'weighted', 'recon_loss': 'nll'}\n",
-      "{'epochs': 5, 'store_iter': 6, 'eval_iter': 0, 'optimizer_class': 'torch.optim.AdamW', 'optimizer_params': None, 'learning_rate': 0.0001, 'lr_schedule': False, 'run_name': 'diffusers_recon_20', 'checkpoint_path': '/work/lect0100/experiments_377031/exp_0/trained_ddpm/model_epoch_20.pth', 'verbose': True}\n",
-      "{'checkpoint_path': '/work/lect0100/experiments_377031/exp_0/trained_ddpm/model_epoch_20.pth', 'experiment_path': '/work/lect0100/experiments_377031/exp_0/', 'batch_size': 10, 'intermediate': False}\n",
-      "{'checkpoint_path': '/work/lect0100/experiments_377031/exp_0/trained_ddpm/model_epoch_20.pth', 'experiment_path': '/work/lect0100/experiments_377031/exp_0/'}\n"
+      "{'modelname': 'UNet_Unconditional_Diffusion_Bottleneck_Variant', 'dataset': 'UnconditionalDataset', 'framework': 'DDPM', 'trainloop_function': 'ddpm_trainer', 'sampling_function': 'ddpm_sampler', 'evaluation_function': 'ddpm_evaluator', 'batchsize': 8}\n",
+      "{'fpath': '/work/lect0100/lhq_256', 'img_size': 128, 'frac': 0.8, 'skip_first_n': 0, 'ext': '.png', 'transform': True}\n",
+      "{'channels_in': 3, 'channels_out': 3, 'activation': 'relu', 'weight_init': 'he', 'projection_features': 64, 'time_dim': 8, 'time_channels': 200, 'num_stages': 4, 'stage_list': None, 'num_blocks': 1, 'num_groupnorm_groups': 32, 'dropout': 0.1, 'attention_list': None, 'num_attention_heads': 1}\n",
+      "{'diffusion_steps': 200, 'out_shape': (3, 128, 128), 'noise_schedule': 'linear', 'beta_1': 0.0001, 'beta_T': 0.02, 'alpha_bar_lower_bound': 0.9, 'var_schedule': 'same', 'kl_loss': 'simplified', 'recon_loss': 'nll'}\n",
+      "{'epochs': 30, 'store_iter': 3, 'eval_iter': 500, 'optimizer_class': 'torch.optim.AdamW', 'optimizer_params': None, 'learning_rate': 0.0001, 'lr_schedule': False, 'run_name': 'big_diffusers_fin', 'checkpoint': 'model_epoch_8.pth', 'experiment_path': '/work/lect0100/experiments_gonzalo/big_diffusers_fin/', 'verbose': True}\n",
+      "{'checkpoint': 'model_epoch_8.pth', 'experiment_path': '/work/lect0100/experiments_gonzalo/big_diffusers_fin/', 'batch_size': 10, 'intermediate': False}\n",
+      "{'checkpoint': 'model_epoch_8.pth', 'experiment_path': '/work/lect0100/experiments_gonzalo/big_diffusers_fin/'}\n"
     ]
    }
   ],
@@ -186,7 +183,8 @@
    "    print(\"break\")\n",
    "else:\n",
    "    print(\"create folder\")\n",
-    "    os.mkdir(f)\n",
+    "    #os.mkdir(f)\n",
+    "    os.makedirs(f, exist_ok=True)\n",
    "    print(\"folder created \")\n",
    "    with open(f+\"/meta_setting.json\",\"w+\") as fp:\n",
    "        json.dump(meta_setting,fp)\n",
@@ -219,6 +217,20 @@
    "    print(evaluation_setting)\n",
    "    "
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {

 %% Cell type:code id: tags:

 ``` python
 from trainer.train import *
 from dataloader.load import  *
 from models.Framework import *
 from models.unet_unconditional_diffusion import *
 import torch
 from torch import nn
 ```

 %% Cell type:markdown id: tags:

 # Prepare experiment
 1. Choose Hyperparameter Settings
 2. Run notebook on local machine to generate experiment folder with the JSON files containing the settings
 3. scp experiment folder to the HPC
 4. Run Pipeline by adding following to batch file:
- Train Model: &emsp;&emsp;&emsp;&emsp;&emsp; `python main.py train "<absolute path of folder in hpc>"`
- Generate Images: &emsp;&emsp; `python main.py generate "<absolute path of folder in hpc>"`
- Evaluate Model: &emsp;&emsp;&emsp; `python main.py evaluate "<absolute path of folder in hpc>"`
+- Train Model: &emsp;&emsp;&emsp;&emsp;&emsp; `python main.py train "<absolute path of experiment folder in hpc>"`
+- Sample Images: &emsp;&emsp;&emsp; `python main.py sample "<absolute path of experiment folder in hpc>"`
+- Evaluate Model: &emsp;&emsp;&emsp; `python main.py evaluate "<absolute path of experiment folder in hpc>"`

 %% Cell type:code id: tags:

 ``` python
 import torch

 ####
 # Settings
 ####

 # Dataset path
 datapath = "/work/lect0100/lhq_256"

 # Experiment setup
-experiment_name = "exp_0"
-model_name = 'model_epoch_20.pth'
-checkpoint_path = '/work/lect0100/experiments_gonzalo/'+ experiment_name + '/trained_ddpm/' + model_name
-experiment_path = '/work/lect0100/experiments_gonzalo/'+ experiment_name +'/'
-
+run_name = 'big_diffusers_fin' # WANDB and experiment folder Name!
+checkpoint = 'model_epoch_8.pth' # Name of checkpoint pth file or None
+experiment_path = '/work/lect0100/experiments_gonzalo/'+ run_name +'/'

 # Path to save generated experiment folder on local machine
-local_path ="/Users/gonzalo/Desktop/experiments/" + experiment_name + '/'
+local_path ="/Users/gonzalo/Desktop/" + run_name + '/settings'

 # Diffusion Model Settings
 diffusion_steps = 200
-image_size = 64
+image_size = 128
 channels = 3

-
 # Training
-batchsize = 64
-epochs = 5
-store_iter = 6
-eval_iter = 0
+batchsize = 8
+epochs = 30
+store_iter = 3
+eval_iter = 500
 learning_rate = 0.0001
 lr_schedule = False
 optimizername = "torch.optim.AdamW"
 optimizer_params = None
-run_name = 'diffusers_recon_20'# id for WANDB
 verbose = True
-# checkpoint_path = None #(If no checkpoint training, ie. random weights)
+# checkpoint = None #(If no checkpoint training, ie. random weights)

 # Sampling
 sample_size = 10
 intermediate = False # True if you want to sample one image and all its intermediate latents


 # Evaluating
 ...



 ###
 # Advanced Settings Dictionaries
 ###

 meta_setting = dict(modelname = "UNet_Unconditional_Diffusion_Bottleneck_Variant",
                    dataset = "UnconditionalDataset",
                    framework = "DDPM",
                    trainloop_function = "ddpm_trainer",
                    sampling_function = 'ddpm_sampler',
                    evaluation_function = 'ddpm_evaluator',
                    batchsize = batchsize
                    )
 dataset_setting = dict(fpath = datapath,
                                img_size = image_size,
                                frac =0.8,
                                skip_first_n = 0,
                                ext = ".png",
                                transform=True
                                )
 model_setting = dict( channels_in=channels,
               channels_out =channels ,
               activation='relu',           # activation function. Options: {'relu', 'leakyrelu', 'selu', 'gelu', 'silu'/'swish'}
               weight_init='he',            # weight initialization. Options: {'he', 'torch'}
               projection_features=64,      # number of image features after first convolution layer
               time_dim=batchsize,                 #don't change!!!
               time_channels=diffusion_steps,           # number of time channels #TODO same as diffusion steps?
               num_stages=4,                # number of stages in contracting/expansive path
               stage_list=None,             # specify number of features produced by stages
               num_blocks=1,                # number of ConvResBlock in each contracting/expansive path
               num_groupnorm_groups=32,     # number of groups used in Group Normalization inside a ConvResBlock
               dropout=0.1,                 # drop-out to be applied inside a ConvResBlock
               attention_list=None,         # specify MHA pattern across stages
               num_attention_heads=1,
               )
 framework_setting = dict(
                 diffusion_steps = diffusion_steps,  # dont change!!
                 out_shape = (channels,image_size,image_size),  # dont change!!
                 noise_schedule = 'linear',
                 beta_1 = 1e-4,
                 beta_T = 0.02,
                 alpha_bar_lower_bound = 0.9,
                 var_schedule = 'same',
-                 kl_loss = 'weighted',
+                 kl_loss = 'simplified',
                 recon_loss = 'nll',
                 )
 training_setting = dict(
                epochs = epochs,
                store_iter = store_iter,
                eval_iter = eval_iter,
                optimizer_class=optimizername,
                optimizer_params = optimizer_params,
                #optimizer_params=dict(lr=learning_rate), # don't change!
                learning_rate = learning_rate,
                lr_schedule = lr_schedule,
                run_name=run_name,
-                checkpoint_path= checkpoint_path,
+                checkpoint= checkpoint,
+                experiment_path = experiment_path,
                verbose = verbose,
                )
 sampling_setting = dict(
-                checkpoint_path = checkpoint_path ,
+                checkpoint = checkpoint,
                experiment_path = experiment_path,
                batch_size = sample_size,
                intermediate = intermediate
                )
 # TODO
 evaluation_setting = dict(
-                    checkpoint_path = checkpoint_path,
+                    checkpoint = checkpoint,
                    experiment_path = experiment_path,
                    )
 ```

 %% Cell type:code id: tags:

 ``` python
 import os
 import json
 f =  local_path
 if os.path.exists(f):
    print("path already exists, pick a new name!")
    print("break")
 else:
    print("create folder")
-    os.mkdir(f)
+    #os.mkdir(f)
+    os.makedirs(f, exist_ok=True)
    print("folder created ")
    with open(f+"/meta_setting.json","w+") as fp:
        json.dump(meta_setting,fp)

    with open(f+"/dataset_setting.json","w+") as fp:
        json.dump(dataset_setting,fp)

    with open(f+"/model_setting.json","w+") as fp:
        json.dump(model_setting,fp)

    with open(f+"/framework_setting.json","w+") as fp:
        json.dump(framework_setting,fp)

    with open(f+"/training_setting.json","w+") as fp:
        json.dump(training_setting,fp)

    with open(f+"/sampling_setting.json","w+") as fp:
        json.dump(sampling_setting,fp)

    with open(f+"/evaluation_setting.json","w+") as fp:
        json.dump(evaluation_setting,fp)

    print("stored json files in folder")
    print(meta_setting)
    print(dataset_setting)
    print(model_setting)
    print(framework_setting)
    print(training_setting)
    print(sampling_setting)
    print(evaluation_setting)

 ```

 %% Output

    create folder
    folder created
    stored json files in folder
-    {'modelname': 'UNet_Unconditional_Diffusion_Bottleneck_Variant', 'dataset': 'UnconditionalDataset', 'framework': 'DDPM', 'trainloop_function': 'ddpm_trainer', 'sampling_function': 'ddpm_sampler', 'evaluation_function': 'ddpm_evaluator', 'batchsize': 64}
-    {'fpath': '/work/lect0100/lhq_256', 'img_size': 64, 'frac': 0.8, 'skip_first_n': 0, 'ext': '.png', 'transform': True}
-    {'channels_in': 3, 'channels_out': 3, 'activation': 'relu', 'weight_init': 'he', 'projection_features': 64, 'time_dim': 64, 'time_channels': 200, 'num_stages': 4, 'stage_list': None, 'num_blocks': 1, 'num_groupnorm_groups': 32, 'dropout': 0.1, 'attention_list': None, 'num_attention_heads': 1}
-    {'diffusion_steps': 200, 'out_shape': (3, 64, 64), 'noise_schedule': 'linear', 'beta_1': 0.0001, 'beta_T': 0.02, 'alpha_bar_lower_bound': 0.9, 'var_schedule': 'same', 'kl_loss': 'weighted', 'recon_loss': 'nll'}
-    {'epochs': 5, 'store_iter': 6, 'eval_iter': 0, 'optimizer_class': 'torch.optim.AdamW', 'optimizer_params': None, 'learning_rate': 0.0001, 'lr_schedule': False, 'run_name': 'diffusers_recon_20', 'checkpoint_path': '/work/lect0100/experiments_377031/exp_0/trained_ddpm/model_epoch_20.pth', 'verbose': True}
-    {'checkpoint_path': '/work/lect0100/experiments_377031/exp_0/trained_ddpm/model_epoch_20.pth', 'experiment_path': '/work/lect0100/experiments_377031/exp_0/', 'batch_size': 10, 'intermediate': False}
-    {'checkpoint_path': '/work/lect0100/experiments_377031/exp_0/trained_ddpm/model_epoch_20.pth', 'experiment_path': '/work/lect0100/experiments_377031/exp_0/'}
+    {'modelname': 'UNet_Unconditional_Diffusion_Bottleneck_Variant', 'dataset': 'UnconditionalDataset', 'framework': 'DDPM', 'trainloop_function': 'ddpm_trainer', 'sampling_function': 'ddpm_sampler', 'evaluation_function': 'ddpm_evaluator', 'batchsize': 8}
+    {'fpath': '/work/lect0100/lhq_256', 'img_size': 128, 'frac': 0.8, 'skip_first_n': 0, 'ext': '.png', 'transform': True}
+    {'channels_in': 3, 'channels_out': 3, 'activation': 'relu', 'weight_init': 'he', 'projection_features': 64, 'time_dim': 8, 'time_channels': 200, 'num_stages': 4, 'stage_list': None, 'num_blocks': 1, 'num_groupnorm_groups': 32, 'dropout': 0.1, 'attention_list': None, 'num_attention_heads': 1}
+    {'diffusion_steps': 200, 'out_shape': (3, 128, 128), 'noise_schedule': 'linear', 'beta_1': 0.0001, 'beta_T': 0.02, 'alpha_bar_lower_bound': 0.9, 'var_schedule': 'same', 'kl_loss': 'simplified', 'recon_loss': 'nll'}
+    {'epochs': 30, 'store_iter': 3, 'eval_iter': 500, 'optimizer_class': 'torch.optim.AdamW', 'optimizer_params': None, 'learning_rate': 0.0001, 'lr_schedule': False, 'run_name': 'big_diffusers_fin', 'checkpoint': 'model_epoch_8.pth', 'experiment_path': '/work/lect0100/experiments_gonzalo/big_diffusers_fin/', 'verbose': True}
+    {'checkpoint': 'model_epoch_8.pth', 'experiment_path': '/work/lect0100/experiments_gonzalo/big_diffusers_fin/', 'batch_size': 10, 'intermediate': False}
+    {'checkpoint': 'model_epoch_8.pth', 'experiment_path': '/work/lect0100/experiments_gonzalo/big_diffusers_fin/'}
+
+%% Cell type:code id: tags:
+
+``` python
+```
+
+%% Cell type:code id: tags:
+
+``` python
+```

--- a/main.py
+++ b/main.py
@@ -73,7 +73,7 @@ def train_func(f):



-def generate_func(f):
+def sample_func(f):
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  print(f"device: {device}\n\n")
  print(f"folderpath: {f}\n\n")
@@ -224,7 +224,7 @@ if __name__ == '__main__':
  from torch import nn 
  
  print(sys.argv)
-  functions = {'train': train_func,'generate': generate_func,'evaluate': evaluate_func,"hello":hello}
+  functions = {'train': train_func,'sample': sample_func,'evaluate': evaluate_func,"hello":hello}
  functions[sys.argv[1]](sys.argv[2])
  
  

--- a/models/Framework.py
+++ b/models/Framework.py
@@ -371,8 +371,9 @@ class DDPM(nn.Module):
        x (tensor): Contains the self.diffusion_steps+1 denoised image tensors
        '''
        # start with an image of pure noise and store it as part of the output 
-        x_t_1 = torch.randn(self.out_shape, device=self.device)
-        x = torch.empty((self.diffusion_steps+1,) + self.out_shape, device=self.device)
+        x_t_1 = torch.randn((1,) + tuple(self.out_shape), device=self.device)
+        #x_t_1 = torch.randn(self.out_shape, device=self.device)
+        x = torch.empty((self.diffusion_steps+1,) + tuple(self.out_shape), device=self.device)
        x[-1] = x_t_1
        # apply reverse trajectory
        for t in reversed(range(1, self.diffusion_steps+1)):
@@ -387,7 +388,9 @@ class DDPM(nn.Module):
            x_t_1 = mean + std*noise
            # store noised image
            x[t-1] = x_t_1
-        return x
+        x_sq = x.squeeze(1)
+        return x_sq
+        #return x


    # Loss functions

--- a/trainer/train.py
+++ b/trainer/train.py
@@ -67,7 +67,7 @@ def ddpm_trainer(model,
                 lr_schedule = False,
                 verbose = False,
                 run_name=None,
-                 checkpoint_path= None,
+                 checkpoint= None,
                 experiment_path = None,
                 **args
                 ):
@@ -86,7 +86,7 @@ def ddpm_trainer(model,
                     'run_name' FOR THE DATA TO BE LOGGED ON THE SAME WANDB RUN!
    trainloader:     Loads the train dataset
    testloader:      Loads the test dataset
-    checkpoint_path: Path to where the checkpoint of a trained ddpm to resume training
+    checkpoint:      Name of the saved .pth file containing the trained weights and biases
    '''

    # set optimizer parameters and learning rate
@@ -98,8 +98,9 @@ def ddpm_trainer(model,
    
    # if a checkpoint name is given, load the model from that checkpoint
    last_epoch = -1
-    if checkpoint_path:
+    if checkpoint:
        try:
+            checkpoint_path = f'{experiment_path}trained_ddpm/{checkpoint}'
            # Load the checkpoint 
            checkpoint = torch.load(checkpoint_path)
            # update last_epoch