diff --git a/evaluation/eval_full/evaluate_full.py b/evaluation/eval_full/evaluate_full.py
index 8d1c724b0ed6c925f6d0be7ec7bb7423516ce12c..8e8d2224583041e662e6c3563f3ed6e3462e0c20 100644
--- a/evaluation/eval_full/evaluate_full.py
+++ b/evaluation/eval_full/evaluate_full.py
@@ -10,6 +10,7 @@ from torchvision.models import resnet50
 from kNN import *
 from metrics import *
 
+
 if __name__ == '__main__':
 
     #device = "mps" if torch.backends.mps.is_available() else "cpu"
@@ -23,8 +24,10 @@ if __name__ == '__main__':
                         help='path to generated images', type=str)
     parser.add_argument('-d', '--data', nargs='?', const='lhq', default='lhq',
                         help='choose between "lhq" and "face" dataset', type=str)
-    parser.add_argument('-a', '--arch', nargs='?', const='cnn', default='cnn',
-                        help='choose between "clip" and "cnn", default "cnn"', type=str)
+    parser.add_argument('--size', nargs='?', const=128, default=128,
+                        help='resolution of image the model was trained on, default 128 (int)', type=int)
+    parser.add_argument('-a', '--arch', nargs='?', const='clip', default='clip',
+                        help='choose between "clip" and "cnn", default "clip"', type=str)
     parser.add_argument('-m', '--mode', nargs='?', const='kNN', default='kNN',
                         help='choose between "kNN" and "pairs" for closest_pairs, default "kNN"', type=str)
     parser.add_argument('-k', '--k', nargs='?', const=3, default=3,
@@ -46,22 +49,25 @@ if __name__ == '__main__':
     sample = args['sample']
     name_appendix = args['name']
     fid_bool = args['fid']
+    size = args['size']
 
     print('Start')
+    output_path = Path(os.path.join(os.getcwd(), 'output'))
+    if not output_path.is_dir():
+        os.mkdir(output_path)
     txt_filename = 'output/evaluation_' + dataset + '_' + arch + '_' + mode + '-' + name_appendix + '.txt'
     with open(txt_filename, 'w') as f:
         f.write(f'Path to real images: {path_to_real_images}\n')
         f.write(f'Path to generated images: {path_to_generated_images}\n')
-        f.write(f'Experiment on {dataset} dataset\n')
+        f.write(f'Experiment on {dataset} dataset with images of resolution {size}x{size}\n')
         f.write(f'Using {arch} model to extract features\n')
         f.write(f'Plot of {mode} on {sample} samples\n')
+        f.write(f'Quantitative metrics computed: {fid_bool}\n')
+    # load data
     path_to_training_images = os.path.join(path_to_real_images, 'train')
     path_to_test_images = os.path.join(path_to_real_images, 'test')
 
     if fid_bool == 'yes':
-        # load data
-        #path_to_training_images = os.path.join(path_to_real_images, 'train')
-        #path_to_test_images = os.path.join(path_to_real_images, 'test')
 
         # metrics eval
         eval_images = image_to_tensor(path_to_test_images)
@@ -92,23 +98,19 @@ if __name__ == '__main__':
     # kNN-based eval
     if dataset == 'lhq':
         print('Dataset ', dataset)
-        #pth = '/Users/roy/Desktop/Workspace/RWTH/SoSe 2023/Deep Learning Lab/DLL_vsc/data/features/lhq_features'
         pth = '/home/wn455752/repo/evaluation/features/lhq'
         # load pretrained ResNet50
         if arch == 'cnn':
-            #path_to_pretrained_weights = '/Users/roy/Desktop/Workspace/RWTH/SoSe 2023/Deep Learning Lab/DLL_vsc/pretrained/resnet50_places365_pretrained/resnet50_places365_weights.pth'
-            print('loading model...')
+            print('Loading pretrained ResNet50...')
             path_to_pretrained_weights = '/home/wn455752/repo/evaluation/pretrained/resnet50_places365_pretrained/resnet50_places365_weights.pth'
-            print('loading weights...')
             weights = torch.load(path_to_pretrained_weights)
             model = resnet50().to(device)
-            print('initializing model with pretrained weights')
             model.load_state_dict(weights)
-            transform = transforms.Compose([transforms.ToTensor(),
-                                            transforms.Lambda(lambda x: x * 255)])
+            transform = transforms.Compose([transforms.ToTensor(),                   # transform PIL.Image to torch.Tensor
+                                            transforms.Lambda(lambda x: x * 255)])   # scale values to the 0-255 range the feature extractor expects
             with torch.no_grad():
                 model.eval()
-            print('checking for saved dataset features')
+            print('Checking for existing training dataset features...')
             # check for saved dataset features
             name_pth = Path(os.path.join(pth, 'resnet_features/real_name_list'))
             if name_pth.is_file():
@@ -116,22 +118,23 @@ if __name__ == '__main__':
                     real_names = pickle.load(fp)
                 feature_pth = Path(os.path.join(pth, 'resnet_features/real_image_features.pt'))
                 if name_pth.is_file():
-                    print('Loading ResNet features of real images...')
+                    print('Loading existing training dataset features...')
                     real_features = torch.load(feature_pth, map_location="cpu")
                     real_features = real_features.to(device)
                     feature_flag = True
         # load CLIP
         elif arch == 'clip':
-            print('loading model...')
+            print('Loading pretrained CLIP...')
             model, transform = clip.load("ViT-B/32", device=device)
             # check for saved dataset features
+            print('Checking for existing training dataset features...')
             name_pth = Path(os.path.join(pth, 'clip_features/real_name_list'))
             if name_pth.is_file():
                 with open(name_pth, 'rb') as fp:
                     real_names = pickle.load(fp)
                 feature_pth = Path(os.path.join(pth, 'clip_features/real_image_features.pt'))
                 if name_pth.is_file():
-                    print('Loading CLIP features of real images...')
+                    print('Loading existing training dataset features...')
                     real_features = torch.load(feature_pth, map_location="cpu")
                     real_features = real_features.to(device)
                     feature_flag = True
@@ -140,45 +143,45 @@ if __name__ == '__main__':
 
     elif dataset == 'faces':
         print('Dataset ', dataset)
-        #pth = '/Users/roy/Desktop/Workspace/RWTH/SoSe 2023/Deep Learning Lab/DLL_vsc/data/features/face_features'
         pth = '/home/wn455752/repo/evaluation/features/faces'
         # load pretrained VGGFace
         if arch == 'cnn':
-            print('loading model...')
-            #path_to_pretrained_weights = '/Users/roy/Desktop/Workspace/RWTH/SoSe 2023/Deep Learning Lab/DLL_vsc/pretrained/vggface_pretrained/VGG_FACE.t7'
+            print('Loading pretrained VGGFace...')
             path_to_pretrained_weights = '/home/wn455752/repo/evaluation/pretrained/vggface_pretrained/VGG_FACE.t7'
             model = VGG_16().to(device)
             model.load_weights(path=path_to_pretrained_weights)
-            transform = transforms.Compose([transforms.ToTensor(),
-                                            transforms.Resize((224,224)),
-                                            transforms.Lambda(lambda x: x * 255)])
+            transform = transforms.Compose([transforms.ToTensor(),                   # transform PIL.Image to torch.Tensor
+                                            transforms.Resize((224,224)),            # resize to VGG input shape
+                                            transforms.Lambda(lambda x: x * 255)])   # scale values to VGG input range
             with torch.no_grad():
                 model.eval()
             # check for saved dataset features
+            print('Checking for existing training dataset features...')
             name_pth = Path(os.path.join(pth, 'vggface_features/real_name_list'))
             if name_pth.is_file():
                 with open(name_pth, 'rb') as fp:
                     real_names = pickle.load(fp)
                 feature_pth = Path(os.path.join(pth, 'vggface_features/real_image_features.pt'))
                 if name_pth.is_file():
-                    print('Loading VGGFace features of real images...')
+                    print('Loading existing training dataset features...')
                     real_features = torch.load(feature_pth, map_location="cpu")
                     real_features = real_features.to(device)
                     feature_flag = True
 
         # load CLIP
         elif arch == 'clip':
-            print('loading model...')
+            print('Loading pretrained CLIP...')
             model, transform = clip.load("ViT-B/32", device=device)
             # check for saved dataset features
+            print('Checking for existing training dataset features...')
             name_pth = Path(os.path.join(pth, 'clip_features/real_name_list'))
             if name_pth.is_file():
                 with open(name_pth, 'rb') as fp:
                     real_names = pickle.load(fp)
                 feature_pth = Path(os.path.join(pth, 'clip_features/real_image_features.pt'))
                 if name_pth.is_file():
-                    print('Loading CLIP features of real images...')
+                    print('Loading existing training dataset features...')
                     real_features = torch.load(feature_pth, map_location="cpu")
                     real_features = real_features.to(device)
                     feature_flag = True
 
@@ -186,7 +189,7 @@ if __name__ == '__main__':
     knn = kNN()
     # get images
     if not feature_flag:
-        print('Collecting real images...')
+        print('Collecting training images...')
         real_names, real_tensor = knn.get_images(path_to_training_images, transform)
         with open(name_pth, 'wb') as fp:
             pickle.dump(real_names, fp)
@@ -195,7 +198,7 @@ if __name__ == '__main__':
 
     # extract features
     if not feature_flag:
-        print('Extracting features from real images...')
+        print('Extracting features from training images...')
         real_features = knn.feature_extractor(real_tensor, model, device)
         torch.save(real_features, feature_pth)
     print('Extracting features from generated images...')
@@ -206,7 +209,6 @@ if __name__ == '__main__':
     else:
         sample_size = int(sample)
 
-
     if mode == 'kNN':
         print('Finding kNNs...')
         knn.kNN(real_names, generated_names,
@@ -214,11 +216,14 @@ if __name__ == '__main__':
                 path_to_training_images, path_to_generated_images,
                 k=k_kNN,
                 sample=sample_size,
+                size=size,
                 name_appendix=name_appendix)
     elif mode == 'pairs':
+        print('Finding closest pairs...')
         knn.nearest_neighbor(real_names, generated_names,
                              real_features, generated_features,
                              path_to_training_images, path_to_generated_images,
                              sample=sample_size,
+                             size=size,
                              name_appendix=name_appendix)
     print('Finish!')
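The blocks above check for a pickled list of training filenames and a saved feature tensor, and only recompute them when they are missing; that caching is what the README below means when it says features are saved after the first execution. A minimal sketch of the pattern follows. The helper name load_or_compute_features, the extract_fn callable and the cache directory argument are illustrative, not functions from the repository; only the cache file names mirror the script above.

import pickle
from pathlib import Path

import torch


def load_or_compute_features(cache_dir, names, images, extract_fn, device='cpu'):
    """Reuse cached features when present; otherwise compute them once and cache them."""
    cache_dir = Path(cache_dir)
    cache_dir.mkdir(parents=True, exist_ok=True)
    name_pth = cache_dir / 'real_name_list'              # pickled list of filenames
    feature_pth = cache_dir / 'real_image_features.pt'   # saved feature tensor

    if name_pth.is_file() and feature_pth.is_file():
        with open(name_pth, 'rb') as fp:
            names = pickle.load(fp)
        features = torch.load(feature_pth, map_location='cpu').to(device)
        return names, features

    features = extract_fn(images).to(device)             # the expensive step, done only once
    with open(name_pth, 'wb') as fp:
        pickle.dump(names, fp)
    torch.save(features, feature_pth)
    return names, features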
diff --git a/evaluation/eval_full/evaluation_readme.md b/evaluation/eval_full/evaluation_readme.md
index f28b92baa560a5c1da32764e1f80caccdef5e699..caa7eb7bac5ddc10cad5a92bce998a979d148b45 100644
--- a/evaluation/eval_full/evaluation_readme.md
+++ b/evaluation/eval_full/evaluation_readme.md
@@ -3,55 +3,63 @@ We conduct two types of evaluation - qualitative and quantitative.
 
 ### Quantitative evaluations
-
-Quantitative evaluations are carried out to compare different backbone architectures of our unconditional diffusion model.
-A set of 10,000 generated samples from each model variant is compared with the test set of the real dataset.
+<pre>
+Quantitative evaluations are carried out to compare different backbone architectures of our unconditional diffusion model.
+A set of 10,000 generated samples from each model variant is compared with the test set of the real dataset.
 These evaluations include -
- 1. FID score
- 2. Inception score
- 3. Clean FID score (with CLIP)
- 4. FID infinity and IS infinity scores
+ 1. FID score
+ 2. Inception score
+ 3. Clean FID score (with CLIP)
+ 4. FID infinity and IS infinity scores
+</pre>
 
 ### Qualitative evaluations
-
-The aim of this set of evaluations is to qualitatively inspect whether our model has overfit to the training images. For this,
-the entire set of 10,000 generated samples from the best performing model from quanititative evaluation is compared with the
-training set of the real dataset. Additionally, the quality check is also done on a hand-selected subset of best generations.
-
-The comparison is implemented as MSE values between features of the generated and training samples. The features are extracted
-by using a pretrained model (ResNet50-Places365/VGGFace or CLIP). Based on the MSE scores we compute -
- 1. kNN - plot the k nearest neighbors of the generated samples
- 2. Closest pairs - plot the top pairs with smallest MSE value
-
-
-Execution starts with evaluate_full.py file. Input arguments are -
-
-* -rp, --realpath : Path to real images (string)
-* -gp, --genpath : Path to generated images (string)
-* -d, --data : Choose between 'lhq' (for LHQ landscape dataset) and 'faces' (for CelebAHQ faces dataset).
-              Default = 'lhq' (string)
-* -a, --arch : Choose between 'cnn' and 'clip'. Chosen pretrained model is used to extract features from the images.
-              If 'cnn' is selected, for LHQ dataset the model is a ResNet50 pretrained on Places365 dataset and for
-              CelebAHQ dataset the model is a pretrained VGGFace. Default = 'cnn' (string)
-* -m, --mode : Choose between 'kNN' and 'pairs' (for closest pairs), default = 'kNN' (string)
-* -k, --k : k value for kNN, default = 3 (int)
-* -s, --sample : Choose between an int and 'all'. If mode is 'kNN', plot kNN for this many samples (first s samples
-                in the directory of generated images). If mode is 'pairs', plot the top s closest pairs from entire
-                directory of generated images. Default 10 (int or 'all')
-* -n, --name : Name appendix (string)
-* --fid : Choose between 'yes' and 'no'. Compute FID, Inception score and upgraded FID scores. Default 'no' (string)
+<pre>
+The aim of this set of evaluations is to qualitatively inspect whether our model has overfit to the training images.
+For this, the entire set of 10,000 generated samples from the best performing model from quantitative evaluation is
+compared with the training set of the real dataset.
+Additionally, the quality check is also done on a hand-selected subset of best generations.
+The comparison is implemented as MSE values between features of the generated and training samples. The features are
+extracted by using a pretrained model (ResNet50-Places365/VGGFace or CLIP). Based on the MSE scores we compute -
+ 1. kNN - plot the k nearest neighbors of the generated samples
+ 2. Closest pairs - plot the top pairs with smallest MSE value
+</pre>
+### Arguments -
+<pre>
+Execution starts with evaluate_full.py file. Input arguments are -
+</pre>
+* <pre>-rp, --realpath : Path to real images (string) </pre>
+* <pre>-gp, --genpath : Path to generated images (string) </pre>
+* <pre>-d, --data : Choose between 'lhq' (for LHQ landscape dataset) and 'faces' (for CelebAHQ faces dataset).
+             Default = 'lhq' (string)</pre>
+* <pre>--size : Resolution of images the model was trained on, default 128 (int) </pre>
+* <pre>-a, --arch : Choose between 'cnn' and 'clip'. Chosen pretrained model is used to extract features from the images.
+             If 'cnn' is selected, for LHQ dataset the model is a ResNet50 pretrained on Places365 dataset and for
+             CelebAHQ dataset the model is a pretrained VGGFace. Not relevant in computing FID, IS scores. Default = 'clip' (string)</pre>
+* <pre>-m, --mode : Choose between 'kNN' and 'pairs' (for closest pairs), default = 'kNN' (string) </pre>
+* <pre>-k, --k : k value for kNN, default = 3 (int) </pre>
+* <pre>-s, --sample : Choose between an int and 'all'. If mode is 'kNN', plot kNN for this many samples (first s samples
+               in the directory of generated images). If mode is 'pairs', plot the top s closest pairs from entire
+               directory of generated images. Default 10 (int or 'all') </pre>
+* <pre>-n, --name : Name appendix (string) </pre>
+* <pre>--fid : Choose between 'yes' and 'no'. Compute FID, Inception score and upgraded FID scores. Default 'no' (string) </pre>
+
+<pre>
 Path to real images leads to a directory with two sub-directories - train and test.
-data
-|_ lhq
-|  |_ train
-|  |_ test
-|_ celebahq256_imgs
-|  |_ train
-|  |_ test
+data
+|_ lhq
+|  |_ train
+|  |_ test
+|_ celebahq256_imgs
+|  |_ train
+|  |_ test
 
-CLIP and CNN (ResNet50 or VGGFace) features of training images are saved after the first execution. This alleviates the need
+CLIP and CNN (ResNet50 or VGGFace) features of training images are saved after the first execution. This alleviates the need
 to recompute features of real images for different sets of generated samples.
+</pre>
 
 ### Links
 1. ResNet50 pretrained on Places365 - https://github.com/CSAILVision/places365
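The "MSE between features" comparison described in the README is implemented in the code as an L2 (Euclidean) distance between one generated feature vector and every training feature vector, followed by a smallest-k lookup; for a fixed feature dimension the ranking is the same either way. A minimal, self-contained sketch of that core step (tensor names and sizes are illustrative; the repository's version lives in kNN.py, shown in the next file of this patch):

import torch


def k_nearest_real_images(real_features, generated_feature, k=3):
    """Indices and distances of the k training features closest to one generated feature."""
    # real_features: (N, D) tensor, generated_feature: (D,) tensor
    dists = torch.norm(real_features - generated_feature, dim=1, p=2)   # L2 distance to every training image
    knn = dists.topk(k, largest=False)                                  # smallest distances = nearest neighbors
    return knn.indices, knn.values


# toy usage with random features
real = torch.randn(1000, 512)
gen = torch.randn(512)
indices, distances = k_nearest_real_images(real, gen, k=3)
print(indices.tolist(), distances.tolist())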
diff --git a/evaluation/eval_full/kNN.py b/evaluation/eval_full/kNN.py
index 185de21191da1928bbd244ab7344f511af0ede7e..89c61e9e3e71ab9f49a16d709f4cc4d933107af1 100644
--- a/evaluation/eval_full/kNN.py
+++ b/evaluation/eval_full/kNN.py
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 import torch
 import torchvision.transforms as transforms
 from torch.utils.data import DataLoader
@@ -12,7 +13,7 @@ class kNN():
     def __init__(self):
         pass
 
-    def get_images(self, path, transform, *args, **kwargs):
+    def get_images(self, path, transform, size=128, *args, **kwargs):
         '''
         returns
         names: list of filenames
@@ -30,8 +31,8 @@ class kNN():
             filepath = os.path.join(path, file)
             names.append(file)
             im = Image.open(filepath)
-            if im.size[0] != 128:
-                im = im.resize((128,128))   # DDPM was trained on 128x128 images
+            if im.size[0] != size:
+                im = im.resize((size,size))   # resize to the resolution the DDPM was trained on
             im = transform(im)
             images_list.append(im)
 
@@ -68,12 +69,14 @@ class kNN():
             real_features, generated_features,
             path_to_real_images, path_to_generated_images,
             k=3,
-            sample=10,
+            sample=10, size=128,
             name_appendix='',
             *args, **kwargs):
         '''
         creates a plot with (generated image: k nearest real images) pairs
         '''
+        if sample > 50:
+            print('Cannot plot for more than 50 samples! sample <= 50')
         fig, ax = plt.subplots(sample, k+1, figsize=((k+1)*3,sample*2))
 
         for i in range(len(generated_features)):
@@ -94,6 +97,8 @@ class kNN():
             # draw the k real images
             for idx in knn.indices:
                 im = Image.open(os.path.join(path_to_real_images, real_names[idx.item()]))
+                if im.size[0] != size:
+                    im = im.resize((size,size))
                 ax[i, j].imshow(im)
                 ax[i, j].set_xticks([])
                 ax[i, j].set_yticks([])
@@ -103,27 +108,32 @@ class kNN():
                     break
 
         # savefig
-
+        output_path = Path(os.path.join(os.getcwd(), 'output'))
+        if not output_path.is_dir():
+            os.mkdir(output_path)
         plot_name = f'{k}NN_{sample}_samples'
         if name_appendix != '':
-            plot_name = plot_name + name_appendix
-        fig.savefig('output/' + plot_name + '.png')
+            plot_name = plot_name + '_' + name_appendix
+        fig.savefig(os.path.join(output_path, plot_name + '.png'))
 
 
     def nearest_neighbor(self, real_names, generated_names,
                          real_features, generated_features,
                          path_to_real_images, path_to_generated_images,
-                         sample=10,
+                         sample=10, size=128,
                          name_appendix='',
                          *args, **kwargs):
         print('Computing nearest neighbors...')
+        if sample > 50:
+            print('Cannot plot for more than 50 samples! sample <= 50')
         fig, ax = plt.subplots(sample, 2, figsize=(2*3,sample*2))
         nn_dict = OrderedDict()
 
         for i in range(len(generated_features)):
             # l2 norm of one generated feature and all real features
-            #dist = torch.linalg.vector_norm(real_features - generated_features[i], ord=2, dim=1)
-            dist = torch.norm(real_features - generated_features[i], dim=1, p=2)
+            #dist = torch.linalg.vector_norm(real_features - generated_features[i], ord=2, dim=1)   # no mps support
+            dist = torch.norm(real_features - generated_features[i], dim=1, p=2)   # soon to be deprecated
+            # nearest neighbor of the generated image
             knn = dist.topk(1, largest=False)
             # insert to the dict: generated_image: (distance, index of the nearest neighbor)
@@ -145,13 +155,19 @@ class kNN():
             # draw the real image
             knn_score, real_img_idx = nn_dict_sorted[gen_names[i]]
             im = Image.open(os.path.join(path_to_real_images, real_names[real_img_idx]))
+            if im.size[0] != size:
+                im = im.resize((size,size))
             ax[i, 1].imshow(im)
             ax[i, 1].set_xticks([])
             ax[i, 1].set_yticks([])
             ax[i, 1].set_title(f'{real_names[real_img_idx][:-4]}, {knn_score:.2f}', fontsize=8)
 
         #savefig
+        output_path = Path(os.path.join(os.getcwd(), 'output'))
+        if not output_path.is_dir():
+            os.mkdir(output_path)
         plot_name = f'closest_pairs_top_{sample}'
         if name_appendix != '':
-            plot_name = plot_name + name_appendix
-        fig.savefig('output/' + plot_name + '.png')
\ No newline at end of file
+            plot_name = plot_name + '_' + name_appendix
+        fig.savefig(os.path.join(output_path, plot_name + '.png'))
+
\ No newline at end of file
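Taken together, a typical qualitative run wires the updated pieces up roughly as follows. This is an illustrative sketch only: it assumes the kNN class methods and the CLIP default introduced by this patch, and the dataset paths and the 'demo' name appendix are placeholders, not paths from the repository.

import torch
import clip

from kNN import kNN

device = 'cuda' if torch.cuda.is_available() else 'cpu'
size = 128                                    # value of the new --size flag

# CLIP is now the default feature extractor (-a clip)
model, transform = clip.load("ViT-B/32", device=device)

knn = kNN()
real_names, real_tensor = knn.get_images('data/lhq/train', transform, size=size)     # placeholder path
gen_names, gen_tensor = knn.get_images('samples/generated', transform, size=size)    # placeholder path

with torch.no_grad():
    real_features = knn.feature_extractor(real_tensor, model, device)
    gen_features = knn.feature_extractor(gen_tensor, model, device)

# plot the 3 nearest training images for the first 10 generated samples; figures land in ./output/
knn.kNN(real_names, gen_names, real_features, gen_features,
        'data/lhq/train', 'samples/generated',
        k=3, sample=10, size=size, name_appendix='demo')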