diff --git a/evaluation/eval_full/evaluate_full.py b/evaluation/eval_full/evaluate_full.py
index 8d1c724b0ed6c925f6d0be7ec7bb7423516ce12c..8e8d2224583041e662e6c3563f3ed6e3462e0c20 100644
--- a/evaluation/eval_full/evaluate_full.py
+++ b/evaluation/eval_full/evaluate_full.py
@@ -10,6 +10,7 @@ from torchvision.models import resnet50
 from kNN import *
 from metrics import *
 
+
 if __name__ == '__main__':
 
     #device = "mps" if torch.backends.mps.is_available() else "cpu"
@@ -23,8 +24,10 @@ if __name__ == '__main__':
                         help='path to generated images', type=str)
     parser.add_argument('-d', '--data', nargs='?', const='lhq', default='lhq',
                         help='choose between "lhq" and "face" dataset', type=str)
-    parser.add_argument('-a', '--arch', nargs='?', const='cnn', default='cnn',
-                        help='choose between "clip" and "cnn", default "cnn"', type=str)
+    parser.add_argument('--size', nargs='?', const=128, default=128,
+                        help='resolution of image the model was trained on, default 128 (int)', type=int)
+    parser.add_argument('-a', '--arch', nargs='?', const='clip', default='clip',
+                        help='choose between "clip" and "cnn", default "clip"', type=str)
     parser.add_argument('-m', '--mode', nargs='?', const='kNN', default='kNN',
                         help='choose between "kNN" and "pairs" for closest_pairs, default "kNN"', type=str)
     parser.add_argument('-k', '--k', nargs='?', const=3, default=3,
@@ -46,22 +49,25 @@ if __name__ == '__main__':
     sample = args['sample']
     name_appendix = args['name']
     fid_bool = args['fid']
+    size = args['size']
 
     print('Start')
+    output_path = Path(os.path.join(os.getcwd(), 'output'))
+    if not output_path.is_dir():
+        os.mkdir(output_path)
     txt_filename = 'output/evaluation_' + dataset + '_' + arch + '_' + mode + '-' + name_appendix + '.txt'
     with open(txt_filename, 'w') as f:
         f.write(f'Path to real images: {path_to_real_images}\n')
         f.write(f'Path to generated images: {path_to_generated_images}\n')
-        f.write(f'Experiment on {dataset} dataset\n')
+        f.write(f'Experiment on {dataset} dataset with images of resolution {size}x{size}\n')
         f.write(f'Using {arch} model to extract features\n')
         f.write(f'Plot of {mode} on {sample} samples\n')
+        f.write(f'Quantitative metrics computed: {fid_bool}\n')
+    # load data
     path_to_training_images = os.path.join(path_to_real_images, 'train')
     path_to_test_images = os.path.join(path_to_real_images, 'test')
 
     if fid_bool == 'yes':
-        # load data
-        #path_to_training_images = os.path.join(path_to_real_images, 'train')
-        #path_to_test_images = os.path.join(path_to_real_images, 'test')
 
         # metrics eval
         eval_images = image_to_tensor(path_to_test_images)
@@ -92,23 +98,19 @@ if __name__ == '__main__':
     # kNN-based eval
     if dataset == 'lhq':
         print('Dataset ', dataset)
-        #pth = '/Users/roy/Desktop/Workspace/RWTH/SoSe 2023/Deep Learning Lab/DLL_vsc/data/features/lhq_features'
         pth = '/home/wn455752/repo/evaluation/features/lhq'
         # load pretrained ResNet50
         if arch == 'cnn':
-            #path_to_pretrained_weights = '/Users/roy/Desktop/Workspace/RWTH/SoSe 2023/Deep Learning Lab/DLL_vsc/pretrained/resnet50_places365_pretrained/resnet50_places365_weights.pth'
-            print('loading model...')
+            print('Loading pretrained ResNet50...')
             path_to_pretrained_weights = '/home/wn455752/repo/evaluation/pretrained/resnet50_places365_pretrained/resnet50_places365_weights.pth'
-            print('loading weights...')
             weights = torch.load(path_to_pretrained_weights)
             model = resnet50().to(device)
-            print('initializing model with pretrained weights')
             model.load_state_dict(weights)
-            transform = transforms.Compose([transforms.ToTensor(),
-                                            transforms.Lambda(lambda x: x * 255)])
+            transform = transforms.Compose([transforms.ToTensor(),                   # transform PIL.Image to torch.Tensor
+                                            transforms.Lambda(lambda x: x * 255)])   # scale values to the 0-255 range the feature extractor expects
             with torch.no_grad():
                 model.eval()
-            print('checking for saved dataset features')
+            print('Checking for existing training dataset features...')
             # check for saved dataset features
             name_pth = Path(os.path.join(pth, 'resnet_features/real_name_list'))
             if name_pth.is_file():
@@ -116,22 +118,23 @@ if __name__ == '__main__':
                     real_names = pickle.load(fp)
                 feature_pth = Path(os.path.join(pth, 'resnet_features/real_image_features.pt'))
                 if name_pth.is_file():
-                    print('Loading ResNet features of real images...')
+                    print('Loading existing training dataset features...')
                     real_features = torch.load(feature_pth, map_location="cpu")
                     real_features = real_features.to(device)
                     feature_flag = True
         # load CLIP
         elif arch == 'clip':
-            print('loading model...')
+            print('Loading pretrained CLIP...')
             model, transform = clip.load("ViT-B/32", device=device)
             # check for saved dataset features
+            print('Checking for existing training dataset features...')
             name_pth = Path(os.path.join(pth, 'clip_features/real_name_list'))
             if name_pth.is_file():
                 with open(name_pth, 'rb') as fp:
                     real_names = pickle.load(fp)
                 feature_pth = Path(os.path.join(pth, 'clip_features/real_image_features.pt'))
                 if name_pth.is_file():
-                    print('Loading CLIP features of real images...')
+                    print('Loading existing training dataset features...')
                     real_features = torch.load(feature_pth, map_location="cpu")
                     real_features = real_features.to(device)
                     feature_flag = True
@@ -140,45 +143,45 @@ if __name__ == '__main__':
 
     elif dataset == 'faces':
         print('Dataset ', dataset)
-        #pth = '/Users/roy/Desktop/Workspace/RWTH/SoSe 2023/Deep Learning Lab/DLL_vsc/data/features/face_features'
         pth = '/home/wn455752/repo/evaluation/features/faces'
         # load pretrained VGGFace
         if arch == 'cnn':
-            print('loading model...')
-            #path_to_pretrained_weights = '/Users/roy/Desktop/Workspace/RWTH/SoSe 2023/Deep Learning Lab/DLL_vsc/pretrained/vggface_pretrained/VGG_FACE.t7'
+            print('Loading pretrained VGGFace...')
             path_to_pretrained_weights = '/home/wn455752/repo/evaluation/pretrained/vggface_pretrained/VGG_FACE.t7'
             model = VGG_16().to(device)
             model.load_weights(path=path_to_pretrained_weights)
-            transform = transforms.Compose([transforms.ToTensor(),
-                                            transforms.Resize((224,224)),
-                                            transforms.Lambda(lambda x: x * 255)])
+            transform = transforms.Compose([transforms.ToTensor(),                   # transform PIL.Image to torch.Tensor
+                                            transforms.Resize((224,224)),            # resize to VGG input shape
+                                            transforms.Lambda(lambda x: x * 255)])   # scale values to VGG input range
             with torch.no_grad():
                 model.eval()
             # check for saved dataset features
+            print('Checking for existing training dataset features...')
             name_pth = Path(os.path.join(pth, 'vggface_features/real_name_list'))
             if name_pth.is_file():
                 with open(name_pth, 'rb') as fp:
                     real_names = pickle.load(fp)
                 feature_pth = Path(os.path.join(pth, 'vggface_features/real_image_features.pt'))
                 if name_pth.is_file():
-                    print('Loading VGGFace features of real images...')
+                    print('Loading existing training dataset features...')
                     real_features = torch.load(feature_pth, map_location="cpu")
                     real_features = real_features.to(device)
                     feature_flag = True
 
         # load CLIP
         elif arch == 'clip':
-            print('loading model...')
+            print('Loading pretrained CLIP...')
             model, transform = clip.load("ViT-B/32", device=device)
             # check for saved dataset features
+            print('Checking for existing training dataset features...')
             name_pth = Path(os.path.join(pth, 'clip_features/real_name_list'))
             if name_pth.is_file():
                 with open(name_pth, 'rb') as fp:
                     real_names = pickle.load(fp)
                 feature_pth = Path(os.path.join(pth, 'clip_features/real_image_features.pt'))
                 if name_pth.is_file():
-                    print('Loading CLIP features of real images...')
+                    print('Loading existing training dataset features...')
                     real_features = torch.load(feature_pth, map_location="cpu")
                     real_features = real_features.to(device)
                     feature_flag = True
 
@@ -186,7 +189,7 @@ if __name__ == '__main__':
     knn = kNN()
     # get images
     if not feature_flag:
-        print('Collecting real images...')
+        print('Collecting training images...')
         real_names, real_tensor = knn.get_images(path_to_training_images, transform)
         with open(name_pth, 'wb') as fp:
             pickle.dump(real_names, fp)
@@ -195,7 +198,7 @@ if __name__ == '__main__':
 
     # extract features
     if not feature_flag:
-        print('Extracting features from real images...')
+        print('Extracting features from training images...')
         real_features = knn.feature_extractor(real_tensor, model, device)
         torch.save(real_features, feature_pth)
     print('Extracting features from generated images...')
@@ -206,7 +209,6 @@ if __name__ == '__main__':
     else:
         sample_size = int(sample)
 
-
     if mode == 'kNN':
         print('Finding kNNs...')
         knn.kNN(real_names, generated_names,
@@ -214,11 +216,14 @@ if __name__ == '__main__':
                 path_to_training_images, path_to_generated_images,
                 k=k_kNN,
                 sample=sample_size,
+                size=size,
                 name_appendix=name_appendix)
     elif mode == 'pairs':
+        print('Finding closest pairs...')
         knn.nearest_neighbor(real_names, generated_names,
                              real_features, generated_features,
                              path_to_training_images, path_to_generated_images,
                              sample=sample_size,
+                             size=size,
                              name_appendix=name_appendix)
     print('Finish!')
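The blocks above check for a pickled list of training filenames and a saved feature tensor, and only recompute them when they are missing; that caching is what the README below means when it says features are saved after the first execution. A minimal sketch of the pattern follows. The helper name load_or_compute_features, the extract_fn callable and the cache directory argument are illustrative, not functions from the repository; only the cache file names mirror the script above.

import pickle
from pathlib import Path

import torch


def load_or_compute_features(cache_dir, names, images, extract_fn, device='cpu'):
    """Reuse cached features when present; otherwise compute them once and cache them."""
    cache_dir = Path(cache_dir)
    cache_dir.mkdir(parents=True, exist_ok=True)
    name_pth = cache_dir / 'real_name_list'              # pickled list of filenames
    feature_pth = cache_dir / 'real_image_features.pt'   # saved feature tensor

    if name_pth.is_file() and feature_pth.is_file():
        with open(name_pth, 'rb') as fp:
            names = pickle.load(fp)
        features = torch.load(feature_pth, map_location='cpu').to(device)
        return names, features

    features = extract_fn(images).to(device)             # the expensive step, done only once
    with open(name_pth, 'wb') as fp:
        pickle.dump(names, fp)
    torch.save(features, feature_pth)
    return names, features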
diff --git a/evaluation/eval_full/evaluation_readme.md b/evaluation/eval_full/evaluation_readme.md
index f28b92baa560a5c1da32764e1f80caccdef5e699..caa7eb7bac5ddc10cad5a92bce998a979d148b45 100644
--- a/evaluation/eval_full/evaluation_readme.md
+++ b/evaluation/eval_full/evaluation_readme.md
@@ -3,55 +3,63 @@ We conduct two types of evaluation - qualitative and quantitative.
 
 ### Quantitative evaluations
-
-Quantitative evaluations are carried out to compare different backbone architectures of our unconditional diffusion model.
-A set of 10,000 generated samples from each model variant is compared with the test set of the real dataset.
+<pre>
+Quantitative evaluations are carried out to compare different backbone architectures of our unconditional diffusion model.
+A set of 10,000 generated samples from each model variant is compared with the test set of the real dataset.
 These evaluations include -
- 1. FID score
- 2. Inception score
- 3. Clean FID score (with CLIP)
- 4. FID infinity and IS infinity scores
+ 1. FID score
+ 2. Inception score
+ 3. Clean FID score (with CLIP)
+ 4. FID infinity and IS infinity scores
+</pre>
 
 ### Qualitative evaluations
-
-The aim of this set of evaluations is to qualitatively inspect whether our model has overfit to the training images. For this,
-the entire set of 10,000 generated samples from the best performing model from quanititative evaluation is compared with the
-training set of the real dataset. Additionally, the quality check is also done on a hand-selected subset of best generations.
-
-The comparison is implemented as MSE values between features of the generated and training samples. The features are extracted
-by using a pretrained model (ResNet50-Places365/VGGFace or CLIP). Based on the MSE scores we compute -
- 1. kNN - plot the k nearest neighbors of the generated samples
- 2. Closest pairs - plot the top pairs with smallest MSE value
-
-
-Execution starts with evaluate_full.py file. Input arguments are -
-
-* -rp, --realpath : Path to real images (string)
-* -gp, --genpath : Path to generated images (string)
-* -d, --data : Choose between 'lhq' (for LHQ landscape dataset) and 'faces' (for CelebAHQ faces dataset).
-              Default = 'lhq' (string)
-* -a, --arch : Choose between 'cnn' and 'clip'. Chosen pretrained model is used to extract features from the images.
-              If 'cnn' is selected, for LHQ dataset the model is a ResNet50 pretrained on Places365 dataset and for
-              CelebAHQ dataset the model is a pretrained VGGFace. Default = 'cnn' (string)
-* -m, --mode : Choose between 'kNN' and 'pairs' (for closest pairs), default = 'kNN' (string)
-* -k, --k : k value for kNN, default = 3 (int)
-* -s, --sample : Choose between an int and 'all'. If mode is 'kNN', plot kNN for this many samples (first s samples
-                in the directory of generated images). If mode is 'pairs', plot the top s closest pairs from entire
-                directory of generated images. Default 10 (int or 'all')
-* -n, --name : Name appendix (string)
-* --fid : Choose between 'yes' and 'no'. Compute FID, Inception score and upgraded FID scores. Default 'no' (string)
+<pre>
+The aim of this set of evaluations is to qualitatively inspect whether our model has overfit to the training images.
+For this, the entire set of 10,000 generated samples from the best performing model from quantitative evaluation is
+compared with the training set of the real dataset.
+Additionally, the quality check is also done on a hand-selected subset of best generations.
+The comparison is implemented as MSE values between features of the generated and training samples. The features are
+extracted by using a pretrained model (ResNet50-Places365/VGGFace or CLIP). Based on the MSE scores we compute -
+ 1. kNN - plot the k nearest neighbors of the generated samples
+ 2. Closest pairs - plot the top pairs with smallest MSE value
+</pre>
+### Arguments -
+<pre>
+Execution starts with evaluate_full.py file. Input arguments are -
+</pre>
+* <pre>-rp, --realpath : Path to real images (string) </pre>
+* <pre>-gp, --genpath : Path to generated images (string) </pre>
+* <pre>-d, --data : Choose between 'lhq' (for LHQ landscape dataset) and 'faces' (for CelebAHQ faces dataset).
+             Default = 'lhq' (string)</pre>
+* <pre>--size : Resolution of images the model was trained on, default 128 (int) </pre>
+* <pre>-a, --arch : Choose between 'cnn' and 'clip'. Chosen pretrained model is used to extract features from the images.
+             If 'cnn' is selected, for LHQ dataset the model is a ResNet50 pretrained on Places365 dataset and for
+             CelebAHQ dataset the model is a pretrained VGGFace. Not relevant in computing FID, IS scores. Default = 'clip' (string)</pre>
+* <pre>-m, --mode : Choose between 'kNN' and 'pairs' (for closest pairs), default = 'kNN' (string) </pre>
+* <pre>-k, --k : k value for kNN, default = 3 (int) </pre>
+* <pre>-s, --sample : Choose between an int and 'all'. If mode is 'kNN', plot kNN for this many samples (first s samples
+               in the directory of generated images). If mode is 'pairs', plot the top s closest pairs from entire
+               directory of generated images. Default 10 (int or 'all') </pre>
+* <pre>-n, --name : Name appendix (string) </pre>
+* <pre>--fid : Choose between 'yes' and 'no'. Compute FID, Inception score and upgraded FID scores. Default 'no' (string) </pre>
+
+<pre>
 Path to real images leads to a directory with two sub-directories - train and test.
-data
-|_ lhq
-|  |_ train
-|  |_ test
-|_ celebahq256_imgs
-|  |_ train
-|  |_ test
+data
+|_ lhq
+|  |_ train
+|  |_ test
+|_ celebahq256_imgs
+|  |_ train
+|  |_ test
 
-CLIP and CNN (ResNet50 or VGGFace) features of training images are saved after the first execution. This alleviates the need
+CLIP and CNN (ResNet50 or VGGFace) features of training images are saved after the first execution. This alleviates the need
 to recompute features of real images for different sets of generated samples.
+</pre>
 
 ### Links
 1. ResNet50 pretrained on Places365 - https://github.com/CSAILVision/places365
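The "MSE between features" comparison described in the README is implemented in the code as an L2 (Euclidean) distance between one generated feature vector and every training feature vector, followed by a smallest-k lookup; for a fixed feature dimension the ranking is the same either way. A minimal, self-contained sketch of that core step (tensor names and sizes are illustrative; the repository's version lives in kNN.py, shown in the next file of this patch):

import torch


def k_nearest_real_images(real_features, generated_feature, k=3):
    """Indices and distances of the k training features closest to one generated feature."""
    # real_features: (N, D) tensor, generated_feature: (D,) tensor
    dists = torch.norm(real_features - generated_feature, dim=1, p=2)   # L2 distance to every training image
    knn = dists.topk(k, largest=False)                                  # smallest distances = nearest neighbors
    return knn.indices, knn.values


# toy usage with random features
real = torch.randn(1000, 512)
gen = torch.randn(512)
indices, distances = k_nearest_real_images(real, gen, k=3)
print(indices.tolist(), distances.tolist())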
diff --git a/evaluation/eval_full/kNN.py b/evaluation/eval_full/kNN.py
index 185de21191da1928bbd244ab7344f511af0ede7e..89c61e9e3e71ab9f49a16d709f4cc4d933107af1 100644
--- a/evaluation/eval_full/kNN.py
+++ b/evaluation/eval_full/kNN.py
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 import torch
 import torchvision.transforms as transforms
 from torch.utils.data import DataLoader
@@ -12,7 +13,7 @@ class kNN():
     def __init__(self):
         pass
 
-    def get_images(self, path, transform, *args, **kwargs):
+    def get_images(self, path, transform, size=128, *args, **kwargs):
         '''
         returns
         names: list of filenames
@@ -30,8 +31,8 @@ class kNN():
             filepath = os.path.join(path, file)
             names.append(file)
             im = Image.open(filepath)
-            if im.size[0] != 128:
-                im = im.resize((128,128))   # DDPM was trained on 128x128 images
+            if im.size[0] != size:
+                im = im.resize((size,size))   # resize to the resolution the DDPM was trained on
             im = transform(im)
             images_list.append(im)
 
@@ -68,12 +69,14 @@ class kNN():
             real_features, generated_features,
             path_to_real_images, path_to_generated_images,
             k=3,
-            sample=10,
+            sample=10, size=128,
             name_appendix='',
             *args, **kwargs):
         '''
         creates a plot with (generated image: k nearest real images) pairs
         '''
+        if sample > 50:
+            print('Cannot plot for more than 50 samples! sample <= 50')
         fig, ax = plt.subplots(sample, k+1, figsize=((k+1)*3,sample*2))
 
         for i in range(len(generated_features)):
@@ -94,6 +97,8 @@ class kNN():
             # draw the k real images
             for idx in knn.indices:
                 im = Image.open(os.path.join(path_to_real_images, real_names[idx.item()]))
+                if im.size[0] != size:
+                    im = im.resize((size,size))
                 ax[i, j].imshow(im)
                 ax[i, j].set_xticks([])
                 ax[i, j].set_yticks([])
@@ -103,27 +108,32 @@ class kNN():
                     break
 
         # savefig
-
+        output_path = Path(os.path.join(os.getcwd(), 'output'))
+        if not output_path.is_dir():
+            os.mkdir(output_path)
         plot_name = f'{k}NN_{sample}_samples'
         if name_appendix != '':
-            plot_name = plot_name + name_appendix
-        fig.savefig('output/' + plot_name + '.png')
+            plot_name = plot_name + '_' + name_appendix
+        fig.savefig(os.path.join(output_path, plot_name + '.png'))
 
 
     def nearest_neighbor(self, real_names, generated_names,
                          real_features, generated_features,
                          path_to_real_images, path_to_generated_images,
-                         sample=10,
+                         sample=10, size=128,
                          name_appendix='',
                          *args, **kwargs):
         print('Computing nearest neighbors...')
+        if sample > 50:
+            print('Cannot plot for more than 50 samples! sample <= 50')
         fig, ax = plt.subplots(sample, 2, figsize=(2*3,sample*2))
         nn_dict = OrderedDict()
 
         for i in range(len(generated_features)):
             # l2 norm of one generated feature and all real features
-            #dist = torch.linalg.vector_norm(real_features - generated_features[i], ord=2, dim=1)
-            dist = torch.norm(real_features - generated_features[i], dim=1, p=2)
+            #dist = torch.linalg.vector_norm(real_features - generated_features[i], ord=2, dim=1)   # no mps support
+            dist = torch.norm(real_features - generated_features[i], dim=1, p=2)   # soon to be deprecated
+            # nearest neighbor of the generated image
             knn = dist.topk(1, largest=False)
             # insert to the dict: generated_image: (distance, index of the nearest neighbor)
@@ -145,13 +155,19 @@ class kNN():
             # draw the real image
             knn_score, real_img_idx = nn_dict_sorted[gen_names[i]]
             im = Image.open(os.path.join(path_to_real_images, real_names[real_img_idx]))
+            if im.size[0] != size:
+                im = im.resize((size,size))
             ax[i, 1].imshow(im)
             ax[i, 1].set_xticks([])
             ax[i, 1].set_yticks([])
             ax[i, 1].set_title(f'{real_names[real_img_idx][:-4]}, {knn_score:.2f}', fontsize=8)
 
         #savefig
+        output_path = Path(os.path.join(os.getcwd(), 'output'))
+        if not output_path.is_dir():
+            os.mkdir(output_path)
         plot_name = f'closest_pairs_top_{sample}'
         if name_appendix != '':
-            plot_name = plot_name + name_appendix
-        fig.savefig('output/' + plot_name + '.png')
\ No newline at end of file
+            plot_name = plot_name + '_' + name_appendix
+        fig.savefig(os.path.join(output_path, plot_name + '.png'))
+
\ No newline at end of file
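Taken together, a typical qualitative run wires the updated pieces up roughly as follows. This is an illustrative sketch only: it assumes the kNN class methods and the CLIP default introduced by this patch, and the dataset paths and the 'demo' name appendix are placeholders, not paths from the repository.

import torch
import clip

from kNN import kNN

device = 'cuda' if torch.cuda.is_available() else 'cpu'
size = 128                                    # value of the new --size flag

# CLIP is now the default feature extractor (-a clip)
model, transform = clip.load("ViT-B/32", device=device)

knn = kNN()
real_names, real_tensor = knn.get_images('data/lhq/train', transform, size=size)     # placeholder path
gen_names, gen_tensor = knn.get_images('samples/generated', transform, size=size)    # placeholder path

with torch.no_grad():
    real_features = knn.feature_extractor(real_tensor, model, device)
    gen_features = knn.feature_extractor(gen_tensor, model, device)

# plot the 3 nearest training images for the first 10 generated samples; figures land in ./output/
knn.kNN(real_names, gen_names, real_features, gen_features,
        'data/lhq/train', 'samples/generated',
        k=3, sample=10, size=size, name_appendix='demo')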