From 8d4c7bce79e63243bcecaa3a8f41d8e517ba6234 Mon Sep 17 00:00:00 2001
From: Srijeet Roy <srijeet.11@gmail.com>
Date: Mon, 31 Jul 2023 14:12:42 +0200
Subject: [PATCH] add evaluation helpers and update readme

---
 evaluation/evaluation_readme.md               |  79 ++++
 evaluation/helpers/__init__.py                |   0
 evaluation/helpers/inception.py               |  74 +++
 evaluation/helpers/kNN.py                     | 177 +++++++
 evaluation/helpers/metrics.py                 | 101 ++++
 evaluation/helpers/score_infinity.py          | 433 ++++++++++++++++++
 evaluation/helpers/vggface.py                 |  93 ++++
 7 files changed, 957 insertions(+)
 create mode 100644 evaluation/evaluation_readme.md
 create mode 100644 evaluation/helpers/__init__.py
 create mode 100644 evaluation/helpers/inception.py
 create mode 100644 evaluation/helpers/kNN.py
 create mode 100644 evaluation/helpers/metrics.py
 create mode 100644 evaluation/helpers/score_infinity.py
 create mode 100644 evaluation/helpers/vggface.py

diff --git a/evaluation/evaluation_readme.md b/evaluation/evaluation_readme.md
new file mode 100644
index 0000000..4b0c24e
--- /dev/null
+++ b/evaluation/evaluation_readme.md
@@ -0,0 +1,79 @@
+# Evaluation Pipeline
+
+We conduct two types of evaluation - qualitative and quantitative.
+
+### Quantitative evaluations -
+
+Quantitative metrics can be further categorised into two groups - content-variant and content-invariant metrics.
+
+Content-variant metrics are useful when the model can generate different samples from a noise vector. \
+These evaluations are carried out to compare different backbone architectures of our unconditional diffusion model. \
+A set of 10,000 generated samples from each model variant is compared with the test set of the real dataset. \
+These evaluations include (see the sketch after this list) -
+1. FID score 
+2. Inception score 
+3. Clean FID score (with CLIP) 
+4. FID infinity and IS infinity scores 
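+
+A minimal sketch of how these scores could be computed with the helper functions added in this patch \
+(see evaluation/helpers/metrics.py); the paths are placeholders and the evaluation package is assumed to be importable from the repository root:
+
+<pre>
+import torch
+from evaluation.helpers.metrics import image_to_tensor, compute_scores, clean_fid, fid_inf_is_inf
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+real_path, gen_path = 'data/afhq/test', 'samples/ddpm'      # placeholder paths
+
+# load images as uint8 tensors (resized to 128x128 where needed)
+real = image_to_tensor(real_path, device=device)
+generated = image_to_tensor(gen_path, device=device)
+
+fid_score, _, is_score = compute_scores(real, generated, device)
+clean_fid_score, clip_clean_fid_score = clean_fid(real_path, gen_path)
+# the infinity variants run the Inception model on the GPU (see score_infinity.py)
+fid_infinity, is_infinity = fid_inf_is_inf(real_path, gen_path)
+</pre>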
+
+Content-invariant metrics are useful when the model output can be compared against a ground truth. \
+For example, our model can output the reconstructed version of an input training image (following the entire forward \
+and reverse trajectories). \
+These evaluations include (see the sketch after this list) -
+1. SSIM (Structural Similarity Index Measure)
+2. PSNR (Peak Signal-to-Noise Ratio)
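+
+A minimal sketch of the reconstruction check, assuming originals and their reconstructions are available as index-aligned float tensors in [0, 1] (the tensors below are placeholders):
+
+<pre>
+import torch
+from evaluation.helpers.metrics import compute_ssim_psnr_scores
+
+# placeholder tensors standing in for original images and their reconstructions
+original_images = torch.rand(8, 3, 128, 128)
+reconstructed_images = torch.rand(8, 3, 128, 128)
+
+psnr_score, ssim_score = compute_ssim_psnr_scores(original_images, reconstructed_images, device='cpu')
+</pre>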
+
+
+### Qualitative evaluations -
+
+The aim of this set of evaluations is to qualitatively inspect whether our model has overfit to the training images. \
+For this, the entire set of 10,000 generated samples from the best-performing model in the quantitative evaluation is \
+compared with the training set of the real dataset. \
+Additionally, the same quality check is run on a hand-selected subset of the best generations.
+
+The comparison is implemented as MSE values between features of the generated and training samples. The features are \
+extracted using a pretrained model (ResNet50-Places365/VGGFace or CLIP). Based on the MSE scores we compute (see the sketch after this list) -
+1. kNN - plot the k nearest neighbors of each generated sample 
+2. Closest pairs - plot the top pairs with the smallest MSE values 
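+
+A minimal sketch of both checks using the kNN helper class added in this patch (evaluation/helpers/kNN.py). \
+Loading CLIP via the openai clip package is an assumption here (evaluate_full.py handles model selection in practice), and the paths are placeholders:
+
+<pre>
+from pathlib import Path
+import clip                                            # assumed feature extractor, corresponds to -a 'clip'
+from evaluation.helpers.kNN import kNN
+
+device = 'cpu'
+model, preprocess = clip.load('ViT-B/32', device=device)
+
+helper = kNN()
+real_names, real_images = helper.get_images('data/afhq/train', preprocess)   # placeholder paths
+gen_names, gen_images = helper.get_images('samples/ddpm', preprocess)
+real_feat = helper.feature_extractor(real_images, model, device=device)
+gen_feat = helper.feature_extractor(gen_images, model, device=device)
+
+output_path = Path('output/qualitative_eval')
+helper.kNN(output_path, real_names, gen_names, real_feat, gen_feat,
+           'data/afhq/train', 'samples/ddpm', k=3, sample=10)
+helper.nearest_neighbor(output_path, real_names, gen_names, real_feat, gen_feat,
+                        'data/afhq/train', 'samples/ddpm', sample=10)
+</pre>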
+
+
+### Arguments - 
+
+Execution starts with the evaluate_full.py file. Input arguments are listed below (an example call follows the list) - 
+
+* <pre>-rp, --realpath : Path to real images (string) </pre>
+* <pre>-gp, --genpath  : Path to generated images (string) </pre>
+* <pre>--size          : Resolution of images the model was trained on, default 128 (int) </pre>                  
+* <pre>-a, --arch      : Choose between 'cnn' and 'clip'. The chosen pretrained model is used to extract features from the images.
+                         Default = 'clip' (string) 
+                         **!!! Currently no CNN models are supported**</pre>
+* <pre>-m, --mode      : Choose between 'kNN' and 'pairs' (for closest pairs) or both, default = 'both' (string) </pre>
+* <pre>-k, --k         : k value for kNN, default = 3 (int) </pre>
+* <pre>-s, --sample    : Choose between an int and 'all'. If mode is 'kNN', plot kNN for this many samples (the first s samples 
+                         in the directory of generated images). If mode is 'pairs', plot the top s closest pairs from the entire 
+                         directory of generated images. Default 10 (int or 'all') </pre>
+* <pre>-n, --name      : Name appendix (string) </pre>
+* <pre>--fid           : Choose between 'yes' and 'no'. Compute FID, Inception score and their variants. Default 'no' (string)   </pre>
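+
+For illustration, a typical call might look like the following (script location, paths and values are placeholders):
+
+<pre>
+python evaluate_full.py -rp data/afhq -gp samples/ddpm --size 128 -a clip -m both -k 3 -s 10 -n ddpm_run --fid yes
+</pre>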
+
+
+The path to real images should point to a directory with two sub-directories - train and test. For example:
+
+<pre>
+data 
+|_ afhq 
+|    |_ train 
+|           |_ cat
+|           |_ dog
+|           |_ wild
+|    |_ test 
+|           |_ cat
+|           |_ dog
+|           |_ wild
+</pre>
+
+CLIP features of training images are saved after the first execution. This alleviates the need to recompute \
+features of real images for different sets of generated samples.
+
+
+### Links
+3. Clean FID - https://github.com/GaParmar/clean-fid/tree/main
+4. FID infinity, IS infinity - https://github.com/mchong6/FID_IS_infinity/tree/master
\ No newline at end of file
diff --git a/evaluation/helpers/__init__.py b/evaluation/helpers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/evaluation/helpers/inception.py b/evaluation/helpers/inception.py
new file mode 100644
index 0000000..246e3bb
--- /dev/null
+++ b/evaluation/helpers/inception.py
@@ -0,0 +1,74 @@
+# Source - https://github.com/mchong6/FID_IS_infinity
+
+import torch
+import torch.nn as nn
+from torch.nn import Parameter as P
+from torchvision.models.inception import inception_v3
+import torch.nn.functional as F
+
+# Module that wraps the inception network to enable use with dataparallel and
+# returning pool features and logits.
+class WrapInception(nn.Module):
+    def __init__(self, net):
+        super(WrapInception,self).__init__()
+        self.net = net
+        self.mean = P(torch.tensor([0.485, 0.456, 0.406]).view(1, -1, 1, 1),
+                      requires_grad=False)
+        self.std = P(torch.tensor([0.229, 0.224, 0.225]).view(1, -1, 1, 1),
+                     requires_grad=False)
+    def forward(self, x):
+        x = (x - self.mean) / self.std
+        # Upsample if necessary
+        if x.shape[2] != 299 or x.shape[3] != 299:
+            x = F.interpolate(x, size=(299, 299), mode='bilinear', align_corners=True)
+        # 299 x 299 x 3
+        x = self.net.Conv2d_1a_3x3(x)
+        # 149 x 149 x 32
+        x = self.net.Conv2d_2a_3x3(x)
+        # 147 x 147 x 32
+        x = self.net.Conv2d_2b_3x3(x)
+        # 147 x 147 x 64
+        x = F.max_pool2d(x, kernel_size=3, stride=2)
+        # 73 x 73 x 64
+        x = self.net.Conv2d_3b_1x1(x)
+        # 73 x 73 x 80
+        x = self.net.Conv2d_4a_3x3(x)
+        # 71 x 71 x 192
+        x = F.max_pool2d(x, kernel_size=3, stride=2)
+        # 35 x 35 x 192
+        x = self.net.Mixed_5b(x)
+        # 35 x 35 x 256
+        x = self.net.Mixed_5c(x)
+        # 35 x 35 x 288
+        x = self.net.Mixed_5d(x)
+        # 35 x 35 x 288
+        x = self.net.Mixed_6a(x)
+        # 17 x 17 x 768
+        x = self.net.Mixed_6b(x)
+        # 17 x 17 x 768
+        x = self.net.Mixed_6c(x)
+        # 17 x 17 x 768
+        x = self.net.Mixed_6d(x)
+        # 17 x 17 x 768
+        x = self.net.Mixed_6e(x)
+        # 17 x 17 x 768
+        x = self.net.Mixed_7a(x)
+        # 8 x 8 x 1280
+        x = self.net.Mixed_7b(x)
+        # 8 x 8 x 2048
+        x = self.net.Mixed_7c(x)
+        # 8 x 8 x 2048
+        pool = torch.mean(x.view(x.size(0), x.size(1), -1), 2)
+        # 1 x 1 x 2048
+        logits = self.net.fc(F.dropout(pool, training=False).view(pool.size(0), -1))
+        # 1000 (num_classes)
+        return pool, logits
+
+# Load and wrap the Inception model
+def load_inception_net(parallel=False):
+    inception_model = inception_v3(pretrained=True, transform_input=False)
+    inception_model = WrapInception(inception_model.eval()).cuda()
+    if parallel:
+        inception_model = nn.DataParallel(inception_model)
+    return inception_model
diff --git a/evaluation/helpers/kNN.py b/evaluation/helpers/kNN.py
new file mode 100644
index 0000000..60876dd
--- /dev/null
+++ b/evaluation/helpers/kNN.py
@@ -0,0 +1,177 @@
+import os
+from pathlib import Path
+import torch
+import torchvision.transforms as transforms
+from torch.utils.data import DataLoader
+from PIL import Image
+import matplotlib.pyplot as plt
+from collections import OrderedDict
+
+
+class kNN():
+
+    def __init__(self):
+        pass
+
+    def get_images(self, path, transform, size=128, *args, **kwargs):
+        '''
+        returns 
+        names: list of filenames
+        image_tensor: tensor with all images
+        '''
+        # path to real image files
+        image_files = []
+        for p, subdirs, files in os.walk(path):
+            for f in files:
+                image_files.append(os.path.join(p,f))
+        # list to store filenames
+        names = []
+        # list to store images (transformed to tensors)
+        images_list = []
+        
+        for file in image_files:
+            if file.endswith('.jpg') or file.endswith('.png'):
+                # 'file' already holds the full path built by os.walk above
+                names.append(file)
+                im = Image.open(file)
+                if im.size[0] != size:
+                    im = im.resize((size,size))              # DDPM was trained on 128x128 images
+                im = transform(im)  
+                images_list.append(im)
+        
+        # tensor with all real image tensors
+        image_tensor = torch.stack(images_list)
+
+        return names, image_tensor
+
+    def feature_extractor(self, images, model, device='cpu', bs=128, *args, **kwargs):
+        '''
+        returns
+        features: features of the input images extracted with the given model (CLIP or a CNN)
+        '''
+        # extract features batch-wise with the chosen pretrained model
+        dataloader = DataLoader(images, batch_size=bs, shuffle=False)
+        features_list = []
+        if model._get_name() == 'CLIP':
+            with torch.no_grad():
+                for item in dataloader:
+                    features = model.encode_image(item.to(device))
+                    features_list.append(features)
+        else:
+            with torch.no_grad():
+                for item in dataloader:
+                    features = model(item.to(device))
+                    features_list.append(features)
+
+        features = torch.cat(features_list, dim=0)
+        return features
+
+
+    def kNN(self, output_path, real_names, generated_names, 
+            real_features, generated_features, 
+            path_to_real_images, path_to_generated_images, 
+            k=3, 
+            sample=10, size=128,
+            name_appendix='',
+            *args, **kwargs):
+        '''
+        creates a plot with (generated image: k nearest real images) pairs
+        '''
+        if sample > 50:
+            print('Cannot plot more than 50 samples! Clamping sample to 50.')
+            sample = 50
+        fig, ax = plt.subplots(sample, k+1, figsize=((k+1)*3,sample*2))
+
+        for i in range(len(generated_features)):
+            # l2 norm of one generated feature and all real features
+            dist = torch.linalg.vector_norm(real_features - generated_features[i], ord=2, dim=1)
+            
+            # draw the generated image
+            im = Image.open(generated_names[i])      # names already hold full paths
+            ax[i, 0].imshow(im)
+            ax[i, 0].set_xticks([])
+            ax[i, 0].set_yticks([])
+            ax[i, 0].set_title(f'Generated: {"_".join(generated_names[i].split("/")[-1].split("_")[1:])[:-4]}', fontsize=8)
+            
+            # kNN of the generated image
+            knn = dist.topk(k, largest=False)
+            j=1
+
+            # draw the k real images
+            for idx in knn.indices:
+                im = Image.open(real_names[idx.item()])      # names already hold full paths
+                if im.size[0] != size:
+                    im = im.resize((size,size))
+                ax[i, j].imshow(im)
+                ax[i, j].set_xticks([])
+                ax[i, j].set_yticks([])
+                ax[i, j].set_title(f'{"_".join(real_names[idx.item()].split("/")[-1].split("_")[1:])[:-4]}, {knn.values[j-1].item():.2f}', fontsize=8)
+                #ax[i, 1].set_title(f'{"_".join(real_names[idx.item()].split("/")[-1].split("_")[1:])[:-4]}', fontsize=8)
+                j += 1
+            if i == sample-1:
+                break
+        
+        # savefig
+        if not output_path.is_dir():
+            os.mkdir(output_path)
+        plot_name = f'{k}NN_{sample}_samples'
+        if name_appendix != '':
+            plot_name = plot_name + '_' + name_appendix
+        fig.savefig(os.path.join(output_path, plot_name + '.png'))
+
+    def nearest_neighbor(self, output_path, real_names, generated_names, 
+                    real_features, generated_features, 
+                    path_to_real_images, path_to_generated_images, 
+                    sample=10, size=128,
+                    name_appendix='',
+                    *args, **kwargs):
+        
+        print('Computing nearest neighbors...')
+        if sample > 50:
+            print('Cannot plot more than 50 samples! Clamping sample to 50.')
+            sample = 50
+        fig, ax = plt.subplots(sample, 2, figsize=(2*3,sample*2))
+        nn_dict = OrderedDict()
+        
+        for i in range(len(generated_features)):
+            # l2 norm of one generated feature and all real features
+            #dist = torch.linalg.vector_norm(real_features - generated_features[i], ord=2, dim=1)   # no mps support
+            dist = torch.norm(real_features - generated_features[i], dim=1, p=2)                    # soon to be deprecated
+            
+            # nearest neighbor of the generated image
+            knn = dist.topk(1, largest=False)
+            # insert to the dict: generated_image: (distance, index of the nearest neighbor)
+            nn_dict[generated_names[i]] = (knn.values.item(), knn.indices.item())
+        print('Finding closest pairs...')
+        # sort to get the generated-real pairs that were the closest
+        nn_dict_sorted = OrderedDict(sorted(nn_dict.items(), key=lambda item: item[1][0]))
+        # names of the generated images that look closest to the real images
+        gen_names = list(nn_dict_sorted.keys())
+        print('Drawing the plot...')
+        for i in range(sample):
+            # draw the generated image
+            #im = Image.open(os.path.join(path_to_generated_images, gen_names[i]))
+            im = Image.open(gen_names[i])
+            ax[i, 0].imshow(im)
+            ax[i, 0].set_xticks([])
+            ax[i, 0].set_yticks([])
+            ax[i, 0].set_title(f'Generated: {"_".join(gen_names[i].split("/")[-1].split("_")[1:])[:-4]}', fontsize=8)
+            
+            # draw the real image
+            knn_score, real_img_idx = nn_dict_sorted[gen_names[i]]
+            #im = Image.open(os.path.join(path_to_real_images, real_names[real_img_idx]))
+            im = Image.open(real_names[real_img_idx])
+            if im.size[0] != size:
+                im = im.resize((size,size))
+            ax[i, 1].imshow(im)
+            ax[i, 1].set_xticks([])
+            ax[i, 1].set_yticks([])
+            ax[i, 1].set_title(f'{"_".join(real_names[real_img_idx].split("/")[-1].split("_")[1:])[:-4]}, {knn_score:.2f}', fontsize=8)
+                
+        #savefig
+        if not output_path.is_dir():
+            os.mkdir(output_path)
+        plot_name = f'closest_pairs_top_{sample}'
+        if name_appendix != '':
+            plot_name = plot_name + '_' + name_appendix
+        fig.savefig(os.path.join(output_path, plot_name + '.png'))
+        
\ No newline at end of file
diff --git a/evaluation/helpers/metrics.py b/evaluation/helpers/metrics.py
new file mode 100644
index 0000000..93b11a4
--- /dev/null
+++ b/evaluation/helpers/metrics.py
@@ -0,0 +1,101 @@
+import os
+import torch
+from tqdm import tqdm
+from PIL import Image
+from torchvision import transforms
+from torch.utils.data import DataLoader
+from itertools import cycle
+from torchmetrics.image.fid import FrechetInceptionDistance
+from torchmetrics.image.inception import InceptionScore
+from torchmetrics.image.kid import KernelInceptionDistance
+from torchmetrics.image import StructuralSimilarityIndexMeasure, PeakSignalNoiseRatio
+from cleanfid import fid
+from evaluation.helpers.score_infinity import calculate_FID_infinity_path, calculate_IS_infinity_path
+
+def image_to_tensor(path, sample='all', device='cpu'):
+
+    transform_resize = transforms.Compose([transforms.ToTensor(), transforms.Resize(128), transforms.Lambda(lambda x: (x * 255).type(torch.uint8))])
+    transform = transforms.Compose([transforms.ToTensor(), transforms.Lambda(lambda x: (x * 255).type(torch.uint8)) ])
+    filelist = []
+    for p, subdirs, files in os.walk(path):
+        for f in files:
+            filelist.append(os.path.join(p,f))
+    print(f'found {len(filelist)} files in {path}')
+    if sample == 'all':
+        sample_size = -1
+    else:
+        sample_size = sample
+    image_list = []
+    for file in filelist:
+        if file.endswith('.jpg') or file.endswith('.png'):
+            # 'file' already holds the full path built by os.walk above
+            im = Image.open(file)
+            if im.size[0] != 128:
+                im = transform_resize(im)
+            else: 
+                im = transform(im)
+            image_list.append(im)
+            if len(image_list) == sample_size:
+                break
+    print(f'current sample size: {len(image_list)}')
+    # convert list of tensors to tensor
+    image_tensor = torch.stack(image_list).to(device)
+    return image_tensor
+
+
+def compute_scores(real, generated, device):
+
+    real_dataloader = DataLoader(real, batch_size=128, shuffle=True)
+    generated_dataloader = DataLoader(generated, batch_size=128, shuffle=True)
+
+    fid = FrechetInceptionDistance().to(device)
+    #kid = KernelInceptionDistance().to(device)         # subset_size < samples !
+    inception = InceptionScore().to(device)
+
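+    # pair each real batch with a generated batch; cycle() repeats the generated loader if it has fewer batches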
+    for r, g in zip(real_dataloader, cycle(generated_dataloader)):
+        r = r.to(device)
+        g = g.to(device)
+        fid.update(r, real=True)
+        fid.update(g, real=False)
+        #kid.update(r, real=True)
+        #kid.update(g, real=False)
+        inception.update(g)
+    
+    fid_score = fid.compute()
+    #kid_score = kid.compute()
+    kid_score = 0.0
+    is_score = inception.compute()
+    return fid_score, kid_score, is_score
+
+
+def clean_fid(path_to_real_images, path_to_generated_images):
+
+    clean_fid_score = fid.compute_fid(path_to_real_images, path_to_generated_images, mode="clean", num_workers=0)
+    clip_clean_fid_score = fid.compute_fid(path_to_real_images, path_to_generated_images, mode="clean", model_name="clip_vit_b_32")
+
+    return clean_fid_score, clip_clean_fid_score
+
+
+def fid_inf_is_inf(path_to_real_images, path_to_generated_images, batchsize=128):
+
+    fid_infinity = calculate_FID_infinity_path(path_to_real_images, path_to_generated_images, batch_size=batchsize)
+    is_infinity = calculate_IS_infinity_path(path_to_generated_images, batch_size=batchsize)
+
+    return fid_infinity, is_infinity
+
+
+def compute_ssim_psnr_scores(real, generated, device):
+    real_dataloader = DataLoader(real, batch_size=128, shuffle=False)
+    generated_dataloader = DataLoader(generated, batch_size=128, shuffle=False)
+
+    ssim = StructuralSimilarityIndexMeasure(data_range=1.0).to(device)
+    psnr = PeakSignalNoiseRatio().to(device)
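+    # loaders are unshuffled, so each generated (reconstructed) batch lines up with its real counterpart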
+    for r, g in zip(real_dataloader, cycle(generated_dataloader)):
+        r = r.to(device)
+        g = g.to(device)
+        ssim.update(preds=g, target=r)
+        psnr.update(preds=g, target=r)
+    ssim_score = ssim.compute()
+    psnr_score = psnr.compute()
+    return psnr_score, ssim_score
\ No newline at end of file
diff --git a/evaluation/helpers/score_infinity.py b/evaluation/helpers/score_infinity.py
new file mode 100644
index 0000000..638f730
--- /dev/null
+++ b/evaluation/helpers/score_infinity.py
@@ -0,0 +1,433 @@
+# Source - https://github.com/mchong6/FID_IS_infinity
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import Dataset
+import torchvision.transforms as transforms
+from botorch.sampling.qmc import NormalQMCEngine
+import numpy as np
+import math
+from sklearn.linear_model import LinearRegression
+import os
+import glob
+from tqdm import tqdm
+from PIL import Image
+from scipy import linalg 
+from evaluation.helpers.inception import *
+
+class randn_sampler():
+    """
+    Generates z~N(0,1) using random sampling or scrambled Sobol sequences.
+    Args:
+        ndim: (int)
+            The dimension of z.
+        use_sobol: (bool)
+            If True, sample z from scrambled Sobol sequence. Else, sample 
+            from standard normal distribution.
+            Default: False
+        use_inv: (bool)
+            If True, use inverse CDF to transform z from U[0,1] to N(0,1).
+            Else, use Box-Muller transformation.
+            Default: True
+        cache: (bool)
+            If True, we cache some amount of Sobol points and reorder them.
+            This is mainly used for training GANs when we use two separate
+            Sobol generators which helps stabilize the training.
+            Default: False
+            
+    Examples::
+
+        >>> sampler = randn_sampler(128, True)
+        >>> z = sampler.draw(10) # Generates [10, 128] vector
+    """
+
+    def __init__(self, ndim, use_sobol=False, use_inv=True, cache=False):
+        self.ndim = ndim
+        self.cache = cache
+        if use_sobol:
+            self.sampler = NormalQMCEngine(d=ndim, inv_transform=use_inv)
+            self.cached_points = torch.tensor([])
+        else:
+            self.sampler = None
+
+    def draw(self, batch_size):
+        if self.sampler is None:
+            return torch.randn([batch_size, self.ndim])
+        else:
+            if self.cache:
+                if len(self.cached_points) < batch_size:
+                    # sample from sampler and reorder the points
+                    self.cached_points = self.sampler.draw(int(1e6))[torch.randperm(int(1e6))]
+
+                # Sample without replacement from cached points
+                samples = self.cached_points[:batch_size]
+                self.cached_points = self.cached_points[batch_size:]
+                return samples
+            else:
+                return self.sampler.draw(batch_size)
+
+def calculate_FID_infinity(gen_model, ndim, batch_size, gt_path, num_im=50000, num_points=15):
+    """
+    Calculates effectively unbiased FID_inf using extrapolation
+    Args:
+        gen_model: (nn.Module)
+            The trained generator. Generator takes in z~N(0,1) and outputs
+            an image of [-1, 1].
+        ndim: (int)
+            The dimension of z.
+        batch_size: (int)
+            The batch size of generator
+        gt_path: (str)
+            Path to saved FID statistics of true data.
+        num_im: (int)
+            Number of images we are generating to evaluate FID_inf.
+            Default: 50000
+        num_points: (int)
+            Number of FID_N we evaluate to fit a line.
+            Default: 15
+    """
+    # load pretrained inception model 
+    inception_model = load_inception_net()
+
+    # define a sobol_inv sampler
+    z_sampler = randn_sampler(ndim, True)
+
+    # get all activations of generated images
+    activations, _ =  accumulate_activations(gen_model, inception_model, num_im, z_sampler, batch_size)
+
+    fids = []
+
+    # Choose the number of images to evaluate FID_N at regular intervals over N
+    fid_batches = np.linspace(5000, num_im, num_points).astype('int32')
+
+    # Evaluate FID_N
+    for fid_batch_size in fid_batches:
+        # shuffle and take a random subset (sampling without replacement)
+        np.random.shuffle(activations)
+        fid_activations = activations[:fid_batch_size]
+        fids.append(calculate_FID(inception_model, fid_activations, gt_path))
+    fids = np.array(fids).reshape(-1, 1)
+    
+    # Fit linear regression
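+    # (FID_N is regressed on 1/N; the intercept at 1/N = 0 is the extrapolation to N -> infinity)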
+    reg = LinearRegression().fit(1/fid_batches.reshape(-1, 1), fids)
+    fid_infinity = reg.predict(np.array([[0]]))[0,0]
+
+    return fid_infinity
+
+def calculate_FID_infinity_path(real_path, fake_path, batch_size=50, min_fake=1000, num_points=15):
+    """
+    Calculates effectively unbiased FID_inf using extrapolation given 
+    paths to real and fake data
+    Args:
+        real_path: (str)
+            Path to real dataset or precomputed .npz statistics.
+        fake_path: (str)
+            Path to fake dataset.
+        batch_size: (int)
+            The batch size for dataloader.
+            Default: 50
+        min_fake: (int)
+            Minimum number of images to evaluate FID on.
+            Default: 1000
+        num_points: (int)
+            Number of FID_N we evaluate to fit a line.
+            Default: 15
+    """
+    # load pretrained inception model 
+    inception_model = load_inception_net()
+
+    # get all activations of generated images
+    if real_path.endswith('.npz'):
+        real_m, real_s = load_path_statistics(real_path)
+    else:
+        real_act, _ = compute_path_statistics(real_path, batch_size, model=inception_model)
+        real_m, real_s = np.mean(real_act, axis=0), np.cov(real_act, rowvar=False)
+
+    fake_act, _ = compute_path_statistics(fake_path, batch_size, model=inception_model)
+
+    num_fake = len(fake_act)
+    assert num_fake > min_fake, \
+        'number of fake data must be greater than the minimum point for extrapolation'
+
+    fids = []
+
+    # Choose the number of images to evaluate FID_N at regular intervals over N
+    fid_batches = np.linspace(min_fake, num_fake, num_points).astype('int32')
+
+    # Evaluate FID_N
+    for fid_batch_size in fid_batches:
+        # shuffle and take a random subset (sampling without replacement)
+        np.random.shuffle(fake_act)
+        fid_activations = fake_act[:fid_batch_size]
+        m, s = np.mean(fid_activations, axis=0), np.cov(fid_activations, rowvar=False)
+        FID = numpy_calculate_frechet_distance(m, s, real_m, real_s)
+        fids.append(FID)
+    fids = np.array(fids).reshape(-1, 1)
+    
+    # Fit linear regression
+    reg = LinearRegression().fit(1/fid_batches.reshape(-1, 1), fids)
+    fid_infinity = reg.predict(np.array([[0]]))[0,0]
+
+    return fid_infinity
+
+def calculate_IS_infinity(gen_model, ndim, batch_size, num_im=50000, num_points=15):
+    """
+    Calculates effectively unbiased IS_inf using extrapolation
+    Args:
+        gen_model: (nn.Module)
+            The trained generator. Generator takes in z~N(0,1) and outputs
+            an image of [-1, 1].
+        ndim: (int)
+            The dimension of z.
+        batch_size: (int)
+            The batch size of generator
+        num_im: (int)
+            Number of images we are generating to evaluate IS_inf.
+            Default: 50000
+        num_points: (int)
+            Number of IS_N we evaluate to fit a line.
+            Default: 15
+    """
+    # load pretrained inception model 
+    inception_model = load_inception_net()
+
+    # define a sobol_inv sampler
+    z_sampler = randn_sampler(ndim, True)
+
+    # get all activations of generated images
+    _, logits =  accumulate_activations(gen_model, inception_model, num_im, z_sampler, batch_size)
+
+    IS = []
+
+    # Choose the number of images to evaluate IS_N at regular intervals over N
+    IS_batches = np.linspace(5000, num_im, num_points).astype('int32')
+
+    # Evaluate IS_N
+    for IS_batch_size in IS_batches:
+        # shuffle and take a random subset (sampling without replacement)
+        np.random.shuffle(logits)
+        IS_logits = logits[:IS_batch_size]
+        IS.append(calculate_inception_score(IS_logits)[0])
+    IS = np.array(IS).reshape(-1, 1)
+    
+    # Fit linear regression
+    reg = LinearRegression().fit(1/IS_batches.reshape(-1, 1), IS)
+    IS_infinity = reg.predict(np.array([[0]]))[0,0]
+
+    return IS_infinity
+
+def calculate_IS_infinity_path(path, batch_size=50, min_fake=1000, num_points=15):
+    """
+    Calculates effectively unbiased IS_inf using extrapolation given 
+    a path to fake data
+    Args:
+        path: (str)
+            Path to fake dataset.
+        batch_size: (int)
+            The batch size for dataloader.
+            Default: 50
+        min_fake: (int)
+            Minimum number of images to evaluate IS on.
+            Default: 1000
+        num_points: (int)
+            Number of IS_N we evaluate to fit a line.
+            Default: 15
+    """
+    # load pretrained inception model 
+    inception_model = load_inception_net()
+
+    # get all activations of generated images
+    _, logits = compute_path_statistics(path, batch_size, model=inception_model)
+
+    num_fake = len(logits)
+    assert num_fake > min_fake, \
+        'number of fake data must be greater than the minimum point for extrapolation'
+
+    IS = []
+
+    # Choose the number of images to evaluate IS_N at regular intervals over N
+    IS_batches = np.linspace(min_fake, num_fake, num_points).astype('int32')
+
+    # Evaluate IS_N
+    for IS_batch_size in IS_batches:
+        # shuffle and take a random subset (sampling without replacement)
+        np.random.shuffle(logits)
+        IS_logits = logits[:IS_batch_size]
+        IS.append(calculate_inception_score(IS_logits)[0])
+    IS = np.array(IS).reshape(-1, 1)
+    
+    # Fit linear regression
+    reg = LinearRegression().fit(1/IS_batches.reshape(-1, 1), IS)
+    IS_infinity = reg.predict(np.array([[0]]))[0,0]
+
+    return IS_infinity
+
+################# Functions for calculating and saving dataset inception statistics ##################
+class im_dataset(Dataset):
+    def __init__(self, data_dir):
+        self.data_dir = data_dir
+        self.imgpaths = self.get_imgpaths()
+        
+        self.transform = transforms.Compose([
+                       transforms.Resize(64),
+                       transforms.CenterCrop(64),
+                       transforms.ToTensor()])
+
+    def get_imgpaths(self):
+        paths = glob.glob(os.path.join(self.data_dir, "**/*.jpg"), recursive=True) +\
+            glob.glob(os.path.join(self.data_dir, "**/*.png"), recursive=True)
+        return paths
+    
+    def __getitem__(self, idx):
+        img_name = self.imgpaths[idx]
+        image = self.transform(Image.open(img_name))
+        return image
+
+    def __len__(self):
+        return len(self.imgpaths)
+
+def load_path_statistics(path):
+    """
+    Given path to dataset npz file, load and return mu and sigma
+    """
+    if path.endswith('.npz'):
+        f = np.load(path)
+        m, s = f['mu'][:], f['sigma'][:]
+        f.close()
+        return m, s
+    else:
+        raise RuntimeError('Invalid path: %s' % path)
+        
+def compute_path_statistics(path, batch_size, model=None):
+    """
+    Given path to a dataset, load and compute mu and sigma.
+    """
+    if not os.path.exists(path):
+        raise RuntimeError('Invalid path: %s' % path)
+        
+    if model is None:
+        model = load_inception_net()
+    dataset = im_dataset(path)
+    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, drop_last=False)
+    return get_activations(dataloader, model)
+
+def get_activations(dataloader, model):
+    """
+    Get inception activations from dataset
+    """
+    pool = []
+    logits = []
+
+    for images in tqdm(dataloader):
+        images = images.cuda()
+        with torch.no_grad():
+            pool_val, logits_val = model(images)
+            pool += [pool_val]
+            logits += [F.softmax(logits_val, 1)]
+
+    return torch.cat(pool, 0).cpu().numpy(), torch.cat(logits, 0).cpu().numpy()
+
+def accumulate_activations(gen_model, inception_model, num_im, z_sampler, batch_size):
+    """
+    Generate images and compute their Inception activations.
+    """
+    pool, logits = [], []
+    for i in range(math.ceil(num_im/batch_size)):
+        with torch.no_grad():
+            z = z_sampler.draw(batch_size).cuda()
+            fake_img = to_img(gen_model(z))
+
+            pool_val, logits_val = inception_model(fake_img)
+            pool += [pool_val]
+            logits += [F.softmax(logits_val, 1)]
+
+    pool =  torch.cat(pool, 0)[:num_im]
+    logits = torch.cat(logits, 0)[:num_im]
+
+    return pool.cpu().numpy(), logits.cpu().numpy()
+
+def to_img(x):
+    """
+    Normalizes an image from [-1, 1] to [0, 1]
+    """
+    x = 0.5 * (x + 1)
+    x = x.clamp(0, 1)
+    return x
+
+
+
+####################### Functions to help calculate FID and IS #######################
+def calculate_FID(model, act, gt_npz):
+    """
+    calculate score given activations and path to npz
+    """
+    data_m, data_s = load_path_statistics(gt_npz)
+    gen_m, gen_s = np.mean(act, axis=0), np.cov(act, rowvar=False)
+    FID = numpy_calculate_frechet_distance(gen_m, gen_s, data_m, data_s)
+
+    return FID
+
+def calculate_inception_score(pred, num_splits=1):
+    scores = []
+    for index in range(num_splits):
+        pred_chunk = pred[index * (pred.shape[0] // num_splits): (index + 1) * (pred.shape[0] // num_splits), :]
+        kl_inception = pred_chunk * (np.log(pred_chunk) - np.log(np.expand_dims(np.mean(pred_chunk, 0), 0)))
+        kl_inception = np.mean(np.sum(kl_inception, 1))
+        scores.append(np.exp(kl_inception))
+    return np.mean(scores), np.std(scores)
+
+
+def numpy_calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
+    """Numpy implementation of the Frechet Distance.
+    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
+    and X_2 ~ N(mu_2, C_2) is
+            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
+    Stable version by Dougal J. Sutherland.
+    Params:
+    -- mu1   : Numpy array containing the activations of a layer of the
+               inception net (like returned by the function 'get_predictions')
+               for generated samples.
+    -- mu2   : The sample mean over activations, precalculated on an
+               representative data set.
+    -- sigma1: The covariance matrix over activations for generated samples.
+    -- sigma2: The covariance matrix over activations, precalculated on an
+               representative data set.
+    Returns:
+    --   : The Frechet Distance.
+    """
+
+    mu1 = np.atleast_1d(mu1)
+    mu2 = np.atleast_1d(mu2)
+
+    sigma1 = np.atleast_2d(sigma1)
+    sigma2 = np.atleast_2d(sigma2)
+
+    assert mu1.shape == mu2.shape, \
+        'Training and test mean vectors have different lengths'
+    assert sigma1.shape == sigma2.shape, \
+        'Training and test covariances have different dimensions'
+
+    diff = mu1 - mu2
+
+    # Product might be almost singular
+    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
+    if not np.isfinite(covmean).all():
+        msg = ('fid calculation produces singular product; '
+               'adding %s to diagonal of cov estimates') % eps
+        print(msg)
+        offset = np.eye(sigma1.shape[0]) * eps
+        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
+
+    # Numerical error might give slight imaginary component
+    if np.iscomplexobj(covmean):
+        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
+            m = np.max(np.abs(covmean.imag))
+            raise ValueError('Imaginary component {}'.format(m))
+        covmean = covmean.real
+
+    tr_covmean = np.trace(covmean)
+
+    return (diff.dot(diff) + np.trace(sigma1) +
+            np.trace(sigma2) - 2 * tr_covmean)
diff --git a/evaluation/helpers/vggface.py b/evaluation/helpers/vggface.py
new file mode 100644
index 0000000..2e4086e
--- /dev/null
+++ b/evaluation/helpers/vggface.py
@@ -0,0 +1,93 @@
+# VGG16 model from https://github.com/prlz77/vgg-face.pytorch
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchfile
+
+class VGG_16(nn.Module):
+    """
+    Main Class
+    """
+
+    def __init__(self):
+        """
+        Constructor
+        """
+        super().__init__()
+        self.block_size = [2, 2, 3, 3, 3]
+        self.conv_1_1 = nn.Conv2d(3, 64, 3, stride=1, padding=1)
+        self.conv_1_2 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
+        self.conv_2_1 = nn.Conv2d(64, 128, 3, stride=1, padding=1)
+        self.conv_2_2 = nn.Conv2d(128, 128, 3, stride=1, padding=1)
+        self.conv_3_1 = nn.Conv2d(128, 256, 3, stride=1, padding=1)
+        self.conv_3_2 = nn.Conv2d(256, 256, 3, stride=1, padding=1)
+        self.conv_3_3 = nn.Conv2d(256, 256, 3, stride=1, padding=1)
+        self.conv_4_1 = nn.Conv2d(256, 512, 3, stride=1, padding=1)
+        self.conv_4_2 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
+        self.conv_4_3 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
+        self.conv_5_1 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
+        self.conv_5_2 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
+        self.conv_5_3 = nn.Conv2d(512, 512, 3, stride=1, padding=1)
+        self.fc6 = nn.Linear(512 * 7 * 7, 4096)
+        self.fc7 = nn.Linear(4096, 4096)
+        self.fc8 = nn.Linear(4096, 2622)
+
+    def load_weights(self, path):
+        """ Function to load luatorch pretrained
+
+        Args:
+            path: path for the luatorch pretrained
+        """
+        model = torchfile.load(path)
+        counter = 1
+        block = 1
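+        # walk the luatorch modules in order, mapping them onto conv_<block>_<counter> and then fc6..fc8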
+        for i, layer in enumerate(model.modules):
+            if layer.weight is not None:
+                if block <= 5:
+                    self_layer = getattr(self, "conv_%d_%d" % (block, counter))
+                    counter += 1
+                    if counter > self.block_size[block - 1]:
+                        counter = 1
+                        block += 1
+                    self_layer.weight.data[...] = torch.tensor(layer.weight).view_as(self_layer.weight)[...]
+                    self_layer.bias.data[...] = torch.tensor(layer.bias).view_as(self_layer.bias)[...]
+                else:
+                    self_layer = getattr(self, "fc%d" % (block))
+                    block += 1
+                    self_layer.weight.data[...] = torch.tensor(layer.weight).view_as(self_layer.weight)[...]
+                    self_layer.bias.data[...] = torch.tensor(layer.bias).view_as(self_layer.bias)[...]
+
+    def forward(self, x):
+        """ Pytorch forward
+
+        Args:
+            x: input image (224x224)
+
+        Returns: class logits
+
+        """
+        x = F.relu(self.conv_1_1(x))
+        x = F.relu(self.conv_1_2(x))
+        x = F.max_pool2d(x, 2, 2)
+        x = F.relu(self.conv_2_1(x))
+        x = F.relu(self.conv_2_2(x))
+        x = F.max_pool2d(x, 2, 2)
+        x = F.relu(self.conv_3_1(x))
+        x = F.relu(self.conv_3_2(x))
+        x = F.relu(self.conv_3_3(x))
+        x = F.max_pool2d(x, 2, 2)
+        x = F.relu(self.conv_4_1(x))
+        x = F.relu(self.conv_4_2(x))
+        x = F.relu(self.conv_4_3(x))
+        x = F.max_pool2d(x, 2, 2)
+        x = F.relu(self.conv_5_1(x))
+        x = F.relu(self.conv_5_2(x))
+        x = F.relu(self.conv_5_3(x))
+        x = F.max_pool2d(x, 2, 2)
+        x = x.view(x.size(0), -1)
+        x = F.relu(self.fc6(x))
+        x = F.dropout(x, 0.5, self.training)
+        x = F.relu(self.fc7(x))
+        x = F.dropout(x, 0.5, self.training)
+        return self.fc8(x)
\ No newline at end of file
-- 
GitLab