Commit d2228f1f authored by Marius Laska's avatar Marius Laska

new evaluation of large experiments plus visualization

parent dc78ab1f
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from il_pipeline.pipeline import Pipeline
from il_pipeline.utility.storable import Storable
from matplotlib.backends.backend_pdf import PdfPages
from scipy.interpolate import interp1d
import seaborn as sns
from analysis.plot_analysis import get_avg_box_size
from analysis.visualize_learning_progress import \
convert_from_2dim_overlapping_grid
from main import calc_acc_c
def calc_reg_stats(reg_size_dict, num_layers=3, dataset="lohan", show_plot=True):
    """Collect the area/accuracy curve of the regression baseline.

    Loads ``num_avg`` stored pipeline runs, computes the per-sample distance
    between predicted and true labels and converts every distance ``d`` into
    a circle area ``pi * d**2``.  The sorted areas of all runs, together with
    the fraction of samples that have a smaller area, are appended to
    ``reg_size_dict``.  The per-sample distances are appended to the
    module-level ``res_dict``.

    Reads the module-level names ``f_idx`` (fold index, or ``None`` to use
    all folds), ``floor`` (used in the UJI path and figure title) and
    ``res_dict``.

    Args:
        reg_size_dict: dict with list entries ``"Model"``, ``"size"`` and
            ``"ACC"``; extended in place with one entry each.
        num_layers: layer count used for the model label, the plot colour
            and (for "lohan") the stored-run file name.
        dataset: ``"lohan"`` or ``"UJI"``.
        show_plot: if true, draw the curve on the current matplotlib axes.

    Raises:
        ValueError: if ``dataset`` is not one of the supported names.
    """
    num_avg = 10
    model_name = "DNN->O ({}L)".format(num_layers)
    col_val = ["red", "blue", "green"][num_layers - 1]

    # Path template, its leading format arguments and the title do not
    # depend on the run index, so resolve them once.
    if dataset == "lohan":
        file = "/home/laskama/Dropbox/evaluation/lohan/REG{}_{}"
        fig_title = "Lohan (avg)"
        path_args = (num_layers,)
    elif dataset == "UJI":
        file = "/media/laskama/Daten/BBOX/GPU/uji/gpu/b0/f{}/output/CNNLoc_{}"
        fig_title = "UJIndoorLoc (floor-{})".format(floor)
        path_args = (floor,)
    else:
        raise ValueError("unsupported dataset: {!r}".format(dataset))

    size_list = []
    for idx in range(num_avg):
        p: Pipeline = Storable.load(file.format(*path_args, idx + 1))

        if f_idx is None:
            y_true = np.concatenate(p.summary.y_true_labels, axis=0)
            y_pred = np.concatenate(p.summary.y_pred, axis=0)
        else:
            y_true = p.summary.y_true_labels[f_idx]
            y_pred = p.summary.y_pred[f_idx]

        dist = np.sort(np.linalg.norm(y_pred - y_true, axis=1))

        res_dict["Model"] += [model_name] * len(dist)
        res_dict["MSE"] += dist.tolist()
        res_dict["fold"] += [f_idx] * len(dist)

        size_list.append(np.pi * np.square(dist))

    size_avg = np.sort(np.concatenate(size_list, axis=0))

    # Derive the accuracy axis from the number of pooled samples instead of
    # "samples of the last run * num_avg", so both axes always have the same
    # length even if the runs differ in size.
    total = len(size_avg)
    acc = np.arange(total) / total

    reg_size_dict["Model"].append(model_name)
    reg_size_dict["size"].append(size_avg)
    reg_size_dict["ACC"].append(acc)

    if show_plot:
        plt.title(fig_title)
        plt.xlabel("Area [" + r'$m^2$' + "]")
        plt.ylabel("ACC")
        plt.plot(size_avg, acc,
                 linestyle="dashed", color=col_val, label=model_name)
def _interp_at_crossing(xs, ys, x0):
    """Linearly interpolate ``ys`` at ``x0`` between the two bracketing samples.

    The upper sample is the first one with ``xs > x0``.
    NOTE(review): if no sample exceeds ``x0`` (or the first one already
    does), ``np.argmax`` returns 0 and the lower index wraps to -1; interp1d
    may then raise for an out-of-range ``x0`` — confirm callers never hit
    that case.
    """
    hi = np.argmax(xs > x0)
    x = np.array([xs[hi - 1], xs[hi]])
    y = np.array([ys[hi - 1], ys[hi]])
    return interp1d(x, y)(x0)


def calc_box_stats(reg_size_dict, dataset="lohan", show_plot=True, calc_aug_gain=True):
    """Aggregate accuracy, box size and centre error of the stored BBOX runs.

    For every (aug, layer, beta) configuration up to ten stored pipeline runs
    are loaded; missing files are printed and skipped.  Per configuration the
    mean ``calc_acc_c`` accuracy, the mean ``get_avg_box_size`` value and
    mean/median/min/max of the centre distance are stored as one row.  The
    per-sample distances are appended to the module-level ``res_dict``.

    If ``calc_aug_gain`` is set, every row is additionally compared against
    each curve in ``reg_size_dict`` and the curve with the smallest accuracy
    gain is recorded in the columns "best_reg", "ACC-gain" and "size-gain".

    Reads the module-level names ``f_idx`` (fold index, or ``None`` to use
    all folds), ``floor`` (used in the UJI path) and ``res_dict``.

    Args:
        reg_size_dict: dict with list entries ``"Model"``, ``"size"`` and
            ``"ACC"`` as filled by ``calc_reg_stats``.
        dataset: ``"lohan"`` or ``"UJI"``.
        show_plot: if true, scatter one point per configuration on the
            current matplotlib axes and set x-limit and legend.
        calc_aug_gain: if true, compute the augmentation ratio and the gain
            columns.

    Returns:
        Tuple ``(box_df, num_add_aug_ratio)``; the ratio stays ``-1`` when it
        was not computed.

    Raises:
        ValueError: if ``dataset`` is not one of the supported names.
    """
    if dataset == "lohan":
        beta_range = [5, 10, 15]
        f1 = "/media/laskama/Daten/BBOX/lohan/test/output/BBOX_{}l_{}{}_{}"
    elif dataset == "UJI":
        beta_range = [10.0, 15.0, 20.0]
        f1 = "/media/laskama/Daten/BBOX/GPU/uji/gpu/b0/f{}/v2/output/CNNLoc-DLB_delta_{}{}_{}"
    else:
        raise ValueError("unsupported dataset: {!r}".format(dataset))

    box_dict = {"Model": [], "beta": [], "fold": [], "ACC": [],
                "size": [], "c-error (mean)": [], "c-error (median)": [],
                "c-error (min)": [], "c-error (max)": []}
    markers = {1: "v", 2: "^", 3: ">"}

    num_add_aug_ratio = -1

    for aug in [""]:  # ["_aug", ""]:
        for l in [1]:  # , 2]:
            # Label, colour and marker only depend on (aug, l); binding them
            # here keeps them defined even if no run file is found.
            model_name = "BBOX ({}L{})".format(l, aug)
            c = "r" if aug == "_aug" else "g"
            m = markers.get(l, "o")

            for o in beta_range:
                b_size_list = []
                acc_box_list = []
                dist_list = []

                for idx in range(1, 11):
                    if dataset == "lohan":
                        file = f1.format(l, o, aug, idx)
                    else:
                        file = f1.format(floor, str(o), aug, idx)

                    if not os.path.exists(file):
                        print(file)
                        continue

                    p: Pipeline = Storable.load(file)

                    convert_from_2dim_overlapping_grid(p, grid_size=40,
                                                       quantile=False,
                                                       store=True,
                                                       average_samples=True)

                    if f_idx is None:
                        y_true = np.concatenate(p.summary.y_true_labels, axis=0)
                        y_pred = np.concatenate(p.summary.y_pred, axis=0)
                    else:
                        y_true = p.summary.y_true_labels[f_idx]
                        y_pred = p.summary.y_pred[f_idx]

                    # The augmentation ratio is taken from the first run
                    # that could be loaded.
                    if calc_aug_gain and num_add_aug_ratio == -1:
                        if dataset == "lohan":
                            p.data_provider.current_split_idx = f_idx
                        x, _ = p.data_provider.get_augmented_train_data()
                        x2, _ = p.data_provider.get_train_data(
                            labels=p.data_provider.grid_labels)
                        num_add_aug_ratio = (len(x) - len(x2)) / len(x2)

                    acc_box, _wrong_mask, _correct_mask = calc_acc_c(y_true,
                                                                     y_pred)
                    b_size = get_avg_box_size(p)

                    acc_box_list.append(acc_box)
                    b_size_list.append(b_size)

                    # Only the first two prediction columns are compared
                    # against the labels.
                    dist = np.linalg.norm(y_pred[:, :2] - y_true, axis=1)
                    dist_list.append(dist)

                    res_dict["Model"] += [model_name] * len(dist)
                    res_dict["MSE"] += dist.tolist()
                    res_dict["fold"] += [f_idx] * len(dist)

                if not dist_list:
                    # Every run file was missing; there is nothing to
                    # aggregate for this configuration.
                    print("no stored runs found for {} (beta={})".format(
                        model_name, o))
                    continue

                plt_label = model_name if o == 10 else ""

                size_avg = np.mean(np.array(b_size_list))
                acc_avg = np.mean(np.array(acc_box_list))
                dist_conc = np.concatenate(dist_list, axis=0)

                box_dict["Model"].append(model_name)
                box_dict["beta"].append(o)
                box_dict["fold"].append(f_idx)
                box_dict["ACC"].append(acc_avg)
                box_dict["size"].append(size_avg)
                box_dict["c-error (mean)"].append(np.mean(dist_conc))
                box_dict["c-error (median)"].append(np.median(dist_conc))
                box_dict["c-error (min)"].append(np.min(dist_conc))
                box_dict["c-error (max)"].append(np.max(dist_conc))

                if show_plot:
                    plt.scatter(size_avg, acc_avg, color=c, marker=m,
                                label=plt_label)

    if show_plot:
        plt.xlim(0, 600.0)
        plt.legend(loc="lower right")

    box_df = pd.DataFrame(box_dict)

    if calc_aug_gain:
        box_dict["best_reg"] = []
        box_dict["ACC-gain"] = []
        box_dict["size-gain"] = []

        # Compare every box configuration against each regression curve and
        # keep the curve that yields the smallest accuracy gain.
        for _, model in box_df.iterrows():
            b_acc = model["ACC"]
            b_size = model["size"]

            min_gain = 100.0
            min_size_gain = np.nan
            min_model = None

            for reg_model, reg_size, reg_acc in zip(reg_size_dict["Model"],
                                                    reg_size_dict["size"],
                                                    reg_size_dict["ACC"]):
                # Regression accuracy at the box size (size on x-axis).
                gain = b_acc - _interp_at_crossing(reg_size, reg_acc, b_size)

                # Regression size at the box accuracy (accuracy on x-axis).
                size_gain = b_size - _interp_at_crossing(reg_acc, reg_size,
                                                         b_acc)

                if gain < min_gain:
                    min_gain = gain
                    min_size_gain = size_gain
                    min_model = reg_model

            box_dict["ACC-gain"].append(min_gain)
            box_dict["size-gain"].append(min_size_gain)
            box_dict["best_reg"].append(min_model)

        box_df = pd.DataFrame(box_dict)

    return box_df, num_add_aug_ratio
def main(dataset="UJI"):
    """Run the regression and box evaluation and save the figure as PDF.

    Opens a new figure, lets ``calc_reg_stats`` and ``calc_box_stats`` draw
    into it, writes the current figure to ``"<dataset>_metric_<floor>.pdf"``
    (``floor`` is read from module level) and shows it.

    Args:
        dataset: dataset name forwarded to both statistic functions.

    Returns:
        Tuple ``(box_df, num_add_aug_ratio)`` as returned by
        ``calc_box_stats``.
    """
    plt.figure()

    reg_size_dict = {"Model": [], "size": [], "ACC": []}

    for n_layers in [1]:
        calc_reg_stats(reg_size_dict, num_layers=n_layers, dataset=dataset)

    box_df, num_add_aug_ratio = calc_box_stats(
        reg_size_dict, dataset=dataset, calc_aug_gain=True)

    plt.rcParams.update(plt.rcParamsDefault)
    plt.rc("savefig", dpi=200)

    # The context manager closes the PDF even if saving the figure fails.
    with PdfPages("{}_metric_{}.pdf".format(dataset, floor)) as pdf:
        pdf.savefig(bbox_inches="tight")

    plt.show()

    return box_df, num_add_aug_ratio
if __name__ == "__main__":
    # These names stay at module level on purpose: calc_reg_stats,
    # calc_box_stats and main read res_dict, f_idx and floor as globals.
    res_dict = {"Model": [], "MSE": [], "fold": []}
    num_add_aug_ratio = []

    dataset = "UJI"
    f_idx = None
    floor = 0

    box_df_concat = None

    if dataset == "UJI":
        # One evaluation per floor.
        for idx in range(1):
            floor = idx
            box_df, num_add_aug = main(dataset)
            num_add_aug_ratio.append(num_add_aug)
            box_df_concat = (
                box_df if box_df_concat is None
                else pd.concat([box_df_concat, box_df], ignore_index=True)
            )
    elif dataset == "lohan":
        # One evaluation per fold.
        for idx in range(5):
            f_idx = idx
            box_df, num_add_aug = main(dataset)
            num_add_aug_ratio.append(num_add_aug)
            box_df_concat = (
                box_df if box_df_concat is None
                else pd.concat([box_df_concat, box_df], ignore_index=True)
            )
# print("SIZE-GAIN (aug): {}".format(box_df_concat[box_df_concat["Model"].str.contains("aug")]["size-gain"].mean()))
# print("SIZE-GAIN (no-aug): {}".format(box_df_concat[~box_df_concat["Model"].str.contains("aug")]["size-gain"].mean()))
# print("ACC-GAIN (aug): {}".format(box_df_concat[box_df_concat["Model"].str.contains("aug")]["ACC-gain"].mean()))
# print("ACC-GAIN (no-aug): {}".format(box_df_concat[~box_df_concat["Model"].str.contains("aug")]["ACC-gain"].mean()))
# print("NUM-ADD_AUG_RATIO: {}".format(np.mean(num_add_aug_ratio)))
#
# df = pd.DataFrame(res_dict)
#
# print("MEAN-MSE: {}".format(df[df["Model"].str.contains("aug")]["MSE"].mean()))
# print("MEDIAN-MSE: {}".format(
# df[df["Model"].str.contains("aug")]["MSE"].median()))
# print("MIN-MSE: {}".format(
# df[df["Model"].str.contains("aug")]["MSE"].min()))
# print("MAX-MSE: {}".format(
# df[df["Model"].str.contains("aug")]["MSE"].max()))
#
# print(df[df["Model"].str.contains("aug")]["MSE"].describe())
#
# # df2 = pd.DataFrame(
# # group.describe().rename(columns={'MSE': name}).squeeze()
# # for name, group in df.groupby('Model'))
# # print(df)
# print(df[df["Model"].isin(["BBOX (2L_aug)", "DNN->O (2L)"])].groupby('Model')["MSE"].describe().unstack(1))
# BOX plot
# fig = plt.figure()
# sns.boxplot("Model", "MSE", data=df, color="skyblue")
#
# plt.rcParams.update(plt.rcParamsDefault)
# plt.rc("savefig", dpi=200)
#
# pdf = PdfPages("{}_boxplot.pdf".format(dataset))
#
# fig.set_size_inches(fig.get_size_inches()*[1.6, 1.3], forward=False)
#
# pdf.savefig(bbox_inches="tight")
# pdf.close()
#
# # BOX plot zoom
# fig = plt.figure()
# sns.boxplot("Model", "MSE", data=df, color="skyblue")
#
# plt.rcParams.update(plt.rcParamsDefault)
# plt.rc("savefig", dpi=200)
#
# pdf = PdfPages("{}_boxplot_zoom.pdf".format(dataset))
#
# plt.ylim(-0.5, 20.0)
# fig.set_size_inches(fig.get_size_inches() * [1.6, 1.3], forward=False)
#
# pdf.savefig(bbox_inches="tight")
# pdf.close()
# plt.show()
\ No newline at end of file
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from il_pipeline.pipeline import Pipeline
from il_pipeline.utility.storable import Storable
from matplotlib.backends.backend_pdf import PdfPages
from scipy.interpolate import interp1d
import seaborn as sns
from analysis.plot_analysis import get_avg_box_size
from analysis.visualize_learning_progress import \
convert_from_2dim_overlapping_grid
from main import calc_acc_c
def calc_reg_stats(reg_size_dict, num_layers=3, dataset="lohan", show_plot=True):
    """Collect the area/accuracy curve of the regression baseline.

    Loads ``num_avg`` stored pipeline runs, computes the per-sample distance
    between predicted and true labels and converts every distance ``d`` into
    a circle area ``pi * d**2``.  The sorted areas of all runs, together with
    the fraction of samples that have a smaller area, are appended to
    ``reg_size_dict``.  The per-sample distances are appended to the
    module-level ``res_dict``.

    Reads the module-level names ``f_idx`` (fold index, or ``None`` to use
    all folds), ``floor`` (used in the UJI path and figure title) and
    ``res_dict``.

    Args:
        reg_size_dict: dict with list entries ``"Model"``, ``"size"`` and
            ``"ACC"``; extended in place with one entry each.
        num_layers: layer count used for the model label, the plot colour
            and the stored-run file name.
        dataset: ``"lohan"`` or ``"UJI"``.
        show_plot: if true, draw the curve on the current matplotlib axes.

    Raises:
        ValueError: if ``dataset`` is not one of the supported names.
    """
    num_avg = 10
    model_name = "DNN->O ({}L)".format(num_layers)
    col_val = ["red", "blue", "green"][num_layers - 1]

    # Path template, its leading format arguments and the title do not
    # depend on the run index, so resolve them once.
    if dataset == "lohan":
        file = "/home/laskama/Dropbox/evaluation/lohan/REG{}_{}"
        fig_title = "Lohan (avg)"
        path_args = (num_layers,)
    elif dataset == "UJI":
        file = "/media/laskama/Daten/BBOX/GPU/uji/gpu/b0/f{}/output/DNN_hidden_layers_{}_{}"
        fig_title = "UJIndoorLoc (floor-{})".format(floor)
        path_args = (floor, num_layers)
    else:
        raise ValueError("unsupported dataset: {!r}".format(dataset))

    size_list = []
    for idx in range(num_avg):
        p: Pipeline = Storable.load(file.format(*path_args, idx + 1))

        if f_idx is None:
            y_true = np.concatenate(p.summary.y_true_labels, axis=0)
            y_pred = np.concatenate(p.summary.y_pred, axis=0)
        else:
            y_true = p.summary.y_true_labels[f_idx]
            y_pred = p.summary.y_pred[f_idx]

        dist = np.sort(np.linalg.norm(y_pred - y_true, axis=1))

        res_dict["Model"] += [model_name] * len(dist)
        res_dict["MSE"] += dist.tolist()
        res_dict["fold"] += [f_idx] * len(dist)

        size_list.append(np.pi * np.square(dist))

    size_avg = np.sort(np.concatenate(size_list, axis=0))

    # Derive the accuracy axis from the number of pooled samples instead of
    # "samples of the last run * num_avg", so both axes always have the same
    # length even if the runs differ in size.
    total = len(size_avg)
    acc = np.arange(total) / total

    reg_size_dict["Model"].append(model_name)
    reg_size_dict["size"].append(size_avg)
    reg_size_dict["ACC"].append(acc)

    if show_plot:
        plt.title(fig_title)
        plt.xlabel("Area [" + r'$m^2$' + "]")
        plt.ylabel("ACC")
        plt.plot(size_avg, acc,
                 linestyle="dashed", color=col_val, label=model_name)
def calc_box_stats(reg_size_dict, dataset="lohan", show_plot=True, calc_aug_gain=True):
if dataset == "lohan":
beta_range = [5, 10, 15]
f1 = "/media/laskama/Daten/BBOX/lohan/test/output/BBOX_{}l_{}{}_{}"
elif dataset == "UJI":
beta_range = [10.0, 15.0, 20.0]
f1 = "/media/laskama/Daten/BBOX/GPU/uji/gpu/b0/f{}/v2/output/DNN-DLB_hidden_layers_{}_delta_{}{}_{}"
mse_list = []
box_dict = {"Model": [], "beta": [], "fold": [], "ACC": [],
"size": [], "c-error (mean)": [], "c-error (median)": [],
"c-error (min)": [], "c-error (max)": []}
num_add_aug_ratio = -1
for aug in [""]: # ["_aug", ""]:
for l in [1, 2]: # , 2]:
for o in beta_range:
b_size_list = []
acc_box_list = []
dist_list = []
for idx in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
if dataset == "lohan":
file = f1.format(l, o, aug, idx)
elif dataset == "UJI":
file = f1.format(floor, l, str(o), aug, idx)
if not os.path.exists(file):
print(file)
continue
p: Pipeline = Storable.load(file)
convert_from_2dim_overlapping_grid(p, grid_size=40,
quantile=False,
store=True,
average_samples=True)
if f_idx is None:
y_true = np.concatenate(p.summary.y_true_labels, axis=0)
y_pred = np.concatenate(p.summary.y_pred, axis=0)
else:
y_true = p.summary.y_true_labels[f_idx]
y_pred = p.summary.y_pred[f_idx]
if calc_aug_gain:
if num_add_aug_ratio == -1 and dataset == "UJI":
x, _ = p.data_provider.get_augmented_train_data()
x2, _ = p.data_provider.get_train_data(labels=p.data_provider.grid_labels)
num_add_aug_ratio = (len(x) - len(x2)) / len(x2)
elif num_add_aug_ratio == -1 and dataset == "lohan":
p.data_provider.current_split_idx = f_idx
x, _ = p.data_provider.get_augmented_train_data()
x2, _ = p.data_provider.get_train_data(labels=p.data_provider.grid_labels)
num_add_aug_ratio = (len(x) - len(x2)) / len(x2)
acc_box, wrong_mask, correct_mask = calc_acc_c(y_true,
y_pred)
b_size = get_avg_box_size(p)
acc_box_list.append(acc_box)
b_size_list.append(b_size)
dist = np.linalg.norm(y_pred[:, :2] - y_true, axis=1)
dist_list.append(dist)
mse_list.append(dist)
model_name = "BBOX ({}L{})".format(l, aug)
res_dict["Model"] += [model_name] * len(dist)
res_dict["MSE"] += dist.tolist()
res_dict["fold"] += [f_idx] * len(dist)
plt_label = ""
if o == 10:
plt_label = model_name
if aug == "_aug":
c = "r"
else:
c = "g"
if l == 1:
m = "v"
elif l == 2:
m = "^"
elif l == 3:
m = ">"
size_avg = np.mean(np.array(b_size_list))
acc_avg = np.mean(np.array(acc_box_list))
dist_conc = np.concatenate(dist_list, axis=0)
box_dict["Model"].append(model_name)
box_dict["beta"].append(o)
box_dict["fold"].append(f_idx)
box_dict["ACC"].append(acc_avg)
box_dict["size"].append(size_avg)
box_dict["c-error (mean)"].append(np.mean(dist_conc))
box_dict["c-error (median)"].append(np.median(dist_conc))
box_dict["c-error (min)"].append(np.min(dist_conc))
box_dict["c-error (max)"].append(np.max(dist_conc))
if show_plot:
plt.scatter(np.mean(np.array(b_size_list)),
np.mean(np.array(acc_box_list)), color=c, marker=m,
label=plt_label)
# plt.ylim(0.4, 1.0)
plt.xlim(0, 600.0)
plt.legend(loc="lower right")
# plt.show()
box_df = pd.DataFrame(box_dict)
# print("BBOX_MSE: {}".format(np.mean(np.array(mse_list))))
if calc_aug_gain:
box_dict["best_reg"] = []
box_dict["ACC-gain"] = []
box_dict["size-gain"] = []
# calc ACC gain to each DNN
for _, model in box_df.iterrows():
b_acc = model["ACC"]
b_size = model["size"]
min_gain = 100.0
# find corresponding two values in DNN