Commit dc78ab1f authored by Marius Laska

new train/test split for lohan dataset (DBSCAN)

parent 306536c9
import os
import sys
sys.path.append('/home/laskama/PycharmProjects/bboxPrediction')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from il_pipeline.pipeline import Pipeline
from il_pipeline.utility.storable import Storable
from matplotlib.backends.backend_pdf import PdfPages
from scipy.interpolate import interp1d
import seaborn as sns
from analysis.plot_analysis import get_avg_box_size
from analysis.visualize_learning_progress import \
convert_from_2dim_overlapping_grid
from main import calc_acc_c
def calc_reg_stats(reg_size_dict, num_layers=3, dataset="lohan", show_plot=True):
    """Add the regression baseline's cumulative size/ACC curve to ``reg_size_dict``.

    Loads ``num_avg`` stored pipeline runs, computes the Euclidean distance
    between every prediction and its true label, converts each distance ``d``
    into a circle area ``pi * d**2`` and stores the sorted areas of all runs
    together with the matching cumulative fraction ``i / N``.

    The function relies on module-level names that the script sets before
    calling it:

    * ``res_dict`` -- per-sample results; the distances are appended under the
      key ``"MSE"`` (they are plain distances, not squared errors).
    * ``f_idx`` -- index into ``p.summary.y_true_labels`` / ``y_pred``; when it
      is ``None`` all entries are concatenated.
    * ``floor`` -- inserted into the UJI file path and figure title.

    Args:
        reg_size_dict: dict with list values under ``"Model"``, ``"size"`` and
            ``"ACC"``; one entry is appended to each list.
        num_layers: used for the model label, the line colour (1..3) and the
            UJI file name suffix.
        dataset: ``"lohan"`` or ``"UJI"``.
        show_plot: draw the dashed curve on the current matplotlib axes.

    Raises:
        ValueError: if ``dataset`` is neither ``"lohan"`` nor ``"UJI"``.
    """
    num_avg = 2
    size_list = []
    mse_list = []
    model_name = "DNN->O ({}L)".format(num_layers)
    colors = ["red", "blue", "green"]
    col_val = colors[num_layers - 1]

    if dataset not in ("lohan", "UJI"):
        # Fail early instead of hitting an unbound pipeline inside the loop.
        raise ValueError("unsupported dataset: {!r}".format(dataset))

    # The UJI result files carry a layer suffix unless num_layers == 3.
    nl = "" if num_layers == 3 else "_{}l".format(num_layers)

    for idx in range(num_avg):
        if dataset == "lohan":
            file = "/media/laskama/Daten/BBOX/GPU/lohan/gpu/cnn_final/output/2D-CNN_{}"
            fig_title = "Lohan (avg)"
            p: Pipeline = Storable.load(file.format(idx + 1))
        else:
            file = "/home/laskama/Dropbox/evaluation/UJIndoorLoc/b0/f{}/final/output/REG{}_{}"
            fig_title = "UJIndoorLoc (floor-{})".format(floor)
            p: Pipeline = Storable.load(file.format(floor, nl, idx + 1))

        if f_idx is None:
            y_true = np.concatenate(p.summary.y_true_labels, axis=0)
            y_pred = np.concatenate(p.summary.y_pred, axis=0)
        else:
            y_true = p.summary.y_true_labels[f_idx]
            y_pred = p.summary.y_pred[f_idx]

        dist = np.sort(np.linalg.norm(y_pred - y_true, axis=1))
        print(y_pred[:10, :])

        res_dict["Model"] += [model_name] * len(dist)
        res_dict["MSE"] += dist.tolist()
        res_dict["fold"] += [f_idx] * len(dist)

        mse_list.append(np.mean(dist))
        size_list.append(np.pi * np.square(dist))

    size_avg = np.sort(np.concatenate(size_list, axis=0))
    # Derive the ACC axis from the number of samples actually collected so it
    # always lines up with size_avg, even when the runs differ in length.
    total = len(size_avg)
    acc_list = [i / total for i in range(total)]

    print("MSE: {}".format(mse_list))

    reg_size_dict["Model"].append(model_name)
    reg_size_dict["size"].append(size_avg)
    reg_size_dict["ACC"].append(np.array(acc_list))

    if show_plot:
        plt.title(fig_title)
        plt.xlabel("Area [" + r'$m^2$' + "]")
        plt.ylabel("ACC")
        plt.plot(size_avg, acc_list,
                 linestyle="dashed", color=col_val, label=model_name)
def calc_box_stats(reg_size_dict, dataset="lohan", show_plot=True, calc_aug_gain=True):
    """Aggregate bounding-box model results per augmentation setting and beta.

    For every augmentation variant and every beta value the stored runs 1..10
    are loaded (missing files are printed and skipped). The value returned by
    ``calc_acc_c`` and the value returned by ``get_avg_box_size`` are averaged
    over the runs, and the distances between the first two prediction columns
    and the true labels are summarised (mean / median / min / max).

    Relies on the module-level names ``res_dict`` (per-sample distances are
    appended under ``"MSE"``), ``f_idx`` (index into the per-split results,
    ``None`` concatenates all) and ``floor`` (part of the UJI file path).

    Args:
        reg_size_dict: regression curves as filled by ``calc_reg_stats``
            (``"Model"``, ``"size"``, ``"ACC"``); only read when
            ``calc_aug_gain`` is true.
        dataset: ``"lohan"`` or ``"UJI"``.
        show_plot: scatter the (size, ACC) point of each setting on the
            current matplotlib axes.
        calc_aug_gain: additionally compute the augmentation sample ratio and,
            per box model, the smallest ACC gain over the regression curves
            (columns ``"best_reg"``, ``"ACC-gain"``, ``"size-gain"``).

    Returns:
        Tuple ``(box_df, num_add_aug_ratio)``; the ratio stays ``-1`` when it
        was not computed.

    Raises:
        ValueError: if ``dataset`` is neither ``"lohan"`` nor ``"UJI"``.
    """
    if dataset == "lohan":
        beta_range = [5.0, 10.0, 15.0]
        f1 = "/media/laskama/Daten/BBOX/GPU/lohan/gpu/cnn_final/output/2D-CNN-DLB_{}_delta_{}_{}"
    elif dataset == "UJI":
        beta_range = [10.0, 15.0, 20.0]
        f1 = "/media/laskama/Daten/BBOX/GPU/uji/gpu/b0/f{}/cnn_final/output/2D-CNN-DLB_{}_delta_{}_{}"
    else:
        raise ValueError("unsupported dataset: {!r}".format(dataset))

    box_dict = {"Model": [], "beta": [], "fold": [], "ACC": [],
                "size": [], "c-error (mean)": [], "c-error (median)": [],
                "c-error (min)": [], "c-error (max)": []}
    markers = {1: "v", 2: "^", 3: ">"}
    num_add_aug_ratio = -1

    for aug in ["augmentation_0", "augmentation_1"]:
        for num_layers in [1]:
            # Depends only on the outer loop variables; defining it here keeps
            # it valid even when no run file of a setting could be loaded.
            model_name = "BBOX ({}L{})".format(num_layers, aug)
            # "_aug" is the older naming; assumes "augmentation_1" marks the
            # augmented variant -- TODO confirm.
            color = "r" if aug in ("_aug", "augmentation_1") else "g"
            marker = markers[num_layers]

            for beta in beta_range:
                b_size_list = []
                acc_box_list = []
                dist_list = []

                for idx in range(1, 11):
                    if dataset == "lohan":
                        file = f1.format(aug, str(beta), idx)
                    else:
                        file = f1.format(floor, aug, str(beta), idx)

                    if not os.path.exists(file):
                        print(file)
                        continue

                    p: Pipeline = Storable.load(file)
                    # presumably rewrites p.summary in place (store=True) --
                    # verify against the helper's implementation
                    convert_from_2dim_overlapping_grid(p, grid_size=40,
                                                       quantile=False,
                                                       store=True,
                                                       average_samples=True)

                    if f_idx is None:
                        y_true = np.concatenate(p.summary.y_true_labels, axis=0)
                        y_pred = np.concatenate(p.summary.y_pred, axis=0)
                    else:
                        y_true = p.summary.y_true_labels[f_idx]
                        y_pred = p.summary.y_pred[f_idx]

                    # The ratio is taken once, from the first run that loads.
                    if calc_aug_gain and num_add_aug_ratio == -1:
                        if dataset == "lohan":
                            p.data_provider.current_split_idx = f_idx
                        x, _ = p.data_provider.get_augmented_train_data()
                        x2, _ = p.data_provider.get_train_data(labels=p.data_provider.grid_labels)
                        num_add_aug_ratio = (len(x) - len(x2)) / len(x2)

                    acc_box, _wrong_mask, _correct_mask = calc_acc_c(y_true,
                                                                     y_pred)
                    acc_box_list.append(acc_box)
                    b_size_list.append(get_avg_box_size(p))

                    dist = np.linalg.norm(y_pred[:, :2] - y_true, axis=1)
                    dist_list.append(dist)

                    res_dict["Model"] += [model_name] * len(dist)
                    res_dict["MSE"] += dist.tolist()
                    res_dict["fold"] += [f_idx] * len(dist)

                if not dist_list:
                    # Every run file was missing: nothing to aggregate, and
                    # np.concatenate would raise on the empty list.
                    print("no results found for {} (beta={})".format(model_name, beta))
                    continue

                size_avg = np.mean(np.array(b_size_list))
                acc_avg = np.mean(np.array(acc_box_list))
                dist_conc = np.concatenate(dist_list, axis=0)

                box_dict["Model"].append(model_name)
                box_dict["beta"].append(beta)
                box_dict["fold"].append(f_idx)
                box_dict["ACC"].append(acc_avg)
                box_dict["size"].append(size_avg)
                box_dict["c-error (mean)"].append(np.mean(dist_conc))
                box_dict["c-error (median)"].append(np.median(dist_conc))
                box_dict["c-error (min)"].append(np.min(dist_conc))
                box_dict["c-error (max)"].append(np.max(dist_conc))

                if show_plot:
                    # Only the beta == 10.0 point carries the legend label so
                    # each model shows up once in the legend.
                    plt_label = model_name if beta == 10.0 else ""
                    plt.scatter(size_avg, acc_avg, color=color, marker=marker,
                                label=plt_label)

    if show_plot:
        plt.xlim(0, 600.0)
        plt.legend(loc="lower right")

    box_df = pd.DataFrame(box_dict)

    if calc_aug_gain:
        box_dict["best_reg"] = []
        box_dict["ACC-gain"] = []
        box_dict["size-gain"] = []

        # Compare every box model against each regression curve and keep the
        # regression model with the smallest ACC gain.
        for _, model in box_df.iterrows():
            b_acc = model["ACC"]
            b_size = model["size"]
            min_gain, min_size_gain, min_model = np.nan, np.nan, None

            for reg_model, reg_size, reg_acc in zip(reg_size_dict["Model"],
                                                    reg_size_dict["size"],
                                                    reg_size_dict["ACC"]):
                # Both arrays are ascending, so np.interp gives the same
                # piecewise-linear value as interpolating between the two
                # neighbouring samples, and clamps to the curve's end points
                # when the box value lies outside the curve.
                reg_acc_int = np.interp(b_size, reg_size, reg_acc)
                gain = b_acc - reg_acc_int

                reg_size_int = np.interp(b_acc, reg_acc, reg_size)
                size_gain = b_size - reg_size_int

                if min_model is None or gain < min_gain:
                    min_gain = gain
                    min_size_gain = size_gain
                    min_model = reg_model

            box_dict["ACC-gain"].append(min_gain)
            box_dict["size-gain"].append(min_size_gain)
            box_dict["best_reg"].append(min_model)

        box_df = pd.DataFrame(box_dict)

    return box_df, num_add_aug_ratio
def main(dataset="UJI"):
    """Plot regression curve and box-model points for one setting and save a PDF.

    Opens a new figure, lets ``calc_reg_stats`` (for ``num_layers=1``) and
    ``calc_box_stats`` draw onto it, writes the current figure to
    ``"<dataset>_metric_<floor>.pdf"`` and shows it. ``floor`` is read from
    module level.

    The gain comparison is switched off here (``calc_aug_gain=False``), so the
    returned frame has no ``"best_reg"`` / ``"ACC-gain"`` / ``"size-gain"``
    columns and the returned ratio is ``-1``.

    Args:
        dataset: forwarded to both statistics functions and used in the PDF
            file name.

    Returns:
        Tuple ``(box_df, num_add_aug_ratio)`` as returned by
        ``calc_box_stats``.
    """
    plt.figure()
    reg_size_dict = {"Model": [], "size": [], "ACC": []}

    for num_layers in [1]:
        calc_reg_stats(reg_size_dict, num_layers=num_layers, dataset=dataset)

    box_df, num_add_aug_ratio = calc_box_stats(reg_size_dict, dataset=dataset, calc_aug_gain=False)

    plt.rcParams.update(plt.rcParamsDefault)
    plt.rc("savefig", dpi=200)

    # The context manager closes the PDF even if saving the figure fails.
    with PdfPages("{}_metric_{}.pdf".format(dataset, floor)) as pdf:
        pdf.savefig(bbox_inches="tight")

    plt.show()
    return box_df, num_add_aug_ratio
if __name__ == "__main__":
    # Module-level state read (and, for res_dict, filled) by calc_reg_stats,
    # calc_box_stats and main.
    res_dict = {"Model": [], "MSE": [], "fold": []}
    num_add_aug_ratio = []
    dataset = "lohan"
    f_idx = None
    floor = None

    if dataset == "UJI":
        run_ids = range(4)
    elif dataset == "lohan":
        run_ids = [0, 1, 2, 3, 4]
    else:
        raise ValueError("unsupported dataset: {!r}".format(dataset))

    box_frames = []
    for idx in run_ids:
        # For lohan the index also selects the split; for UJI f_idx stays
        # None and only the floor changes.
        if dataset == "lohan":
            f_idx = idx
        floor = idx
        box_df, num_add_aug = main(dataset)
        num_add_aug_ratio.append(num_add_aug)
        box_frames.append(box_df)
    box_df_concat = pd.concat(box_frames, ignore_index=True)

    # A plain "aug" substring matches both "augmentation_0" and
    # "augmentation_1"; match the older "_aug" suffix or "augmentation_1".
    # Assumes "augmentation_1" marks the augmented variant -- TODO confirm.
    aug_pattern = "_aug|augmentation_1"

    # main() may run calc_box_stats without the gain comparison, in which case
    # the gain columns do not exist.
    if {"size-gain", "ACC-gain"}.issubset(box_df_concat.columns):
        box_is_aug = box_df_concat["Model"].str.contains(aug_pattern)
        print("SIZE-GAIN (aug): {}".format(box_df_concat[box_is_aug]["size-gain"].mean()))
        print("SIZE-GAIN (no-aug): {}".format(box_df_concat[~box_is_aug]["size-gain"].mean()))
        print("ACC-GAIN (aug): {}".format(box_df_concat[box_is_aug]["ACC-gain"].mean()))
        print("ACC-GAIN (no-aug): {}".format(box_df_concat[~box_is_aug]["ACC-gain"].mean()))
        print("NUM-ADD_AUG_RATIO: {}".format(np.mean(num_add_aug_ratio)))
    else:
        print("gain statistics skipped: gain columns were not computed")

    df = pd.DataFrame(res_dict)
    aug_mse = df[df["Model"].str.contains(aug_pattern)]["MSE"]
    print("MEAN-MSE: {}".format(aug_mse.mean()))
    print("MEDIAN-MSE: {}".format(aug_mse.median()))
    print("MIN-MSE: {}".format(aug_mse.min()))
    print("MAX-MSE: {}".format(aug_mse.max()))
    print(aug_mse.describe())

    # NOTE(review): these two model names are not produced by the functions in
    # this file version (they emit "DNN->O (1L)" / "BBOX (1Laugmentation_*)"),
    # so this selection is probably empty -- confirm the intended names.
    print(df[df["Model"].isin(["BBOX (2L_aug)", "DNN->O (2L)"])].groupby('Model')["MSE"].describe().unstack(1))

    # Full-range box plot followed by a zoomed copy with a fixed y-range.
    for pdf_name, y_limits in (("{}_boxplot.pdf", None),
                               ("{}_boxplot_zoom.pdf", (-0.5, 20.0))):
        fig = plt.figure()
        # x / y passed by keyword: recent seaborn accepts only `data`
        # positionally.
        sns.boxplot(x="Model", y="MSE", data=df, color="skyblue")
        plt.rcParams.update(plt.rcParamsDefault)
        plt.rc("savefig", dpi=200)
        if y_limits is not None:
            plt.ylim(*y_limits)
        fig.set_size_inches(fig.get_size_inches() * [1.6, 1.3], forward=False)
        with PdfPages(pdf_name.format(dataset)) as pdf:
            pdf.savefig(fig, bbox_inches="tight")

    plt.show()
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from il_pipeline.pipeline import Pipeline
from il_pipeline.utility.storable import Storable
from matplotlib.backends.backend_pdf import PdfPages
from scipy.interpolate import interp1d
import seaborn as sns
from analysis.plot_analysis import get_avg_box_size
from analysis.visualize_learning_progress import \
convert_from_2dim_overlapping_grid
from main import calc_acc_c
def calc_reg_stats(reg_size_dict, num_layers=3, dataset="lohan", show_plot=True):
    """Add the regression baseline's cumulative size/ACC curve to ``reg_size_dict``.

    Loads ``num_avg`` stored pipeline runs, computes the Euclidean distance
    between every prediction and its true label, converts each distance ``d``
    into a circle area ``pi * d**2`` and stores the sorted areas of all runs
    together with the matching cumulative fraction ``i / N``.

    Relies on the module-level names ``res_dict`` (distances are appended
    under ``"MSE"``), ``f_idx`` (index into the per-split results, ``None``
    concatenates all) and ``floor`` (part of the UJI file path and title).

    Args:
        reg_size_dict: dict with list values under ``"Model"``, ``"size"`` and
            ``"ACC"``; one entry is appended to each list.
        num_layers: used for the model label, the line colour (1..3) and the
            lohan file name.
        dataset: ``"lohan"`` or ``"UJI"``.
        show_plot: draw the dashed curve on the current matplotlib axes.

    Raises:
        ValueError: if ``dataset`` is neither ``"lohan"`` nor ``"UJI"``.
    """
    num_avg = 10
    size_list = []
    model_name = "DNN->O ({}L)".format(num_layers)
    colors = ["red", "blue", "green"]
    col_val = colors[num_layers - 1]

    if dataset not in ("lohan", "UJI"):
        # Fail early instead of hitting an unbound pipeline inside the loop.
        raise ValueError("unsupported dataset: {!r}".format(dataset))

    for idx in range(num_avg):
        if dataset == "lohan":
            file = "/home/laskama/Dropbox/evaluation/lohan/REG{}_{}"
            fig_title = "Lohan (avg)"
            p: Pipeline = Storable.load(file.format(num_layers, idx + 1))
        else:
            file = "/media/laskama/Daten/BBOX/GPU/uji/gpu/b0/f{}/output/CNNLoc_{}"
            fig_title = "UJIndoorLoc (floor-{})".format(floor)
            p: Pipeline = Storable.load(file.format(floor, idx + 1))

        if f_idx is None:
            y_true = np.concatenate(p.summary.y_true_labels, axis=0)
            y_pred = np.concatenate(p.summary.y_pred, axis=0)
        else:
            y_true = p.summary.y_true_labels[f_idx]
            y_pred = p.summary.y_pred[f_idx]

        dist = np.sort(np.linalg.norm(y_pred - y_true, axis=1))

        res_dict["Model"] += [model_name] * len(dist)
        res_dict["MSE"] += dist.tolist()
        res_dict["fold"] += [f_idx] * len(dist)

        size_list.append(np.pi * np.square(dist))

    size_avg = np.sort(np.concatenate(size_list, axis=0))
    # Derive the ACC axis from the number of samples actually collected so it
    # always lines up with size_avg, even when the runs differ in length.
    total = len(size_avg)
    acc_list = [i / total for i in range(total)]

    reg_size_dict["Model"].append(model_name)
    reg_size_dict["size"].append(size_avg)
    reg_size_dict["ACC"].append(np.array(acc_list))

    if show_plot:
        plt.title(fig_title)
        plt.xlabel("Area [" + r'$m^2$' + "]")
        plt.ylabel("ACC")
        plt.plot(size_avg, acc_list,
                 linestyle="dashed", color=col_val, label=model_name)
def calc_box_stats(reg_size_dict, dataset="lohan", show_plot=True, calc_aug_gain=True):
if dataset == "lohan":
beta_range = [5, 10, 15]
f1 = "/media/laskama/Daten/BBOX/lohan/test/output/BBOX_{}l_{}{}_{}"
elif dataset == "UJI":
beta_range = [10.0, 15.0, 20.0]
f1 = "/media/laskama/Daten/BBOX/GPU/uji/gpu/b0/f{}/v2/output/CNNLoc-DLB_delta_{}{}_{}"
mse_list = []
box_dict = {"Model": [], "beta": [], "fold": [], "ACC": [],
"size": [], "c-error (mean)": [], "c-error (median)": [],
"c-error (min)": [], "c-error (max)": []}
"/media/laskama/Daten/BBOX/GPU/"
num_add_aug_ratio = -1
for aug in [""]: # ["_aug", ""]:
for l in [1]: # , 2]:
for o in beta_range:
b_size_list = []
acc_box_list = []
dist_list = []
for idx in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
if dataset == "lohan":
file = f1.format(l, o, aug, idx)
elif dataset == "UJI":
file = f1.format(floor, str(o), aug, idx)
if not os.path.exists(file):
print(file)
continue
p: Pipeline = Storable.load(file)
convert_from_2dim_overlapping_grid(p, grid_size=40,
quantile=False,
store=True,
average_samples=True)
if f_idx is None:
y_true = np.concatenate(p.summary.y_true_labels, axis=0)
y_pred = np.concatenate(p.summary.y_pred, axis=0)
else:
y_true = p.summary.y_true_labels[f_idx]
y_pred = p.summary.y_pred[f_idx]
if calc_aug_gain:
if num_add_aug_ratio == -1 and dataset == "UJI":
x, _ = p.data_provider.get_augmented_train_data()
x2, _ = p.data_provider.get_train_data(labels=p.data_provider.grid_labels)
num_add_aug_ratio = (len(x) - len(x2)) / len(x2)
elif num_add_aug_ratio == -1 and dataset == "lohan":
p.data_provider.current_split_idx = f_idx
x, _ = p.data_provider.get_augmented_train_data()
x2, _ = p.data_provider.get_train_data(labels=p.data_provider.grid_labels)
num_add_aug_ratio = (len(x) - len(x2)) / len(x2)
acc_box, wrong_mask, correct_mask = calc_acc_c(y_true,
y_pred)
b_size = get_avg_box_size(p)
acc_box_list.append(acc_box)
b_size_list.append(b_size)
dist = np.linalg.norm(y_pred[:, :2] - y_true, axis=1)
dist_list.append(dist)
mse_list.append(dist)
model_name = "BBOX ({}L{})".format(l, aug)
res_dict["Model"] += [model_name] * len(dist)
res_dict["MSE"] += dist.tolist()
res_dict["fold"] += [f_idx] * len(dist)
plt_label = ""
if o == 10:
plt_label = model_name
if aug == "_aug":
c = "r"
else:
c = "g"
if l == 1:
m = "v"
elif l == 2:
m = "^"
elif l == 3:
m = ">"
size_avg = np.mean(np.array(b_size_list))
acc_avg = np.mean(np.array(acc_box_list))
dist_conc = np.concatenate(dist_list, axis=0)
box_dict["Model"].append(model_name)
box_dict["beta"].append(o)
box_dict["fold"].append(f_idx)
box_dict["ACC"].append(acc_avg)
box_dict["size"].append(size_avg)
box_dict["c-error (mean)"].append(np.mean(dist_conc))
box_dict["c-error (median)"].append(np.median(dist_conc))
box_dict["c-error (min)"].append(np.min(dist_conc))
box_dict["c-error (max)"].append(np.max(dist_conc))
if show_plot:
plt.scatter(np.mean(np.array(b_size_list)),
np.mean(np.array(acc_box_list)), color=c, marker=m,
label=plt_label)
# plt.ylim(0.4, 1.0)
plt.xlim(0, 600.0)
plt.legend(loc="lower right")
# plt.show()
box_df = pd.DataFrame(box_dict)
# print("BBOX_MSE: {}".format(np.mean(np.array(mse_list))))
if calc_aug_gain:
box_dict["best_reg"] = []
box_dict["ACC-gain"] = []
box_dict["size-gain"] = []
# calc ACC gain to each DNN
for _, model in box_df.iterrows():
b_acc = model["ACC"]
b_size = model["size"]
min_gain = 100.0
# find corresponding two values in DNN
for reg_model, reg_size, reg_acc in zip(reg_size_dict["Model"],
reg_size_dict["size"],
reg_size_dict["ACC"]):
# size on x-axis
test = np.argmax(reg_size > b_size)
x = np.array([reg_size[test - 1], reg_size[test]])
y = np.array([reg_acc[test - 1], reg_acc[test]])
f = interp1d(x, y)
reg_acc_int = f(b_size)
gain = b_acc - reg_acc_int
# acc on x-axis
test = np.argmax(reg_acc > b_acc)
x = np.array([reg_acc[test - 1], reg_acc[test]])
y = np.array([reg_size[test - 1], reg_size[test]])
f = interp1d(x, y)
reg_size_int = f(b_acc)
size_gain = b_size - reg_size_int
if gain < min_gain: