... ... @@ -206,16 +206,20 @@ def main(dir_p="../evaluation/lohan/evaluation/new_knn/output/", f_idx=0): return summary def evaluate_correlation(y, x, type, summary): def evaluate_correlation(y, x, x_name, y_name, summary): if len(x.shape) == 1: x = x.reshape(-1, 1) regr = OLS(y, add_constant(x)).fit() summary["y"].append(regr.params[0]) summary["beta"].append(regr.params[1]) summary["p_val"].append(regr.pvalues[1]) summary["r_2"].append(regr.rsquared) summary["type"].append(type) summary["$\\beta_0$"].append(regr.params[0]) summary["$\\beta_1$"].append(regr.params[1]) summary["p-value"].append(regr.pvalues[1]) summary["$R^2$"].append(regr.rsquared) summary["Y"].append(x_name) summary["X"].append(y_name) summary["Y_mean"].append(np.mean(y)) summary["RSE"].append(np.sqrt(regr.scale)) #summary["type"].append(type) def generate_correlation_df(): ... ...
 import os import sys from sklearn import linear_model from analysis.correlation_knn import evaluate_correlation sys.path.append('/home/laskama/PycharmProjects/bboxPrediction') import matplotlib.pyplot as plt ... ... @@ -16,8 +21,20 @@ from analysis.visualize_learning_progress import \ convert_from_2dim_overlapping_grid from main import calc_acc_c dbscan_split = False dataset = "lohan" dbscan_split = True dataset = "UJI" beta_0 = "$\\beta_0$" beta_1 = "$\\beta_1$" r_2 = "$R^2$" e_n = "$e$" e_x_n = "$e_x$" e_y_n = "$e_y$" size_n = "$w \cdot h$" w_n = "$w$" h_n = "$h$" def _calc_reg_stats(reg_size_dict, pipe_name, col_val, model_name, num_avg= 10, show_plot=True): #num_avg = 10 ... ... @@ -61,8 +78,8 @@ def _calc_reg_stats(reg_size_dict, pipe_name, col_val, model_name, num_avg= 10, if show_plot: #plt.title(fig_title) plt.xlabel("Area [" + r'$m^2$' + "]") plt.ylabel("ACC") plt.xlabel("Area (" + r'$\epsilon$' + ") [" + r'$m^2$' + "]") plt.ylabel("Success rate (" + r'$\gamma$' + ")") plt.plot(size_avg, acc_list, linestyle="dashed", color=col_val, label=model_name) ... ... @@ -128,13 +145,22 @@ def calc_reg_stats(reg_size_dict, num_layers=3, dataset="lohan", show_plot=True) plt.legend(loc="lower right") def _calc_box_stats(reg_size_dict, box_dict, mse_list, pipe_name, model_name, plot_name, col_val, marker, num_avg=10, calc_aug_gain=False, show_plot=True): def _calc_box_stats(reg_size_dict, box_dict, mse_list, pipe_name, model_name, plot_name, col_val, marker, num_avg=10, calc_aug_gain=False, show_plot=True, plot_regression=True, thres_idx=-1): b_size_list = [] acc_box_list = [] dist_list = [] num_add_aug_ratio = -1 summary = {"Y": [], "X": [], beta_0: [], beta_1: [], "p-value": [], r_2: [], "model": [], "RSE": [], "Y_mean": []} error_vec_list = [] error_vec_x_list = [] error_vec_y_list = [] size_vec_list = [] size_vec_x_list = [] size_vec_y_list = [] for idx in range(num_avg): file = pipe_name.format(idx + 1) ... ... @@ -144,6 +170,19 @@ def _calc_box_stats(reg_size_dict, box_dict, mse_list, pipe_name, model_name, pl continue p: Pipeline = Storable.load(file) y_true_grid = np.concatenate( [p.data_provider.get_test_data( labels=p.data_provider.grid_labels, split_idx=idx)[1] for idx in range( p.data_provider.num_splits)], axis=0) #_, y_true_grid = p.data_provider.get_test_data(labels=p.data_provider.grid_labels) chosen = np.argmax(np.concatenate(p.summary.y_pred, axis=0)[:, 4::5], axis=1) correct_grid_cell_mask = chosen == y_true_grid[:, 2] convert_from_2dim_overlapping_grid(p, grid_size=p.data_provider.grid_size, quantile=False, store=True, ... ... @@ -155,6 +194,32 @@ def _calc_box_stats(reg_size_dict, box_dict, mse_list, pipe_name, model_name, pl y_true = p.summary.y_true_labels[f_idx] y_pred = p.summary.y_pred[f_idx] thresholds = [120, 275, 550, 800] if thres_idx < len(thresholds) and thres_idx != -1: sub_size_mask = (y_pred[:, 2] * y_pred[:, 3]) < thresholds[thres_idx] # 75, 275, 550, 535 sub_idx = np.where(np.logical_and(correct_grid_cell_mask, sub_size_mask))[0] else: sub_idx = np.where(correct_grid_cell_mask)[0] error_vec = np.linalg.norm(y_pred[sub_idx, :2] - y_true[sub_idx, :2], axis=1) error_vec_x = np.abs(y_pred[sub_idx, 0] - y_true[sub_idx, 0]) error_vec_y = np.abs(y_pred[sub_idx, 1] - y_true[sub_idx, 1]) size_vec = y_pred[sub_idx, 2] * y_pred[sub_idx, 3] size_vec_x = y_pred[sub_idx, 2] size_vec_y = y_pred[sub_idx, 3] error_vec_list.append(error_vec) error_vec_x_list.append(error_vec_x) error_vec_y_list.append(error_vec_y) size_vec_list.append(size_vec) size_vec_x_list.append(size_vec_x) size_vec_y_list.append(size_vec_y) evaluate_correlation(size_vec, error_vec, e_n, size_n, summary) if calc_aug_gain: if num_add_aug_ratio == -1 and dataset == "UJI": x, _ = p.data_provider.get_augmented_train_data() ... ... @@ -163,11 +228,23 @@ def _calc_box_stats(reg_size_dict, box_dict, mse_list, pipe_name, model_name, pl num_add_aug_ratio = (len(x) - len(x2)) / len(x2) elif num_add_aug_ratio == -1 and dataset == "lohan": p.data_provider.current_split_idx = f_idx x, _ = p.data_provider.get_augmented_train_data() x2, _ = p.data_provider.get_train_data( labels=p.data_provider.grid_labels) num_add_aug_ratio = (len(x) - len(x2)) / len(x2) if f_idx is None: x_sum = 0 x2_sum = 0 for f in range(p.data_provider.num_splits): p.data_provider.current_split_idx = f x, _ = p.data_provider.get_augmented_train_data() x2, _ = p.data_provider.get_train_data( labels=p.data_provider.grid_labels) x_sum += len(x) x2_sum += len(x2) num_add_aug_ratio = (x_sum - x2_sum) / x2_sum else: p.data_provider.current_split_idx = f_idx x, _ = p.data_provider.get_augmented_train_data() x2, _ = p.data_provider.get_train_data( labels=p.data_provider.grid_labels) num_add_aug_ratio = (len(x) - len(x2)) / len(x2) acc_box, wrong_mask, correct_mask = calc_acc_c(y_true, y_pred) ... ... @@ -183,6 +260,68 @@ def _calc_box_stats(reg_size_dict, box_dict, mse_list, pipe_name, model_name, pl res_dict["MSE"] += dist.tolist() res_dict["fold"] += [f_idx] * len(dist) if len(error_vec_list) > 0: summary["model"] += [model_name] * 10 test_df = pd.DataFrame(summary) test = test_df.mean() error_vec_concat = np.concatenate(error_vec_list, axis=0) error_vec_x_concat = np.concatenate(error_vec_x_list, axis=0) error_vec_y_concat = np.concatenate(error_vec_y_list, axis=0) size_vec_concat = np.concatenate(size_vec_list, axis=0) size_vec_x_concat = np.concatenate(size_vec_x_list, axis=0) size_vec_y_concat = np.concatenate(size_vec_y_list, axis=0) summary_single = {"Y": [], "X": [], beta_0: [], beta_1: [], "p-value": [], r_2: [], "model": [], "RSE": [], "Y_mean": []} evaluate_correlation(size_vec_concat, error_vec_concat, e_n, size_n, summary_single) evaluate_correlation(size_vec_x_concat, error_vec_x_concat, e_x_n, w_n, summary_single) evaluate_correlation(size_vec_y_concat, error_vec_y_concat, e_y_n, h_n, summary_single) evaluate_correlation(size_vec_x_concat, error_vec_y_concat, e_y_n, w_n, summary_single) evaluate_correlation(size_vec_y_concat, error_vec_x_concat, e_x_n, h_n, summary_single) if plot_regression: regr = linear_model.LinearRegression() # Train the model using the training sets regr.fit(error_vec_concat.reshape(-1, 1), size_vec_concat.reshape(-1, 1)) # Make predictions using the testing set y_val = regr.predict(error_vec_concat.reshape(-1, 1)) plt.figure() plt.title("UJI (bld. 0, floor 0)") plt.xlabel(r'$e \quad [m]$') plt.ylabel(r'$w \cdot h \quad [m^2]$') thres_val = 75 thres_mask = size_vec_concat > thres_val t_mask = np.where(error_vec_concat > 7)[0] red_idx = np.where(thres_mask)[0] blue_idx = np.where(~thres_mask)[0] plt.scatter(error_vec_concat[blue_idx], size_vec_concat[blue_idx], color="blue") plt.scatter(error_vec_concat[red_idx], size_vec_concat[red_idx], color="red") plt.plot(np.array([0, 20]), np.array([thres_val, thres_val]), linestyle='dashed', color="red") plt.plot(error_vec_concat, y_val, color="blue") plt.rcParams.update(plt.rcParamsDefault) plt.rc("savefig", dpi=200) pdf = PdfPages("correlation_{}.pdf".format(model_name)) # self.fig.set_size_inches((19.79, 12.5), forward=False) pdf.savefig(bbox_inches="tight") pdf.close() # plt.show() if len(dist_list) == 0: return ... ... @@ -209,11 +348,13 @@ def _calc_box_stats(reg_size_dict, box_dict, mse_list, pipe_name, model_name, pl plt.legend(loc="lower right") # plt.show() return num_add_aug_ratio, error_vec_list, size_vec_list, summary_single def calc_box_stats(reg_size_dict, dataset="lohan", show_plot=True, calc_aug_gain=True): if dataset == "lohan": beta_range = [5.0, 7.5, 10.0, 15.0] beta_range = [7.5, 10.0, 15.0] if dbscan_split: # dbscan f_dnn = "/media/laskama/Daten/BBOX/GPU/lohan/gpu/new_dropout/output/DNN-DLB_hidden_layers_{}_augmentation_{}_delta_{}" ... ... @@ -236,12 +377,16 @@ def calc_box_stats(reg_size_dict, dataset="lohan", show_plot=True, calc_aug_gain "size": [], "c-error (mean)": [], "c-error (median)": [], "c-error (min)": [], "c-error (max)": []} corr_dicts = {"model": [], "Y": [], "X": [], beta_0: [], beta_1: [], "p-value": [], r_2: [], "RSE": [], "Y_mean": []} # first do DNN-DLB models thres_idx = 0 for aug in [0, 1]: for l in [1]:#, 2]: for o in beta_range: if o == 5.0 and dataset == "UJI": if (o == 5.0 and dataset == "UJI") or (o == 5.0 and dataset == "lohan" and dbscan_split and aug == 0): box_dict['beta'] += [o] else: box_dict['beta'] += [o] * 2 ... ... @@ -256,13 +401,25 @@ def calc_box_stats(reg_size_dict, dataset="lohan", show_plot=True, calc_aug_gain else: pipe_name = f_dnn.format(floor, l, aug, o) + "_{}" model_name = "DNN-DLB ({} HL, {} aug)".format(l, aug) if o == beta_range[0]: if o == beta_range[2]: plot_name = model_name else: plot_name = "" col_val = "r" if aug == 1 else "g" marker = "^" if l == 2 else "v" _calc_box_stats(reg_size_dict, box_dict, mse_list, pipe_name, model_name, plot_name, col_val, marker, num_avg=10, calc_aug_gain=calc_aug_gain) num_add_aug_ratio, e_vec, s_vec, corr_dict = _calc_box_stats(reg_size_dict, box_dict, mse_list, pipe_name, model_name, plot_name, col_val, marker, num_avg=10, calc_aug_gain=calc_aug_gain, thres_idx=-1) thres_idx += 1 corr_dicts["Y"] += corr_dict["Y"] corr_dicts["X"] += corr_dict["X"] corr_dicts["p-value"] += corr_dict["p-value"] corr_dicts[r_2] += corr_dict[r_2] corr_dicts[beta_0] += corr_dict[beta_0] corr_dicts[beta_1] += corr_dict[beta_1] corr_dicts["RSE"] += corr_dict["RSE"] corr_dicts["Y_mean"] += corr_dict["Y_mean"] #corr_dicts["type"] += corr_dict["type"] corr_dicts["model"] += [model_name + str(o)] * 5 # CNN if dataset == "lohan": ... ... @@ -270,13 +427,22 @@ def calc_box_stats(reg_size_dict, dataset="lohan", show_plot=True, calc_aug_gain else: pipe_name = f_cnn.format(floor, aug, o) + "_{}" model_name = "2D-CNN-DLB ({} aug)".format(aug) if o == beta_range[0]: if o == beta_range[2]: plot_name = model_name else: plot_name = "" col_val = "r" if aug == 1 else "g" marker = "o" _calc_box_stats(reg_size_dict, box_dict, mse_list, pipe_name, model_name, plot_name, col_val, marker, num_avg=10, calc_aug_gain=False) _calc_box_stats(reg_size_dict, box_dict, mse_list, pipe_name, model_name, plot_name, col_val, marker, num_avg=10, calc_aug_gain=False, plot_regression=False) # error-size correlation corr_df = pd.DataFrame(corr_dicts) corr_df[beta_0] = corr_df[beta_0].round(2) corr_df[beta_1] = corr_df[beta_1].round(2) corr_df[r_2] = corr_df[r_2].round(2) print(corr_df.to_latex(escape=False, index=False)) box_df = None ... ... @@ -334,22 +500,22 @@ def calc_box_stats(reg_size_dict, dataset="lohan", show_plot=True, calc_aug_gain # print("ACC-gain: {}".format(box_df["ACC-gain"].mean())) return box_df, num_add_aug_ratio return box_df, num_add_aug_ratio, corr_df def main(dataset="UJI"): plt.figure() if dataset == "lohan": plt.title("Tampere (avg)") # (fold-{})".format(f_idx + 1) plt.title("Tampere (floor 1)") # (fold-{})".format(f_idx + 1) else: plt.title("UJIndoorLoc (floor-{})".format(floor)) plt.title("UJI (bld. 0, floor {})".format(floor)) reg_size_dict = {"Model": [], "size": [], "ACC": []} #for l in [1]: calc_reg_stats(reg_size_dict, num_layers=3, dataset=dataset) box_df, num_add_aug_ratio = calc_box_stats(reg_size_dict, dataset=dataset, calc_aug_gain=False) box_df, num_add_aug_ratio, corr_df = calc_box_stats(reg_size_dict, dataset=dataset, calc_aug_gain=True) plt.rcParams.update(plt.rcParamsDefault) plt.rc("savefig", dpi=200) ... ... @@ -359,9 +525,9 @@ def main(dataset="UJI"): # self.fig.set_size_inches((19.79, 12.5), forward=False) pdf.savefig(bbox_inches="tight") pdf.close() plt.show() # plt.show() return box_df, num_add_aug_ratio return box_df, num_add_aug_ratio, corr_df def rename_files(): import glob ... ... @@ -398,18 +564,20 @@ if __name__ == "__main__": #rename_files() res_dict = {"Model": [], "MSE": [], "fold": []} num_add_aug_ratio = [] num_add_aug_ratio_list = [] # dataset = "UJI" f_idx = None floor = None box_df_concat = None if dataset == "UJI": corr_dfs = [] for idx in range(4): floor = idx box_df, num_add_aug = main(dataset) num_add_aug_ratio.append(num_add_aug) box_df, num_add_aug, corr_df = main(dataset) num_add_aug_ratio_list.append(num_add_aug) corr_dfs.append(corr_df) if box_df_concat is None: box_df_concat = box_df ... ... @@ -422,8 +590,8 @@ if __name__ == "__main__": f_idx = None #idx floor = idx box_df, num_add_aug = main(dataset) num_add_aug_ratio.append(num_add_aug) box_df, num_add_aug, corr_df = main(dataset) num_add_aug_ratio_list.append(num_add_aug) if box_df_concat is None: box_df_concat = box_df ... ... @@ -437,29 +605,39 @@ if __name__ == "__main__": print("SIZE-GAIN (no-aug): {}".format(box_df_concat[box_df_concat["Model"].str.contains("0 aug")]["size-gain"].mean())) print("ACC-GAIN (aug): {}".format(box_df_concat[box_df_concat["Model"].str.contains("1 aug")]["ACC-gain"].mean())) print("ACC-GAIN (no-aug): {}".format(box_df_concat[box_df_concat["Model"].str.contains("0 aug")]["ACC-gain"].mean())) exit print("NUM-ADD_AUG_RATIO: {}".format(np.mean(num_add_aug_ratio))) print("NUM-ADD_AUG_RATIO: {}".format(np.mean(num_add_aug_ratio_list))) plt.show() # plt.show() df = pd.DataFrame(res_dict) print("MEAN-MSE: {}".format(df[df["Model"].str.contains("1 aug")]["MSE"].mean())) print("MEDIAN-MSE: {}".format( df[df["Model"].str.contains("1 aug")]["MSE"].median())) print("MIN-MSE: {}".format( df[df["Model"].str.contains("1 aug")]["MSE"].min())) print("MAX-MSE: {}".format( df[df["Model"].str.contains("1 aug")]["MSE"].max())) # print("MEAN-MSE: {}".format(df[df["Model"].str.contains("1 aug")]["MSE"].mean())) # print("MEDIAN-MSE: {}".format( # df[df["Model"].str.contains("1 aug")]["MSE"].median())) # print("MIN-MSE: {}".format( # df[df["Model"].str.contains("1 aug")]["MSE"].min())) # print("MAX-MSE: {}".format( # df[df["Model"].str.contains("1 aug")]["MSE"].max())) print(df[df["Model"].str.contains("1 aug")]["MSE"].describe()) #print(df[df["Model"].str.contains("1 aug")]["MSE"].describe()) # df2 = pd.DataFrame( # group.describe().rename(columns={'MSE': name}).squeeze() # for name, group in df.groupby('Model')) # print(df) print(df[df["Model"].isin(["BBOX (2L_aug)", "DNN->O (2L)"])].groupby('Model')["MSE"].describe().unstack(1)) if dataset == "UJI": print(df[df["Model"].isin( ["DNN-DLB (1 HL, 1 aug)", "DNN -> O (3 HL)", "CNN -> O", "2D-CNN-DLB (1 aug)"])].groupby('Model')["MSE"].describe().unstack( 1)) if dataset == "lohan": print(df[df["Model"].isin( ["DNN-DLB (1 HL, 1 aug)", "DNN -> O (3 HL)", "CNN -> O", "2D-CNN-DLB (1 aug)"])].groupby('Model')["MSE"].describe().unstack( 1)) # BOX plot fig = plt.figure() ... ... @@ -490,4 +668,4 @@ if __name__ == "__main__": pdf.savefig(bbox_inches="tight") pdf.close() #plt.show() # plt.show()