Commit 7f409539 authored by Marius Laska
Browse files

major commit, including: simulation data, hierarchical model, grid based...

major commit, including: simulation data, hierarchical model, grid based model, upload of grid models, etc.
parent 12284cd4
from il_pipeline.pipeline import Pipeline
from il_pipeline.utility.storable import Storable
import numpy as np
import pandas as pd
from analysis.visualize_learning_progress import convert_from_2dim_grid
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
def main(dir_p="../evaluation/lohan/evaluation/new_knn/output/", f_idx=0):
    """Regress predicted box sizes on localization errors for one fold.

    Loads the stored ``BBOX``, ``REG`` and ``KNN`` pipelines found under
    *dir_p*, post-processes the box pipeline with ``convert_from_2dim_grid``
    and fits one OLS regression per (size, error) pairing via
    ``evaluate_correlation``.

    :param dir_p: path prefix the pipeline names are appended to (must end
        with a path separator)
    :param f_idx: index of the fold whose predictions are evaluated
    :return: dict of lists with the keys "y", "beta", "p_val", "r_2", "f"
        and "type"; every list holds one entry per fitted regression
    """
    bbox: Pipeline = Storable.load(dir_p + "BBOX")
    convert_from_2dim_grid(bbox, grid_size=40, quantile=False, store=True,
                           average_samples=True)

    reg: Pipeline = Storable.load(dir_p + "REG")
    knn: Pipeline = Storable.load(dir_p + "KNN")

    # ground truth positions (of fold f_idx)
    y_true = bbox.summary.y_true_labels[f_idx]

    # predicted positions (of fold f_idx)
    y_pred_box = bbox.summary.y_pred[f_idx]
    y_pred_reg = reg.summary.y_pred[f_idx]
    y_pred_knn = knn.summary.y_pred[f_idx]

    #
    # Compute non MCD metrics
    #

    # center statistics; the first two columns of the box output are used as
    # the predicted position
    box_residual = y_true - y_pred_box[:, :2]
    box_error = np.linalg.norm(box_residual, axis=1)
    # NOTE: despite the "mse" label this is the mean Euclidean error
    box_mse = np.mean(box_error)
    print("box_mse: {}".format(box_mse))

    reg_error = np.linalg.norm(y_true - y_pred_reg, axis=1)
    reg_mse = np.mean(reg_error)
    print("reg_mse: {}".format(reg_mse))

    # reference error of kNN model (also component-wise)
    knn_residual = np.abs(y_true - y_pred_knn)
    knn_error = np.linalg.norm(y_true - y_pred_knn, axis=1)
    knn_error_x = knn_residual[:, 0]
    knn_error_y = knn_residual[:, 1]
    knn_mse = np.mean(knn_error)

    # NOTE(review): the kNN reference errors computed above are replaced by
    # the box model's own errors right here, so every regression below uses
    # the box error as regressor. Looks like a left-over experiment --
    # confirm which error is intended before relying on the results.
    box_abs_residual = np.abs(box_residual)
    knn_error = box_error
    knn_error_x = box_abs_residual[:, 0]
    knn_error_y = box_abs_residual[:, 1]

    # Bbox size (also component-wise); columns 2 and 3 are used as the
    # per-axis box extent
    box_size = np.prod(y_pred_box[:, 2:], axis=1)
    size_x = y_pred_box[:, 2]
    size_y = y_pred_box[:, 3]

    # The Monte-Carlo-Dropout (MCD) based metrics are currently disabled.

    summary = {"y": [], "beta": [], "p_val": [], "r_2": [], "f": [], "type": []}

    #
    # Correlation between error and box size of BBox model
    #
    evaluate_correlation(box_size, knn_error, "error -> size", summary)

    # component wise correlation
    evaluate_correlation(size_x, knn_error_x, "error_x -> size_x", summary)
    evaluate_correlation(size_y, knn_error_y, "error_y -> size_y", summary)
    evaluate_correlation(size_x, knn_error_y, "error_y -> size_x", summary)
    evaluate_correlation(size_y, knn_error_x, "error_x -> size_y", summary)

    # Tag every regression with its fold. The count is derived from the
    # collected results so it stays correct when correlations are added or
    # removed above.
    summary["f"] += [f_idx] * len(summary["type"])

    return summary
def evaluate_correlation(y, x, type, summary):
    """Fit ``y ~ const + x`` with OLS and record the fit in *summary*.

    :param y: dependent variable, handed to ``OLS`` unchanged
    :param x: array that is reshaped into a single regressor column
    :param type: label stored alongside the fit results
    :param summary: dict of lists, extended in place: "y" receives
        ``params[0]``, "beta" ``params[1]``, "p_val" ``pvalues[1]``,
        "r_2" ``rsquared`` and "type" the given label
    """
    # one regressor column plus the constant column added by statsmodels
    design_matrix = add_constant(x.reshape(-1, 1))
    fit = OLS(y, design_matrix).fit()

    summary["y"].append(fit.params[0])
    summary["beta"].append(fit.params[1])
    summary["p_val"].append(fit.pvalues[1])
    summary["r_2"].append(fit.rsquared)
    summary["type"].append(type)
def generate_correlation_df():
    """Run ``main`` for folds 0-4 and collect the results in a DataFrame.

    :return: DataFrame with the columns "corr_type", "y_intercept", "beta",
        "p_val", "r2" and "fold", one row per regression and fold
    """
    # summary key produced by main() -> column name of the resulting frame
    # (insertion order defines the column order)
    column_for_key = {
        "type": "corr_type",
        "y": "y_intercept",
        "beta": "beta",
        "p_val": "p_val",
        "r_2": "r2",
        "f": "fold",
    }
    columns = {column: [] for column in column_for_key.values()}

    for fold in range(5):
        fold_summary = main("../evaluation/simulation/evaluation/big/output/", f_idx=fold)
        for key, column in column_for_key.items():
            columns[column].extend(fold_summary[key])

    return pd.DataFrame(data=columns)
if __name__ == "__main__":
    # Build the per-fold correlation table and dump it to stdout.
    correlation_table = generate_correlation_df()
    print(correlation_table)
\ No newline at end of file
......@@ -17,19 +17,35 @@ from analysis.visualize_quantile import get_acc_and_size
PI = 3.14159265359
def main(idx):
def main(p_reg_f=None, p_circle_f=None, p_quantile_f=None, p_box_f=None, vis_idx=(0,1000)):
p_reg: Pipeline = Storable.load("../evaluation/UJIndoorLoc/grid_test/output/REG") #Storable.load("evaluation/lohan/evaluation/final/output/REG_{}".format(idx))
p_circle: Pipeline = None #Storable.load("evaluation/lohan/full_cmp/output/CIRCLE")
p_quantile: Pipeline = None #Storable.load("evaluation/lohan/full_cmp/output/QUANTILE")
p_reg = None
p_quantile = None
p_circle = None
p_box = None
p_box: Pipeline = Storable.load("/home/laskama/Dropbox/BBOX_{}".format(idx)) # Storable.load("evaluation/lohan/evaluation/final/output/BBOX_{}".format(idx)) #Storable.load("../evaluation/UJIndoorLoc/grid/output/BBOX")##"evaluation/lohan/scaled_output_2/output/BBOX") # Storable.load("evaluation/gia/full_cmp/output/BBOX")
if p_reg_f is not None:
p_reg: Pipeline = Storable.load(p_reg_f)
p_box: Pipeline = Storable.load("../evaluation/UJIndoorLoc/grid_test/output/BBOX") #Storable.load("/Users/mariuslaska/Dropbox/BBOX_{}".format(idx)) # Storable.load("evaluation/lohan/evaluation/final/output/BBOX_{}".format(idx)) #Storable.load("../evaluation/UJIndoorLoc/grid/output/BBOX")##"evaluation/lohan/scaled_output_2/output/BBOX") # Storable.load("evaluation/gia/full_cmp/output/BBOX")
if p_quantile_f is not None:
p_quantile: Pipeline = Storable.load(p_quantile_f)
fp_dims = (p_box.data_provider.floorplan_width, p_box.data_provider.floorplan_height)
if p_circle_f is not None:
p_circle: Pipeline = Storable.load(p_circle_f)
convert_from_2dim_grid(p_box, grid_size=40)
if p_box_f is not None:
p_box: Pipeline = Storable.load(p_box_f)
#g_s = p_box.data_provider.
g_s = p_box.data_provider.grid_size
convert_from_2dim_grid(p_box, grid_size=g_s, quantile=False, store=True,
average_samples=True)
# compute avg box size per fold
avg_box_size = [np.mean(np.prod(pred[:, 2:], axis=1)) for pred in p_box.summary.y_pred]
print(avg_box_size)
dp = p_box.data_provider
#transform_quantile_to_bbox(p_box)
#convert_from_grid(p_box)
#p_box.summary.y_pred = [p_box.summary.y_pred[idx]]
......@@ -112,6 +128,7 @@ def main(idx):
#print("avg_center: {}".format(np.mean(y_pred[:, :2], axis=0)))
print("avg_box_size: {}".format(size))
radius_b = None
if p_reg is not None:
print("\n --------REG--------- \n")
......@@ -130,15 +147,18 @@ def main(idx):
#radius_b = find_radius_for_acc(p_reg, accuracy=acc_quantile)
#print("SIZE_REG (QUANTILE): {}".format(PI * math.pow(radius_b, 2)))
#radius_b = 0
visualize(p_reg, p_circle, p_quantile, p_box, radius_b)
#radius_b = 10
visualize(p_reg, p_circle, p_quantile, p_box, radius_b, vis_idx)
def visualize(p_reg: Pipeline=None, p_circle: Pipeline=None, p_quantile: Pipeline=None, p_box: Pipeline=None, radius=9.0):
#img = "../evaluation/gia/gia_floor_4.jpg"
def visualize(p_reg: Pipeline=None, p_circle: Pipeline=None, p_quantile: Pipeline=None, p_box: Pipeline=None, radius=9.0, vis_idx=(0,20)):
img = "../evaluation/gia/gia_floor_4.jpg"
#fp_dims = (83.32, 17.16)
img = resource_filename('data', 'lohan/CrowdsourcedDS1floor.png')
fp_dims = (200, 80)
#img = resource_filename('data', 'lohan/CrowdsourcedDS1floor.png')
#img = "/Users/mariuslaska/sciebo/SensorDatenGIA/Gebäude STL/transparent/2130_4.og.png"
fp_dims = (200, 80) # (200, 80)
fp_dims = (83.32, 17.6) #50.5)
#fp = FloorPlanPlotRec((83.32, 17.16), 20, floorplan_bg_img=img)
# plot_data_heatmap(pipe, floor_plotter=fp)
if p_reg is not None:
......@@ -171,12 +191,13 @@ def visualize(p_reg: Pipeline=None, p_circle: Pipeline=None, p_quantile: Pipelin
alpha=0.5)
fp.show_plot()
fp = FloorPlanPlotRec(fp_dims, 2, floorplan_bg_img=img)
for idx in range(len(y_true)):#len(y_true)):
#idx += 150
#fp = FloorPlanPlotRec(fp_dims, 2, floorplan_bg_img=img)
for idx in range(vis_idx[1]-vis_idx[0]):#len(y_true)):
idx += vis_idx[0]
#if np.prod(y_pred_box[idx, 2:]) < 600:
# continue
#fp = FloorPlanPlotRec(fp_dims, 2, floorplan_bg_img=img)
fp = FloorPlanPlotRec(fp_dims, 2, floorplan_bg_img=img)
fp.draw_points(y_true[idx, 0], y_true[idx, 1], color='g', alpha=0.5)
......@@ -195,9 +216,28 @@ def visualize(p_reg: Pipeline=None, p_circle: Pipeline=None, p_quantile: Pipelin
fp.draw_rectangles_new(anchors=y_pred_box[idx, :], color='black')
#fp.draw_ellipse(y_pred_box[idx, :2], y_pred_box[idx, 2], y_pred_box[idx, 3], color="r")
fp.show_plot()
fp.show_plot()
def transform_quantile_to_bbox(pipe: Pipeline):
    """Rewrite ``pipe.summary.y_pred`` in place as (center, extent) rows.

    For every fold the four leading columns ``c0..c3`` are replaced by
    ``(c0 + w / 2, c1 + h / 2, w, h)`` with ``w = c2 - c0`` and
    ``h = c3 - c1``.
    Presumably the input columns are the lower and upper bounds per axis --
    confirm against the model that produced them.

    :param pipe: pipeline whose ``summary.y_pred`` list is replaced
    """
    def _to_center_format(fold):
        low_x, low_y, high_x, high_y = (fold[:, col] for col in range(4))
        extent_x = high_x - low_x
        extent_y = high_y - low_y
        return np.stack(
            (low_x + extent_x / 2, low_y + extent_y / 2, extent_x, extent_y),
            axis=1)

    pipe.summary.y_pred = [_to_center_format(fold)
                           for fold in pipe.summary.y_pred]
if __name__ == "__main__":
for idx in range(1):
main(idx+1)
\ No newline at end of file
main(p_box_f="../evaluation/gia/evaluation/floor_classifier_v2/output/FLOOR_4_box_up_pred")#_up_pred")
#p_reg_f="../evaluation/gia/evaluation/floor_classifier_v2/output/FLOOR_1_reg_2")
#p_reg_f="../evaluation/simulation/evaluation/big/output/REG_2")
#main(p_reg_f="../evaluation/lohan/evaluation/new_knn/output/REG_2",
# p_box_f="../evaluation/lohan/evaluation/new_knn/output/BBOX_2")
\ No newline at end of file
import math
import os
from il_pipeline.summary.summary import KFoldClassSummary
from il_pipeline.utility.storable import Storable
from base.BboxModel import BboxModel
from base.bbox_pipeline import BboxPipeline
def load_pipeline_params(model_file, weights_file):
    """Re-evaluate a stored pipeline on its training data and store the result.

    Loads the pipeline from *model_file*, rebuilds its ``BboxModel``, runs
    ``evaluate_model`` on the "train" split of every fold and stores the
    collected summary as a new pipeline named ``<filename>_pred``.

    :param model_file: path of the stored pipeline
    :param weights_file: currently unused; ``evaluate_model`` derives the
        weight file of each fold from the model's output dir and filename
    """
    split_type = "train"

    pipe: BboxPipeline = Storable.load(model_file)
    params = pipe.model_params
    dp = pipe.data_provider

    summary = KFoldClassSummary([], [], [])
    model = BboxModel(params['type'], summary, dp, params,
                      pipe.config.output_dir, pipe.filename)
    model.setup_model(setup_params=True)

    # batch size is capped by the number of training samples
    num_train, _, _ = dp.get_train_val_test_num(
        area_labels=model.type != "regression")
    train_bs = min(num_train, params['batch_size'])

    for fold in range(dp.num_splits):
        dp.current_split_idx = fold
        evaluate_model(model, train_bs, split_type)

    # change output dir for correct storage
    pipe.config.output_dir = "../" + pipe.config.output_dir

    pred_pipe = BboxPipeline(dp, pipe.clusterer,
                             pipe.config,
                             pipe.model_params,
                             pipe.filename + "_pred")
    pred_pipe.summary = summary
    pred_pipe.store()
def evaluate_model(model, test_bs, split_type="test"):
    """
    Predicts one data split of the current fold and appends the result to
    the model's summary.

    :param model: object providing ``data_provider``, ``summary``,
        ``classifier``, ``output_dir``, ``filename`` and ``type``
    :param test_bs: batch size handed to the data generator
    :param split_type: split to evaluate, one of "test", "val" or "train"
    :raises ValueError: if *split_type* is not one of the supported splits
    """
    label_getter_names = {"test": "get_test_data",
                          "val": "get_val_data",
                          "train": "get_train_data"}
    # fail before the (expensive) prediction instead of after it
    if split_type not in label_getter_names:
        raise ValueError(
            "split_type must be one of {}, got {!r}".format(
                sorted(label_getter_names), split_type))

    data_provider = model.data_provider
    summary = model.summary
    classifier = model.classifier

    path = "../" + model.output_dir + "{}_f{}.hdf5".format(
        model.filename, data_provider.current_split_idx)
    # NOTE: if no weight file exists for this fold the classifier is used
    # with whatever weights it currently holds
    if os.path.exists(path):
        classifier.load_weights(path)

    # calculate the amount of steps needed to run once through the
    # evaluated split (required by generator)
    num_train, num_val, num_test = data_provider.get_train_val_test_num(
        area_labels=model.type != "regression")
    split_sizes = {"train": num_train, "val": num_val, "test": num_test}
    steps = math.ceil(split_sizes[split_type] / test_bs)

    area_labels = model.type == "classification" or model.type == "cnn"

    y_pred = classifier.predict_generator(
        data_provider.in_mem_data_generator(
            mode=split_type, model_type=model.type,
            area_labels=area_labels, batch_size=test_bs),
        steps=steps,
        verbose=0)

    if data_provider.area_labels is not None:
        # NOTE(review): always taken from the test split, even when another
        # split is evaluated -- confirm whether this is intended
        _, y_true = data_provider.get_test_data()
        summary.y_true.append(y_true)

    fetch_labels = getattr(data_provider, label_getter_names[split_type])
    _, y_true_labels = fetch_labels(area_labels=False)

    summary.y_pred.append(y_pred)
    summary.y_true_labels.append(y_true_labels)
    summary.num_folds += 1
if __name__ == "__main__":
    # Stored pipeline and the matching fold-0 weight file to re-evaluate.
    model_path = "/Users/mariuslaska/PycharmProjects/boxprediction/evaluation/gia/evaluation/floor_classifier_v2/output/FLOOR_4_box_up"
    weights_path = "/Users/mariuslaska/PycharmProjects/boxprediction/evaluation/gia/evaluation/floor_classifier_v2/output/FLOOR_4_box_up_f0.hdf5"
    load_pipeline_params(model_path, weights_path)
from il_pipeline.pipeline import Pipeline
from il_pipeline.utility.storable import Storable
from pkg_resources import resource_filename
import numpy as np
import matplotlib.pyplot as plt
from typing import List
from analysis.visualize_box_overlap import normalize, add_colorbar
from analysis.visualize_learning_progress import convert_from_2dim_grid
from base.floor_plan_plot import FloorPlanPlotRec
import math
PI = math.pi
def main(mcd_box_f=None, mcd_point_f=None, point_f=None, vis_idx=(0,2)):
    """Load the requested pipelines and hand them to ``visualize``.

    Only the pipelines whose file is given are loaded; they are passed on
    in the order point model, MCD box model, MCD point model.

    :param mcd_box_f: stored box pipeline; post-processed with
        ``convert_from_2dim_grid`` (``average_samples=False``)
    :param mcd_point_f: stored point pipeline; converted to boxes of
        constant side length 10 via ``convert_reg_to_box``
    :param point_f: stored pipeline that is visualised as loaded
    :param vis_idx: ``(start, stop)`` range of observations to visualise
    :raises ValueError: if none of the three files is given
    """
    if mcd_box_f is None and mcd_point_f is None and point_f is None:
        raise ValueError(
            "at least one of mcd_box_f, mcd_point_f or point_f is required")

    pipe = pipe_reg = pipe_point = None

    if mcd_box_f is not None:
        pipe = Storable.load(mcd_box_f)
        convert_from_2dim_grid(pipe, grid_size=40, quantile=False, store=True,
                               average_samples=False)

    if mcd_point_f is not None:
        pipe_reg = convert_reg_to_box(Storable.load(mcd_point_f),
                                      box_size_len=10)

    if point_f is not None:
        pipe_point = Storable.load(point_f)

    # keep the established model order, skipping pipelines not requested
    pipe_list = [p for p in (pipe_point, pipe, pipe_reg) if p is not None]

    visualize(p_box_list=pipe_list, vis_idx=vis_idx)
    print("test")
def convert_reg_to_box(p_reg: Pipeline=None, box_size_len=10):
    """Append constant box dimensions to the predictions of *p_reg*.

    For every fold the entries of ``summary.y_pred`` are stacked along a
    new third axis, and an equally shaped block filled with *box_size_len*
    is concatenated along axis 1. ``summary.y_pred`` is replaced in place.

    :param p_reg: pipeline to convert (modified in place)
    :param box_size_len: value used for every appended box dimension
    :return: the same pipeline object
    """
    converted_folds = []
    # zipping with the labels limits the result to folds that have labels
    fold_pairs = zip(p_reg.summary.y_pred, p_reg.summary.y_true_labels)
    for fold_samples, _ in fold_pairs:
        centers = np.stack(fold_samples, axis=2)
        constant_dims = np.ones_like(centers) * box_size_len
        converted_folds.append(
            np.concatenate((centers, constant_dims), axis=1))

    p_reg.summary.y_pred = converted_folds
    return p_reg
def visualize(p_box: Pipeline=None, p_box_list: List[Pipeline]=None, vis_idx=(0,2)):
    """Plot grid overlaps per observation and print summary stats per model.

    Models that already carry a ``model_stats`` attribute are not plotted
    again; their stored stats are reported. For all other models the stats
    are collected while plotting the observations selected by *vis_idx*.

    :param p_box: single pipeline, only used when *p_box_list* is None
    :param p_box_list: pipelines to visualise/compare
    :param vis_idx: ``(start, stop)`` index range into the fold-concatenated
        observations
    """
    if p_box_list is None:
        p_box_list = [] if p_box is None else [p_box]

    stat_keys = ("grid_prob", "correct", "num_cells", "avg_cell_size",
                 "draw_map", "draw_norm", "prob_map")
    collected_stats = [{key: [] for key in stat_keys} for _ in p_box_list]

    # NOTE(review): hard-coded number of observations, used for the progress
    # output and as denominator of the reported accuracy -- confirm that it
    # matches the evaluated data set
    plot_num = 1400

    # only models without stored stats need to be plotted
    pending = [(m_idx, model) for m_idx, model in enumerate(p_box_list)
               if not hasattr(model, 'model_stats')]
    # fold-concatenated (predictions, labels) per model; built on first use
    # so the concatenation is not repeated for every observation
    flattened = {}

    for offset in range(vis_idx[1] - vis_idx[0]):
        print(offset / plot_num)
        obs_idx = offset + vis_idx[0]

        for m_idx, model in pending:
            if m_idx not in flattened:
                flattened[m_idx] = (
                    np.concatenate(model.summary.y_pred, axis=0),
                    np.concatenate(model.summary.y_true_labels, axis=0))
            y_pred_box, y_true = flattened[m_idx]

            # a third axis holds several predictions per observation
            if len(np.shape(y_pred_box)) == 3:
                multi_box = np.transpose(y_pred_box[obs_idx, :, :])
                m_type = "box"
            else:
                multi_box = y_pred_box[obs_idx, :]
                m_type = "point"

            grid_prob, num_cells, draw_map, draw_norm, prob_map = \
                plot_box_overlap_per_grid(
                    model, multi_box, y_true[obs_idx], plot=True,
                    model=m_type)

            stats = collected_stats[m_idx]
            stats["draw_map"].append(draw_map)
            stats["draw_norm"].append(draw_norm)
            stats["prob_map"].append(prob_map)
            stats["correct"].append(grid_prob > 0)
            stats["grid_prob"].append(grid_prob)
            stats["num_cells"].append(num_cells)

        plt.show()

    # stored stats win; otherwise report what was collected above (reading
    # ``model_stats`` unconditionally would fail for exactly those models
    # that were just evaluated)
    model_stats = [getattr(model, "model_stats", fresh)
                   for model, fresh in zip(p_box_list, collected_stats)]

    for m_idx, m_stat in enumerate(model_stats):
        # ignore observations without a valid grid probability
        mask = np.where(~np.isnan(m_stat["grid_prob"]))[0]
        print("M_{}: ACC: {}, AVG_GRID_PROB: {}, NUM_CELLS: {}, AVG_CELL_SIZE: {}".format(
            m_idx,
            len(np.where(np.array(m_stat["correct"]))[0])/plot_num,
            np.mean(np.array(m_stat["grid_prob"])[mask]),
            np.mean(np.array(m_stat["num_cells"])),
            m_stat["avg_cell_size"]))
def plot_box_existing_stats(pipe: Pipeline, y_true, o_idx):
    """Draw the stored overlap map of one observation on the floor plan.

    Reads ``pipe.model_stats["draw_map"][o_idx]`` and fills every grid cell
    with a positive value using the colour looked up by that value; the
    true position is drawn on top and a colorbar is added using
    ``pipe.model_stats["draw_norm"][o_idx]``.

    :param pipe: pipeline carrying ``model_stats`` and a data provider with
        ``floorplan_width`` / ``floorplan_height``
    :param y_true: position to mark; elements 0 and 1 are used as
        coordinates
    :param o_idx: index of the observation within the stored stats
    """
    img = resource_filename('data', 'lohan/CrowdsourcedDS1floor.png')
    fp_dims = (200, 80)
    g_size = 2  # edge length of one grid cell

    dp = pipe.data_provider
    num_g_x = int(dp.floorplan_width / g_size)
    num_g_y = int(dp.floorplan_height / g_size)

    # jet colormap with uniform alpha; entry 0 is replaced by light grey
    cmap = plt.cm.jet
    my_cmap = cmap(np.arange(cmap.N))
    my_cmap[:, -1] = 0.8
    my_cmap[0] = (.75, .75, .75, 0.8)

    fp = FloorPlanPlotRec(fp_dims, 20, floorplan_bg_img=img,
                          title="Overlap")

    draw_norm = pipe.model_stats["draw_norm"][o_idx]
    overlap_count = pipe.model_stats["draw_map"][o_idx]

    for y_idx in range(num_g_y):
        # the map is indexed with the row order reversed relative to y_idx
        row = num_g_y - y_idx - 1
        for x_idx in range(num_g_x):
            count = overlap_count[row, x_idx]
            if count > 0:
                lower_left = np.array([x_idx * g_size, y_idx * g_size])
                upper_right = np.array([(x_idx + 1) * g_size,
                                        (y_idx + 1) * g_size])
                fp.draw_rectangles(np.concatenate((lower_left, upper_right)),
                                   color=my_cmap[int(count)], fill=True)

    fp.draw_points(y_true[0], y_true[1], color='g', alpha=1)

    add_colorbar(fp, cmap, draw_norm)
def plot_box_overlap_per_grid(pipe: Pipeline, multi_box, y_true, plot=True, model="box"):
dp = pipe.data_provider
fp_height = dp.floorplan_height
fp_width = dp.floorplan_width
g_size = 2
num_g_x = int(fp_width / g_size)
num_g_y = int(fp_height / g_size)
overlap_count = np.full((num_g_y, num_g_x), np.nan)
#kernel_vals = np.full((num_g_y, num_g_x), np.nan)
for y_idx in range(num_g_y):
for x_idx in range(num_g_x):
lower_left = np.array([x_idx * g_size, (y_idx) * g_size])
upper_right = np.array([(x_idx + 1) * g_size, (y_idx + 1) * g_size])
# get average error per grid
if model == "point":
kernel_val = kernel_value(multi_box, lower_left, upper_right)
overlap_count[num_g_y - y_idx - 1, x_idx] = kernel_val
elif model == "box":
overlap = overlap_grid(multi_box, lower_left, upper_right)
overlap_count[num_g_y - y_idx - 1, x_idx] = overlap
overlap_count, norm_overlap_count = normalize(overlap_count, min=1, max=255)
# normalize, such that sum = 1 (for PDF)
oc = overlap_count / np.sum(overlap_count)