Commit 614ce521 authored by Marius Laska
Browse files

modular implementation of new rot loss

parent 6a4143a1
......@@ -12,6 +12,9 @@ def main():
img = resource_filename('data', 'lohan/CrowdsourcedDS1floor.png')
fp_dims = (200, 80)
walls_h = np.load("hor_walls.npy")
#walls_h_cor = walls_h[np.where(np.arange(len(walls_h)) != 84)[0]]
#np.save("hor_walls.npy", walls_h_cor)
fp = FloorPlanPlotRec(fp_dims, 5, floorplan_bg_img=img, walls_file="ver_walls.npy", add_walls=True)
fp.show_plot()
......@@ -32,5 +35,5 @@ def encode():
print("test")
if __name__ == '__main__':
encode()
#main()
#encode()
main()
No preview for this file type
This diff is collapsed.
No preview for this file type
......@@ -360,10 +360,42 @@ def convert_from_2dim_overlapping_grid(pipe: Pipeline, grid_size=20, padding_rat
if len(res_folds) > 0:
return res_folds
def convert_grid_point_to_global_coord_system(point, grid_cell_idx, height, width, grid_size=40.0, padding_ratio=0.1):
    """Translate cell-local points into floor-plan coordinates.

    The plan is covered by ``ceil(height / grid_size) + 1`` rows and
    ``ceil(width / grid_size) + 1`` columns of cells, numbered row by row.
    Cell ``i`` has its origin at ``(col * grid_size, row * grid_size)``.
    A local point is scaled by ``grid_size / 2 + grid_size * padding_ratio``
    and shifted by the origin of the cell it belongs to.

    :param point: 2-d array; only the first two columns (local x, y) are used
    :param grid_cell_idx: cell index per row of ``point``; float-valued
        index arrays are accepted and truncated to integers
    :param height: floor-plan height
    :param width: floor-plan width
    :param grid_size: edge length of a grid cell
    :param padding_ratio: padding of a cell relative to ``grid_size``
    :return: array with the global (x, y) of every input point
    """
    n_rows = int(np.ceil(height / grid_size)) + 1
    n_cols = int(np.ceil(width / grid_size)) + 1

    # origins of all cells in row-major order, computed without a Python loop
    cell_ids = np.arange(n_rows * n_cols)
    origins = np.stack(
        [(cell_ids % n_cols) * grid_size, (cell_ids // n_cols) * grid_size],
        axis=1)

    # cell indices may arrive as floats (e.g. read back from a float array);
    # NumPy only accepts integer arrays for indexing
    cells = np.asarray(grid_cell_idx).astype(int)

    half_extent = grid_size / 2.0 + grid_size * padding_ratio
    return origins[cells, :] + np.asarray(point)[:, :2] * half_extent
def convert_from_2dim_overlapping_rotated_grid(pipe: Pipeline, grid_size=20, padding_ratio=0.1, quantile=False, store=True, average_samples=True):
n_cols = math.ceil(pipe.data_provider.floorplan_width / grid_size)
res_folds = []
res_grid_cell_folds = []
height = pipe.data_provider.floorplan_height
width = pipe.data_provider.floorplan_width
......@@ -402,6 +434,7 @@ def convert_from_2dim_overlapping_rotated_grid(pipe: Pipeline, grid_size=20, pad
pred_val_samples = pred_vals
pred_fold = np.zeros((len(pred_val_samples[0]), 5, len(pred_val_samples)))
pred_grid_cell = np.zeros((len(pred_val_samples[0]), len(pred_val_samples)))
for s_idx, pred_vals in enumerate(pred_val_samples):
......@@ -420,8 +453,11 @@ def convert_from_2dim_overlapping_rotated_grid(pipe: Pipeline, grid_size=20, pad
local_box[2:4] *= grid_size
pred_fold[idx, :, s_idx] = local_box
pred_grid_cell[idx, s_idx] = chosen[idx]
# average pred fold
res_grid_cell_folds.append(pred_grid_cell)
if average_samples:
pred_fold = np.mean(pred_fold, axis=2)
......@@ -433,6 +469,8 @@ def convert_from_2dim_overlapping_rotated_grid(pipe: Pipeline, grid_size=20, pad
if len(res_folds) > 0:
return res_folds
else:
return res_grid_cell_folds
def convert_from_2dim_grid(pipe: Pipeline, grid_size=20, quantile=False, store=True, average_samples=True):
......
import math
import os
import numpy as np
from data.data_provider_base import DataProviderBase
from il_pipeline.models.dnn_model import DnnModel
......@@ -15,7 +16,10 @@ from base.bbox_model_definition import bbox_model_for_generator
from base.hier_model_definition import hier_model_for_generator
from base.custom_loss import bbox_loss, define_bbox_loss, define_circle_loss, \
define_quantile_loss, \
define_single_quantile_loss, define_yolo_loss, define_grid_quantile_loss, define_yolo_loss_tanh, define_yolo_loss_tanh_no_size, define_rotated_box_loss
define_single_quantile_loss, define_yolo_loss, define_grid_quantile_loss, \
define_yolo_loss_tanh, define_yolo_loss_tanh_no_size, \
define_rotated_box_loss, define_rotated_box_loss_all_walls
from base.custom_rot_loss import define_rot_box_loss_modular
class BboxModel(DnnModel):
......@@ -77,10 +81,20 @@ class BboxModel(DnnModel):
elif self.type == "GRID_ROT-BBOX":
if 'loss' in params:
loss_func = define_rotated_box_loss(
height = self.data_provider.floorplan_height
width = self.data_provider.floorplan_width
grid_size = self.data_provider.grid_size
loss_func = define_rot_box_loss_modular(
params['loss'],
tf_walls_h=self.data_provider.grid_walls_h,
tf_walls_v=self.data_provider.grid_walls_v)
tf_walls_h=self.data_provider.grid_walls_h[:, :5],
tf_walls_v=self.data_provider.grid_walls_v[:, :5],
n_rows=np.ceil(height / grid_size) + 1,
n_cols=np.ceil(width / grid_size) + 1,
gs=grid_size)
# loss_func = define_rotated_box_loss(
# params['loss'],
# tf_walls_h=self.data_provider.grid_walls_h,
# tf_walls_v=self.data_provider.grid_walls_v)
params.update({'last_activation': tanh,
'losses': loss_func})
......
This diff is collapsed.
import numpy as np
import tensorflow as tf
import math as m
def define_rot_box_loss_modular(params, tf_walls_h, tf_walls_v, n_rows, n_cols, gs=40.0, pad_ratio=0.1):
    """Build the loss function for the rotated-box grid model.

    The walls are encoded once, relative to every grid cell, when this
    factory is called. The returned closure sums four terms per sample:
    grid-cell classification, wall distance, wall angle and box loss.

    :param params: loss configuration; forwarded to ``class_grid_cell_loss``
        and ``get_box_loss``
    :param tf_walls_h: horizontal walls; columns 0:2 and 2:4 are used as the
        two end points and column 4 as the wall angle
    :param tf_walls_v: vertical walls; columns 0:4 are used as end points
    :param n_rows: number of grid rows
    :param n_cols: number of grid columns
    :param gs: grid cell size
    :param pad_ratio: cell padding relative to ``gs``
    :return: function ``(y_true, y_pred) -> loss`` with one value per sample
    """
    tf_PI = tf.constant(m.pi)
    walls_h = tf.constant(tf_walls_h, dtype=tf.float32)
    walls_v = tf.constant(tf_walls_v, dtype=tf.float32)

    # sentinel distance marking "no usable wall"
    MAX_DIST = tf.constant(100, dtype=tf.float32)

    # must be read before walls_h is replaced by its per-cell encoding
    omega_walls = walls_h[:, 4]

    # encode walls with respect to each grid cell origin
    walls_h, walls_v, num_wh = compute_multi_origin_wall_encoding(
        gs, n_rows, n_cols, pad_ratio, walls_h, walls_v)

    # horizontal walls first; num_wh marks where the vertical ones start
    walls_all = tf.concat([walls_h, walls_v], axis=0)

    def yolo_loss(y_true, y_pred):
        #
        # Setup
        #
        loss = tf.zeros_like(y_true[:, 0])

        grid_cell_true, grid_cell_pred = get_grid_cell_values(y_true, y_pred)

        # box prediction of the grid cell given by y_true
        # (used for subsequent loss computations)
        y_pred_s = get_highest_confidence_prediction(y_true, y_pred)

        # walls encoded relative to the grid cell given by y_true
        walls = get_walls_encoding_with_respect_to_y_true_grid_cell(
            walls_all, y_true)

        d_hor_pos_only, d_hor_neg_only, d_hor, \
            d_ver_pos_only, d_ver_neg_only, _ = \
            get_distance_between_wall_bound_and_projection_point(
                y_pred_s, walls, num_wh, MAX_DIST)

        #
        # Classification loss: is the right grid cell chosen?
        #
        loss += class_grid_cell_loss(y_true, y_pred, params)

        #
        # Wall loss: minimize distance between box boundary and closest wall
        #
        # the second argument has to be the horizontal negative-side
        # distances (the vertical ones used to be passed twice)
        loss += get_wall_loss(d_hor_pos_only, d_hor_neg_only,
                              d_ver_pos_only, d_ver_neg_only,
                              grid_cell_true, grid_cell_pred,
                              MAX_DIST, scale=0.1)

        #
        # Angle loss: orientation should adapt to local wall structure
        # (only uses horizontal walls)
        #
        loss += get_angle_loss(d_hor, y_pred_s, omega_walls,
                               grid_cell_true, grid_cell_pred, MAX_DIST)

        #
        # Box loss: classic box loss (requires previous rotation of values)
        #
        y_true, y_pred_s = get_rotated_values(y_true, y_pred_s, tf_PI)
        loss += get_box_loss(y_true, y_pred_s,
                             grid_cell_true, grid_cell_pred, params)

        return loss

    return yolo_loss
def get_grid_cell_values(y_true, y_pred):
    """Return the one-hot true grid cell and the predicted cell scores.

    :param y_true: tensor whose column 2 holds the index of the true cell
    :param y_pred: network output; every 6th column starting at column 5 is
        read as the score of one grid cell
    :return: tuple ``(grid_cell_true, grid_cell_pred)``; the scores are
        mapped with ``(x + 1) * 0.5`` (from [-1, 1] to [0, 1])
    """
    # one score per grid cell, rescaled from [-1, 1] to [0, 1]
    raw_scores = y_pred[:, 5::6]
    grid_cell_pred = (raw_scores + 1.0) * 0.5

    # one-hot encoding of the true cell, as wide as the score vector
    true_idx = tf.cast(y_true[:, 2], dtype=tf.int32)
    n_cells = tf.shape(grid_cell_pred)[1]
    grid_cell_true = tf.one_hot(true_idx, depth=n_cells)

    return grid_cell_true, grid_cell_pred
def class_grid_cell_loss(y_true, y_pred, params):
    """Sum of squared differences between predicted and true cell scores.

    :param y_true: tensor whose column 2 holds the index of the true cell
    :param y_pred: network output (see ``get_grid_cell_values``)
    :param params: loss configuration; ``params["grid"]`` must exist and may
        carry a ``"scale"`` factor for this term
    :return: classification loss per sample
    """
    grid_cell_true, grid_cell_pred = get_grid_cell_values(y_true, y_pred)

    class_loss = tf.reduce_sum(
        tf.squared_difference(grid_cell_pred, grid_cell_true), axis=1)

    grid_params = params["grid"]
    if "scale" in grid_params:
        # cast instead of a bare tf.constant so that an integer factor from
        # the configuration does not become an int32 tensor that cannot be
        # multiplied with the float loss
        factor = tf.cast(grid_params["scale"], class_loss.dtype)
        class_loss = tf.multiply(class_loss, factor)

    return class_loss
def compute_multi_origin_wall_encoding(gs, n_rows, n_cols, pad_ratio, walls_h, walls_v):
    """Express every wall relative to every grid cell.

    Cell ``i`` (row-major numbering) gets the reference point
    ``(col * gs, row * gs)``. Both end points of each wall are shifted by
    that point and divided by ``gs / 2 + gs * pad_ratio``.

    :param gs: grid cell size
    :param n_rows: number of grid rows
    :param n_cols: number of grid columns
    :param pad_ratio: cell padding relative to ``gs``
    :param walls_h: horizontal walls, columns 0:2 and 2:4 are the end points
    :param walls_v: vertical walls, same column layout
    :return: ``(walls_h, walls_v, num_wh)``; both wall tensors have the shape
        (walls, cells, 4) and ``num_wh`` is the number of horizontal walls
    """
    gs = tf.constant(gs, dtype=tf.float32)

    # reference point of every grid cell
    cell_ids = tf.range(0, n_rows * n_cols)
    row_of_cell = tf.cast(tf.cast(cell_ids / n_cols, tf.int32), tf.float32)
    col_of_cell = tf.cast(tf.mod(cell_ids, n_cols), tf.float32)
    gs_centers = tf.stack([col_of_cell * gs, row_of_cell * gs], axis=1)

    def _encode(walls):
        # (walls, 1, 2) against (cells, 2) broadcasts to (walls, cells, 2)
        first = (walls[:, tf.newaxis, :2] - gs_centers) / (
            gs / 2.0 + gs * pad_ratio)
        second = (walls[:, tf.newaxis, 2:4] - gs_centers) / (
            gs / 2.0 + gs * pad_ratio)
        return tf.concat([first, second], axis=2)

    enc_h = _encode(walls_h)
    enc_v = _encode(walls_v)
    num_wh = tf.shape(enc_h)[0]

    return enc_h, enc_v, num_wh
def get_highest_confidence_prediction(y_true, y_pred):
    """Pick, per sample, the 6 outputs of the grid cell named in ``y_true``.

    Despite the name, the cell is selected through the true cell index
    (column 2 of ``y_true``), not through the predicted scores.

    :param y_true: tensor whose column 2 holds the index of the true cell
    :param y_pred: network output with 6 values per grid cell
    :return: tensor (batch, 6); 1.0 has been added to columns 2 and 3
    """
    batch = tf.shape(y_true)[0]
    n_cells = tf.shape(y_pred)[1] // 6

    # one row of 6 values per grid cell
    per_cell = tf.reshape(y_pred, (batch, n_cells, 6))

    # select row (sample i, cell index of sample i) directly; this avoids
    # the (batch, batch, 6) intermediate of gathering all cell indices for
    # all samples and keeping only the diagonal afterwards
    cell_idx = tf.cast(y_true[:, 2], tf.int32)
    selector = tf.stack([tf.range(batch), cell_idx], axis=1)
    y_pred_s = tf.gather_nd(per_cell, selector)

    #
    # Shift the box size outputs (columns 2, 3) by +1; for a tanh output
    # in [-1, 1] this yields values in [0, 2]
    #
    zeros = tf.zeros([batch, 2])
    ones = tf.ones([batch, 2])
    scale_add = tf.concat([zeros, ones, tf.zeros([batch, 2])], axis=1)

    return tf.add(y_pred_s, scale_add)
def get_walls_encoding_with_respect_to_y_true_grid_cell(walls, y_true):
    """Select, per sample, the wall encoding of the sample's true grid cell.

    :param walls: rank-3 tensor indexed as (wall, cell, coordinate)
    :param y_true: tensor whose column 2 holds the index of the true cell
    :return: tensor indexed as (sample, wall, coordinate)
    """
    cell_idx = tf.cast(y_true[:, 2], tf.int32)

    # put the cell axis first so that a single gather picks the block of
    # each sample; this replaces tiling the whole wall tensor batch-size
    # times before indexing it
    walls_per_cell = tf.transpose(walls, [1, 0, 2])

    return tf.gather(walls_per_cell, cell_idx)
def get_orthogonal_projection_of_points_to_wall(y_pred_s, walls, num_wh, horizontal_walls=True):
    """Project the predicted centre orthogonally onto the line of each wall.

    :param y_pred_s: selected prediction per sample; columns 0:2 are used as
        the point to project
    :param walls: tensor (sample, wall, 4) holding two end points per wall
    :param num_wh: number of horizontal walls; these come first on axis 1
    :param horizontal_walls: use the first ``num_wh`` walls if True,
        the remaining ones otherwise
    :return: tensor (sample, wall, 2) with the projected points
    """
    subset = walls[:, :num_wh, :] if horizontal_walls else walls[:, num_wh:, :]

    # treat the first end point as origin (vector projection)
    start = subset[:, :, :2]
    direction = tf.stack(
        [subset[:, :, 2] - subset[:, :, 0],
         subset[:, :, 3] - subset[:, :, 1]],
        axis=2)
    to_centre = y_pred_s[:, tf.newaxis, :2] - start

    # scalar projection coefficient per (sample, wall)
    coeff = (tf.einsum('ijk,ijk->ij', to_centre, direction) /
             tf.einsum('ijk,ijk->ij', direction, direction))
    foot = tf.einsum('ij,ijk->ijk', coeff, direction)

    # translate back
    return foot + start
def get_range_mask_of_walls(proj, walls, num_wh, horizontal_walls=True):
    """Flag projections that fall within the extent of their wall.

    For horizontal walls the x coordinate (columns 0 and 2 of the wall,
    column 0 of the projection) is tested; for vertical walls the
    y coordinate (columns 1 and 3, column 1 of the projection).

    :param proj: projected points (sample, wall, 2)
    :param walls: tensor (sample, wall, 4) holding two end points per wall
    :param num_wh: number of horizontal walls; these come first on axis 1
    :param horizontal_walls: selects the wall subset and the tested axis
    :return: boolean tensor (sample, wall)
    """
    if horizontal_walls:
        subset = walls[:, :num_wh, :]
        first_col, second_col, axis = 0, 2, 0
    else:
        subset = walls[:, num_wh:, :]
        first_col, second_col, axis = 1, 3, 1

    # both end point coordinates along the tested axis
    ends = tf.stack([subset[:, :, first_col], subset[:, :, second_col]],
                    axis=2)
    upper = tf.reduce_max(ends, axis=2)
    lower = tf.reduce_min(ends, axis=2)

    coord = proj[:, :, axis]
    below_upper = tf.less_equal(coord, upper)
    above_lower = tf.greater_equal(coord, lower)

    return tf.logical_and(below_upper, above_lower)
def _get_distance_between_wall_bound_and_projection_point(proj, y_pred_s, wall_range, MAX_DIST, horizontal_walls=True):
    """Distance between the box boundary and each wall along one axis.

    Horizontal walls use column 1 of the prediction as centre coordinate and
    column 3 as box size; vertical walls use columns 0 and 2. The distance is
    ``|projection - centre| - size / 2``. It is replaced by ``MAX_DIST`` when
    the projection lies outside the wall extent or when
    ``|projection - centre|`` exceeds 1.0.

    :param proj: projected points (sample, wall, 2)
    :param y_pred_s: selected prediction per sample
    :param wall_range: boolean mask (sample, wall) of usable projections
    :param MAX_DIST: sentinel value for masked entries
    :param horizontal_walls: selects the columns described above
    :return: ``(d_pos_only, d_neg_only, d)``; ``d_pos_only`` is masked where
        the centre coordinate is greater than the projection, ``d_neg_only``
        is masked everywhere else
    """
    p_idx, w_idx = (1, 3) if horizontal_walls else (0, 2)

    centre = tf.reshape(y_pred_s[:, p_idx], [-1, 1])
    half_size = tf.reshape(y_pred_s[:, w_idx] / 2, [-1, 1])
    foot = proj[:, :, p_idx]

    # distance between centre and wall, using the vector projection on wall
    dist_bound_wall = tf.abs(foot - centre)
    d = dist_bound_wall - half_size

    masked = tf.ones_like(d) * MAX_DIST
    d = tf.where(tf.logical_not(wall_range), masked, d)
    d = tf.where(tf.greater(dist_bound_wall, 1.0), masked, d)

    # split by the side of the centre the wall lies on
    pos_mask = tf.greater(centre, foot)
    d_pos_only = tf.where(pos_mask, masked, d)
    d_neg_only = tf.where(tf.logical_not(pos_mask), masked, d)

    return d_pos_only, d_neg_only, d
def get_distance_between_wall_bound_and_projection_point(y_pred_s, walls, num_wh, MAX_DIST):
    """Compute box-to-wall distances for horizontal and vertical walls.

    :param y_pred_s: selected prediction per sample
    :param walls: tensor (sample, wall, 4), horizontal walls first
    :param num_wh: number of horizontal walls
    :param MAX_DIST: sentinel value for walls that are not usable
    :return: ``(d_hor_pos_only, d_hor_neg_only, d_hor,
        d_ver_pos_only, d_ver_neg_only, d_ver)``
    """
    distances = []

    # horizontal walls first, then vertical ones
    for is_horizontal in (True, False):
        # orthogonal projection of the predicted centre onto the walls
        proj = get_orthogonal_projection_of_points_to_wall(
            y_pred_s, walls, num_wh, horizontal_walls=is_horizontal)

        # only projections within the wall extent are usable
        in_range = get_range_mask_of_walls(
            proj, walls, num_wh, horizontal_walls=is_horizontal)

        distances.extend(
            _get_distance_between_wall_bound_and_projection_point(
                proj, y_pred_s, in_range, MAX_DIST,
                horizontal_walls=is_horizontal))

    d_hor_pos_only, d_hor_neg_only, d_hor, \
        d_ver_pos_only, d_ver_neg_only, d_ver = distances

    return d_hor_pos_only, d_hor_neg_only, d_hor, d_ver_pos_only, d_ver_neg_only, d_ver
def _get_wall_loss(d_pos_only, d_neg_only, MAX_DIST):
    """Distance to the closest wall on each side, summed over both sides.

    A side on which every wall carries the ``MAX_DIST`` sentinel contributes
    zero.

    :param d_pos_only: distances (sample, wall) for one side
    :param d_neg_only: distances (sample, wall) for the other side
    :param MAX_DIST: sentinel value marking unusable walls
    :return: wall loss per sample
    """
    def _closest(distances):
        nearest = tf.reduce_min(tf.abs(distances), axis=1)
        # Explicit element-wise comparison: with TensorFlow 1.x tensors the
        # Python ``==`` operator is not an element-wise op unless v2 tensor
        # equality is enabled, so the sentinel would not be masked.
        # ``>=`` matches the check used in get_angle_loss.
        no_wall = tf.greater_equal(nearest, MAX_DIST)
        return tf.where(no_wall, tf.zeros_like(nearest), nearest)

    return _closest(d_pos_only) + _closest(d_neg_only)
def get_wall_loss(d_hor_pos_only, d_hor_neg_only, d_ver_pos_only,
                  d_ver_neg_only, grid_cell_true, grid_cell_pred, MAX_DIST, scale=0.1):
    """Wall loss of both orientations, weighted by the true-cell score.

    :param d_hor_pos_only: horizontal wall distances, one side
    :param d_hor_neg_only: horizontal wall distances, other side
    :param d_ver_pos_only: vertical wall distances, one side
    :param d_ver_neg_only: vertical wall distances, other side
    :param grid_cell_true: one-hot encoding of the true grid cell
    :param grid_cell_pred: predicted grid cell scores
    :param MAX_DIST: sentinel value marking unusable walls
    :param scale: constant factor applied to the result
    :return: scaled wall loss per sample
    """
    # predicted score of the true cell (one-hot mask times scores)
    true_cell_score = tf.reduce_sum(
        tf.multiply(grid_cell_pred, grid_cell_true), axis=1)

    horizontal = _get_wall_loss(d_hor_pos_only, d_hor_neg_only, MAX_DIST)
    vertical = _get_wall_loss(d_ver_pos_only, d_ver_neg_only, MAX_DIST)

    weighted = true_cell_score * (horizontal + vertical)

    return weighted * tf.constant(scale)
def get_angle_loss(d_hor, y_pred_s, omega_walls, grid_cell_true, grid_cell_pred, MAX_DIST):
    """Squared angle difference to the horizontal walls, distance weighted.

    Each wall is weighted by ``(min_dist / d_hor) ** 2``, where ``min_dist``
    is the smallest absolute distance of the sample. The weighted sum is
    multiplied by the predicted score of the true grid cell.

    :param d_hor: distances (sample, wall) to the horizontal walls
    :param y_pred_s: selected prediction per sample; column 4 is the angle
    :param omega_walls: angle of every horizontal wall
    :param grid_cell_true: one-hot encoding of the true grid cell
    :param grid_cell_pred: predicted grid cell scores
    :param MAX_DIST: sentinel value marking unusable walls
    :return: angle loss per sample
    """
    #
    # Angle diff to horizontal walls (inversely weighted by distance)
    #
    # tf.abs keeps the computation inside the TensorFlow graph; the NumPy
    # function is not meant to be applied to tensors
    min_dist_wall = tf.reduce_min(tf.abs(d_hor), axis=1)

    # no usable wall at all -> weight of zero for every wall
    min_dist_wall = tf.where(min_dist_wall >= MAX_DIST,
                             tf.zeros_like(min_dist_wall), min_dist_wall)

    # NOTE(review): an entry of d_hor that is exactly 0 leads to a division
    # by zero here - confirm whether this can occur in practice
    angle_diff_scale = tf.square(
        (1.0 / d_hor) * min_dist_wall[:, tf.newaxis])

    angle_diff = tf.square(
        y_pred_s[:, 4, tf.newaxis] - omega_walls[tf.newaxis, :])

    angle_diff_loss = tf.reduce_sum(angle_diff_scale * angle_diff, axis=1)

    # predicted score of the true cell (one-hot mask times scores)
    true_cell_score = tf.reduce_sum(
        tf.multiply(grid_cell_pred, grid_cell_true), axis=1)

    return true_cell_score * angle_diff_loss
def get_rotated_values(y_true, y_pred_s, tf_PI):
    """Rotate target and predicted centre by the predicted angle.

    The angle is ``tf_PI * y_pred_s[:, 4]``. Only the first two columns of
    both inputs are rotated.

    :param y_true: tensor with target (x, y) in columns 0, 1 and the grid
        cell index in column 2
    :param y_pred_s: selected prediction per sample
    :param tf_PI: constant pi
    :return: ``(y_true, y_pred_s)``; ``y_true`` consists of the rotated
        target and column 2, ``y_pred_s`` of the rotated centre followed by
        its original columns 2 onwards
    """
    #
    # following operations are computed with rotated local coordinate system
    #
    angle = tf_PI * y_pred_s[:, 4]
    cos_a = tf.cos(angle)
    sin_a = tf.sin(angle)

    def _rotate(x, y):
        # standard 2-d rotation around the origin of the local system
        return x * cos_a - y * sin_a, y * cos_a + x * sin_a

    rot_cx, rot_cy = _rotate(y_pred_s[:, 0], y_pred_s[:, 1])
    rot_tx, rot_ty = _rotate(y_true[:, 0], y_true[:, 1])

    rotated_true = tf.stack([rot_tx, rot_ty, y_true[:, 2]], axis=1)
    rotated_pred = tf.concat(
        [tf.stack([rot_cx, rot_cy], axis=1), y_pred_s[:, 2:]], axis=1)

    return rotated_true, rotated_pred
def get_box_loss(y_true, y_pred_s, grid_cell_true, grid_cell_pred, params):
    """Centre loss plus optional "outside" loss, weighted by the cell score.

    The centre loss is the squared distance between target and predicted
    centre. If ``params`` contains ``"outside"``, the squared difference
    between the per-axis centre error and ``box size / delta`` is added.

    :param y_true: tensor with the target (x, y) in columns 0, 1
    :param y_pred_s: selected prediction per sample; columns 0, 1 are the
        centre and columns 2, 3 the box size
    :param grid_cell_true: one-hot encoding of the true grid cell
    :param grid_cell_pred: predicted grid cell scores
    :param params: loss configuration; ``params["outside"]`` may hold
        ``"delta"`` (default 20.0) and ``"scale"``
    :return: box loss per sample
    """
    # predicted score of the true cell (one-hot mask times scores)
    box_loss_scale = tf.reduce_sum(
        tf.multiply(grid_cell_pred, grid_cell_true), axis=1)

    # center loss
    box_loss = tf.reduce_sum(
        tf.square(tf.subtract(y_true[:, :2], y_pred_s[:, :2])), axis=1)

    if "outside" in params:
        outside = params["outside"]

        # cast configuration values to the tensor dtype so that integers
        # (e.g. ``delta: 20``) do not end up as int32 constants
        delta = tf.cast(outside.get("delta", 20.0), y_pred_s.dtype)

        outside_x_loss = tf.subtract(
            tf.abs(tf.subtract(y_true[:, 0], y_pred_s[:, 0])),
            tf.divide(y_pred_s[:, 2], delta))
        outside_y_loss = tf.subtract(
            tf.abs(tf.subtract(y_true[:, 1], y_pred_s[:, 1])),
            tf.divide(y_pred_s[:, 3], delta))

        outside_loss = tf.square(outside_x_loss) + tf.square(outside_y_loss)

        if "scale" in outside:
            factor = tf.cast(outside["scale"], outside_loss.dtype)
            outside_loss = tf.multiply(outside_loss, factor)

        box_loss += outside_loss

    return box_loss * box_loss_scale
\ No newline at end of file
......@@ -201,10 +201,10 @@ class DataProviderGridBase(DataProviderBase):
self.grid_labels = grid_encoding
self.aug_encoding = aug_encoding
def encode_walls_to_2dim_overlapping_grid_encoding(self, walls, horizontal=True, overlap_strategy="ignore", grid_size=10.0, padding_ratio=0.1, height=None, width=None):
def encode_walls_to_2dim_overlapping_grid_encoding(self, walls, horizontal=True, overlap_strategy="ignore", angle_only=False, grid_size=10.0, padding_ratio=0.1, height=None, width=None):
img = resource_filename('data', 'lohan/CrowdsourcedDS1floor.png')
fp = FloorPlanPlotRec((200, 80), 2, floorplan_bg_img=img)
#fp = FloorPlanPlotRec((200, 80), 2, floorplan_bg_img=img)
if height is None:
height = self.floorplan_height
......@@ -232,7 +232,7 @@ class DataProviderGridBase(DataProviderBase):
y = r_idx * grid_size
origins.append(np.array([x, y]))
fp.axis.text(x+1, y-2, str(idx))
#fp.axis.text(x+1, y-2, str(idx))
# Determine closest origin for labels (the corresponding cell is
# responsible for encoding
......@@ -241,10 +241,10 @@ class DataProviderGridBase(DataProviderBase):
origins = np.array(origins)
fp.draw_rectangles_new(o_plt, color="black")
fp.draw_rectangles_new(o_plt_pad, color="grey")
fp.draw_points(origins[:, 0], origins[:, 1], color="black")
# fp.draw_rectangles_new(o_plt, color="black")
# fp.draw_rectangles_new(o_plt_pad, color="grey")
#
# fp.draw_points(origins[:, 0], origins[:, 1], color="black")
dist_line_center = np.full((len(walls), len(origins)), np.inf)
dist_to_center = np.full((len(walls), len(origins), 2), np.inf)
......@@ -283,6 +283,8 @@ class DataProviderGridBase(DataProviderBase):
walls = walls[no_split_required, :]
resp_grid_center_line_idx = resp_grid_center_line_idx[no_split_required]
resp_grid_center_origins = resp_grid_center_origins[no_split_required, :]
elif overlap_strategy == "keep":
print("keep walls between grid_idx")
if horizontal:
grid_encoding = np.zeros((len(walls[:, :2]), 6))
......@@ -299,8 +301,11 @@ class DataProviderGridBase(DataProviderBase):
# determine final encoding
# grid_encoding = np.zeros((len(walls[:, :2]), 5))
# grid_encoding[:, 4] = resp_grid_center_line_idx