Commit 45ef92b4 authored by Marius Laska

initial commit

import math

from data.data_provider_base import DataProviderBase
from il_pipeline.models.dnn_model import DnnModel
from il_pipeline.summary.summary import KFoldClassSummary
from il_pipeline.models.tf_model_definition import classification_model_for_generator, cnn_model_for_generator

from tensorflow.keras.activations import linear
from tensorflow.keras.backend import categorical_crossentropy, relu, softmax
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.optimizers import Adam

from bbox_model_definition import bbox_model_for_generator
from custom_loss import bbox_loss, center_loss

class BboxModel(DnnModel):

    def __init__(self, type="classification", summary: KFoldClassSummary = None,
                 data_provider: DataProviderBase = None,
                 params=None, output_dir=None, filename=None):
        super().__init__(type, summary, data_provider, params, output_dir, filename)

    def load_weights(self):
        self.classifier.load_weights(self.output_dir + self.filename + ".hdf5")

    def setup_params(self):
        params = self.params

        # update parameters with standard values for the classification model
        params.update({
            'shape': 'brick',
            'weight_regulizer': None,
            'kernel_initializer': 'he_normal',
            'optimizer': Adam,
            'activation': relu})

        if self.type == "classification" or "CNN" in self.type:
            params.update({'last_activation': softmax,
                           'losses': categorical_crossentropy})
        elif self.type == "regression":
            params.update({'last_activation': linear,
                           'losses': mean_squared_error})
        elif self.type == "BBOX":
            params.update({'last_activation': linear,
                           'losses': bbox_loss})

    def pre_train_model(self):
        self.params.update({'losses': center_loss})
        self.setup_model()
        self._train_model(save_weights_only=True, evaluate=False)
    def setup_model(self, setup_params=False):
        # use instance variables if no optional variables are supplied
        output_dir = self.output_dir
        data_provider = self.data_provider
        params = self.params

        if setup_params:
            self.setup_params()

        # obtain Keras classifier (Sequential) for the specified parameters
        x_cols, y_cols = data_provider.get_data_dims("regression")
        if self.type == "regression":
            y_cols = 2
        if self.type == "BBOX":
            y_cols = 4

        # obtain the model definition
        if "BBOX" in self.type:
            classifier_template = bbox_model_for_generator(metrics=[])
            self.classifier = classifier_template(X_cols=x_cols, Y_cols=y_cols, params=params)
        elif "CNN" in self.type:
            classifier_template = cnn_model_for_generator(metrics=[])
            self.classifier = classifier_template(input_shape=x_cols,
                                                  Y_cols=y_cols, params=params)
        else:
            classifier_template = classification_model_for_generator(metrics=[])
            self.classifier = classifier_template(
                X_cols=x_cols, Y_cols=y_cols, params=params)

        # training below always consumes regression-style (position) labels
        self.type = "regression"
    def _train_model(self, save_weights_only=False, evaluate=True):
        """
        Trains a DNN model with the specified parameters using the Keras
        fit_generator function. The model is evaluated on separate test data.
        """
        output_dir = self.output_dir
        data_provider = self.data_provider
        params = self.params

        # calculate batch sizes (might be smaller than the specified batch
        # size if not enough data is supplied)
        num_train, num_val, num_test = data_provider.get_train_val_test_num(
            area_labels=self.type != "regression")
        train_bs = min(num_train, params['batch_size'])
        val_bs = min(num_val, train_bs)
        test_bs = min(num_test, train_bs)

        # calculate the steps per epoch (used by the generator) to determine
        # after how many steps a new epoch starts (model has seen all data)
        val_steps_per_epoch = math.ceil(num_val / val_bs)

        num_augs = 0
        if 'augmentation' in params and params['augmentation'] is not None:
            num_augs = params['augmentation']
        train_steps_per_epoch = math.ceil(num_train * (num_augs + 1) / train_bs)

        # setup callbacks: save the best-performing model parameters and stop
        # early once the validation loss has stopped improving
        checkpoint_file_name = output_dir + "{}.hdf5".format(self.filename)
        checkpoint = ModelCheckpoint(checkpoint_file_name, verbose=0,
                                     monitor='val_loss',
                                     save_best_only=True, mode='auto',
                                     save_weights_only=save_weights_only)
        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=60,
                                       verbose=0,
                                       mode='auto')

        # obtain the generator function from the data provider
        generator = data_provider.in_mem_data_generator

        area_labels = self.type == "classification" or "CNN" in self.type

        noise_percentage = None
        if 'noise_percentage' in self.params:
            noise_percentage = self.params['noise_percentage']

        train_on_noisy_labels = 'noisy_labels' in self.params

        self.classifier.fit_generator(
            generator(mode='train',
                      model_type=self.type,
                      area_labels=area_labels,
                      augmentation=num_augs,
                      noise_percentage=noise_percentage,
                      noisy_labels=train_on_noisy_labels),
            validation_data=generator(mode='val', model_type=self.type,
                                      area_labels=area_labels,
                                      noisy_labels=train_on_noisy_labels,
                                      batch_size=val_bs),
            validation_steps=val_steps_per_epoch,
            steps_per_epoch=train_steps_per_epoch,
            epochs=params['epochs'],
            callbacks=[early_stopping, checkpoint],
            verbose=0)

        # evaluate the model and store results in the summary file
        if evaluate:
            self.evaluate_model(test_bs)
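For illustration only (not part of the commit): a minimal sketch of driving BboxModel by hand, mirroring the two-stage flow in BboxPipeline.train_model below. The params dict copies the model_params sections of the configs further down; `summary` and `provider` are assumed to be a KFoldClassSummary and a DataProviderBase built by the surrounding il_pipeline code.

# illustrative sketch; summary/provider construction is assumed
params = {'type': 'BBOX', 'first_neuron': 512, 'hidden_layers': 3, 'lr': 0.7,
          'batch_size': 32, 'epochs': 200, 'dropout': 0.5,
          'regularization_penalty': 0, 'augmentation': 0}

model = BboxModel(type='BBOX', summary=summary, data_provider=provider,
                  params=params, output_dir='evaluation/out/', filename='bbox')
model.setup_params()                  # fills in optimizer, activations, bbox_loss
model.pre_train_model()               # stage 1: pre-train with center_loss
model.type = "BBOX"
model.setup_model(setup_params=True)  # rebuild with the full bbox_loss
model.load_weights()                  # warm-start from the pre-trained checkpoint
model.train_model()                   # stage 2: fine-tune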
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

from il_pipeline.models.layer_generator import hidden_layers
from il_pipeline.models.lr_normalizer import lr_normalizer


def bbox_model_for_generator(metrics):

    def define_bbox_model_for_generator(X_cols, Y_cols, params) -> Sequential:
        model = Sequential()
        model.add(Dense(params['first_neuron'], input_dim=X_cols,
                        activation=params['activation'],
                        kernel_regularizer=regularizers.l2(
                            params['regularization_penalty'])))
        model.add(Dropout(params['dropout']))

        # add the hidden layers configured via params (e.g. 'hidden_layers', 'shape')
        hidden_layers(model, params, X_cols)

        model.add(Dense(Y_cols, activation=params['last_activation'],
                        kernel_initializer=params['kernel_initializer'],
                        kernel_regularizer=regularizers.l2(
                            params['regularization_penalty'])))

        # compile the model
        model.compile(loss=params['losses'],
                      metrics=[*metrics],
                      optimizer=params['optimizer'](
                          lr=lr_normalizer(params['lr'], params['optimizer'])))

        return model

    return define_bbox_model_for_generator
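A quick usage sketch (illustrative, not part of the commit): building a 4-output box regressor from the factory above. The params keys are exactly those consumed by the closure; the feature count 520 is a made-up example, and the [c_x, c_y, w, h] reading of the four outputs is inferred from the center/bbox losses used elsewhere in this commit.

from tensorflow.keras.optimizers import Adam

from bbox_model_definition import bbox_model_for_generator
from custom_loss import bbox_loss

params = {'first_neuron': 512, 'hidden_layers': 3, 'shape': 'brick',
          'activation': 'relu', 'last_activation': 'linear',
          'kernel_initializer': 'he_normal', 'dropout': 0.5,
          'regularization_penalty': 0.0, 'losses': bbox_loss,
          'optimizer': Adam, 'lr': 0.7}

build = bbox_model_for_generator(metrics=[])
model = build(X_cols=520, Y_cols=4, params=params)  # 520 inputs -> [c_x, c_y, w, h]
model.summary()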
from data.data_provider_base import DataProviderBase
from il_pipeline.pipeline import Pipeline
from il_pipeline.utility.config_reader import ConfigReader
from ldce.base import ClusterBase

from BboxModel import BboxModel


class BboxPipeline(Pipeline):

    def __init__(self, data_provider: DataProviderBase = None,
                 clusterer: ClusterBase = None, config: ConfigReader = None,
                 params=None, filename=None):
        super().__init__(data_provider, clusterer, config, params, filename)

    def train_model(self):
        super().train_model()

        params = self.model_params

        if params['type'] == "BBOX":
            model = BboxModel(params['type'], self.summary, self.data_provider,
                              params, self.config.output_dir, self.filename)
            model.setup_params()

            # stage 1: pre-train with the center loss (weights are saved)
            model.pre_train_model()

            # stage 2: rebuild with the full bbox loss, load the pre-trained
            # weights and fine-tune
            model.type = "BBOX"
            model.setup_model(setup_params=True)
            model.load_weights()
            model.train_model()
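For reference, this branch is selected by a pipeline entry with type BBOX, as in the pre-training config at the end of this commit (excerpt below; whether the `pretraining` key is read anywhere is not visible in this commit):

pipelines:
  - name: BBOX
    model_params:
      type: BBOX            # routes through the BboxModel branch above
      pretraining: center_loss
      epochs: 100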
data:
  # The data provider which should be used
  provider: PDdataProvider
  # File name of the floor plan image
  floor_plan_img: gia_floor_4.jpg
  # (train, val, test) ratios; test=0.2 => 5-fold
  split_ratio: [0.7, 0.1, 0.2]
  connection: postgres://ogvxncajvywtut:98d3541b0d276eeb9fb9a892a308afdb6f99d9f0c705676ccb6dd8ad57346ee9@ec2-54-217-208-105.eu-west-1.compute.amazonaws.com:5432/d4g66sh4pgrte0
  REST_connection:
    base_url: https://indoor-localization.herokuapp.com/
    token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySUQiOjEsImlhdCI6MTU4MzIyOTQ2MCwiZXhwIjoxNTgzMzE1ODYwLCJhdWQiOiJsb2NhbGhvc3QiLCJpc3MiOiJsb2NhbGhvc3QifQ.aF8-wgZYjANzRb5e0bSlwI0bnV7ru8Wwlb8N5W12KYg
    map_id: 1

#
# Global parameters, used whenever they are not set locally for a pipeline
#
global_params:
  # number of experiment repetitions
  repetitions: 1

  preprocessing:
    # Whether to standardize the RSS values
    standardize: True
    # Whether to assign labels with no matching area to the closest area
    assign_closest: False
    # The floor number of the Lohan dataset, which should be used
    #floor: 0
    # The number of temporal epochs into which the dataset is split
    #num_epochs: 10
    #epoch: 5
    # How to check for area matches of labels (to label positions with matching areas)
    area_assignment: convex_hull

  model_params:
    type: BBOX  # (DNN, CNN, kNN, SVM) supported (require different parameters)
    first_neuron: 512
    hidden_layers: 3
    lr: 0.7
    batch_size: 32
    epochs: 200
    dropout: 0.5
    regularization_penalty: 0
    augmentation: 0

#
# List of pipelines that are executed
#
pipelines:
  - name: gia_upload_class
    floor_plan:
      # 'segmentation' => computes a floor plan segmentation,
      # 'regression'   => uses DBSCAN to partition labels into a train/test split
      type: regression
      # file where the segmentation is stored or loaded from
      seg_file: evaluation/gia_class_particle
      # whether to 'load' or compute and 'store' the segmentation
      mode: store  # (load/store)
      # file location that holds a numpy array with walls (rows of form [s_x, s_y, e_x, e_y])
      walls_file: /home/laskama/PycharmProjects/automaticPartitioning/my_new_walls.npy
      # parameters of the floor plan segmentation algorithm
      segmentation:
        # algorithm identifier (currently only LDCE is supported)
        type: LDCE
        # file location of the precomputed distance file (recomputed if it does not exist yet)
        precomp_dist_file: evaluation/precomp_dist_gia(0,0).npy
        # Parameters of the LDCE algorithm
        stop_size: 1
        eps: 0.8
        max_eps: 50
        min_pts: 2
        min_members: 1
        rss_penalty: 0
        wall_penalty: 0
        cluster_penalty: 20
        area_mode: convex_hull

# base directories for file storage
output:
  model_dir: evaluation/gia/upload/particle/test/output/
  summary_dir: evaluation/gia/upload/particle/test/summary/
  img_folder: evaluation/gia/  # folder where the floor plan image is located (downloaded if not present)
data:
  # The data provider which should be used
  provider: PDdataProvider
  # File name of the floor plan image
  floor_plan_img: gia_floor_4.jpg
  # (train, val, test) ratios; test=0.2 => 5-fold
  split_ratio: [0.7, 0.1, 0.2]
  connection: postgres://ogvxncajvywtut:98d3541b0d276eeb9fb9a892a308afdb6f99d9f0c705676ccb6dd8ad57346ee9@ec2-54-217-208-105.eu-west-1.compute.amazonaws.com:5432/d4g66sh4pgrte0
  REST_connection:
    base_url: https://indoor-localization.herokuapp.com/
    token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySUQiOjEsImlhdCI6MTU4MzIyOTQ2MCwiZXhwIjoxNTgzMzE1ODYwLCJhdWQiOiJsb2NhbGhvc3QiLCJpc3MiOiJsb2NhbGhvc3QifQ.aF8-wgZYjANzRb5e0bSlwI0bnV7ru8Wwlb8N5W12KYg
    map_id: 1

#
# Global parameters, used whenever they are not set locally for a pipeline
#
global_params:
  # number of experiment repetitions
  repetitions: 1

  preprocessing:
    # Whether to standardize the RSS values
    standardize: True
    # Whether to assign labels with no matching area to the closest area
    assign_closest: False
    # The floor number of the Lohan dataset, which should be used
    #floor: 0
    # The number of temporal epochs into which the dataset is split
    #num_epochs: 10
    #epoch: 5
    # How to check for area matches of labels (to label positions with matching areas)
    area_assignment: convex_hull

  model_params:
    type: BBOX  # (DNN, CNN, kNN, SVM) supported (require different parameters)
    first_neuron: 512
    hidden_layers: 3
    lr: 0.7
    batch_size: 32
    epochs: 200
    dropout: 0.5
    regularization_penalty: 0
    augmentation: 0

#
# List of pipelines that are executed
#
pipelines:
  - name: BBOX
    floor_plan:
      # 'segmentation' => computes a floor plan segmentation,
      # 'regression'   => uses DBSCAN to partition labels into a train/test split
      type: regression
      # file where the segmentation is stored or loaded from
      seg_file: evaluation/gia_reg_bbox
      # whether to 'load' or compute and 'store' the segmentation
      mode: load  # (load/store)
      # file location that holds a numpy array with walls (rows of form [s_x, s_y, e_x, e_y])
      walls_file: /home/laskama/PycharmProjects/automaticPartitioning/my_new_walls.npy
      # parameters of the floor plan segmentation algorithm
      segmentation:
        # algorithm identifier (currently only LDCE is supported)
        type: LDCE
        # file location of the precomputed distance file (recomputed if it does not exist yet)
        precomp_dist_file: evaluation/precomp_dist_gia_new(0,0).npy
        # Parameters of the LDCE algorithm
        stop_size: 1
        eps: 0.7
        max_eps: 50
        min_pts: 3
        min_members: 1
        rss_penalty: 0
        wall_penalty: 0
        cluster_penalty: 20
        area_mode: convex_hull

# base directories for file storage
output:
  model_dir: evaluation/gia/pretrain/box/output/
  summary_dir: evaluation/gia/pretrain/box/summary/
  img_folder: evaluation/gia/  # folder where the floor plan image is located (downloaded if not present)
data:
  # The data provider which should be used
  provider: LohanDSprovider
  # File name of the floor plan image
  floor_plan_img: <image_name>
  # (train, val, test) ratios; test=0.2 => 5-fold
  split_ratio: [0.7, 0.1, 0.2]

#
# Global parameters, used whenever they are not set locally for a pipeline
#
global_params:
  # number of experiment repetitions
  repetitions: 1

  preprocessing:
    # Whether to standardize the RSS values
    standardize: True
    # Whether to assign labels with no matching area to the closest area
    assign_closest: False
    # The floor number of the Lohan dataset, which should be used
    floor: 0
    # The number of temporal epochs into which the dataset is split
    #num_epochs: 10
    #epoch: 5
    # How to check for area matches of labels (to label positions with matching areas)
    area_assignment: convex_hull

  floor_plan:
    # 'segmentation' => computes a floor plan segmentation,
    # 'regression'   => uses DBSCAN to partition labels into a train/test split
    type: regression
    # file where the segmentation is stored or loaded from
    seg_file: evaluation/lohan_class_particle
    # whether to 'load' or compute and 'store' the segmentation
    mode: load  # (load/store)
    # file location that holds a numpy array with walls (rows of form [s_x, s_y, e_x, e_y])
    walls_file: lohan/LohanWalls1stfloor.npy
    # parameters of the floor plan segmentation algorithm
    segmentation:
      # algorithm identifier (currently only LDCE is supported)
      type: LDCE
      # file location of the precomputed distance file (recomputed if it does not exist yet)
      precomp_dist_file: evaluation/precomp_dist_lohan(0,0).npy
      # Parameters of the LDCE algorithm
      stop_size: 1
      eps: 0.8
      max_eps: 50
      min_pts: 2
      min_members: 1
      rss_penalty: 0
      wall_penalty: 0
      cluster_penalty: 20
      area_mode: convex_hull

  model_params:
    type: BBOX  # (DNN, CNN, kNN, SVM) supported (require different parameters)
    first_neuron: 512
    hidden_layers: 3
    lr: 0.7
    batch_size: 32
    epochs: 200
    dropout: 0.5
    regularization_penalty: 0
    augmentation: 0

#
# List of pipelines that are executed
#
pipelines:
  - name: DNN
    model_params:
      pred: regression
      type: DNN
  - name: BBOX
    model_params:
      type: BBOX

# base directories for file storage
output:
  model_dir: evaluation/lohan_cmp/output/
  summary_dir: evaluation/lohan_cmp/summary/
  img_folder: evaluation/lohan/  # folder where the floor plan image is located (downloaded if not present)
data:
  # The data provider which should be used
  provider: LohanDSprovider
  # File name of the floor plan image
  floor_plan_img: <image_name>
  # (train, val, test) ratios; test=0.2 => 5-fold
  split_ratio: [0.7, 0.1, 0.2]

#
# Global parameters, used whenever they are not set locally for a pipeline
#
global_params:
  # number of experiment repetitions
  repetitions: 1

  preprocessing:
    # Whether to standardize the RSS values
    standardize: True
    # Whether to assign labels with no matching area to the closest area
    assign_closest: False
    # The floor number of the Lohan dataset, which should be used
    floor: 0
    # The number of temporal epochs into which the dataset is split
    #num_epochs: 10
    #epoch: 5
    # How to check for area matches of labels (to label positions with matching areas)
    area_assignment: convex_hull

  floor_plan:
    # 'segmentation' => computes a floor plan segmentation,
    # 'regression'   => uses DBSCAN to partition labels into a train/test split
    type: regression
    # file where the segmentation is stored or loaded from
    seg_file: evaluation/lohan_class_particle
    # whether to 'load' or compute and 'store' the segmentation
    mode: load  # (load/store)
    # file location that holds a numpy array with walls (rows of form [s_x, s_y, e_x, e_y])
    walls_file: lohan/LohanWalls1stfloor.npy
    # parameters of the floor plan segmentation algorithm
    segmentation:
      # algorithm identifier (currently only LDCE is supported)
      type: LDCE
      # file location of the precomputed distance file (recomputed if it does not exist yet)
      precomp_dist_file: evaluation/precomp_dist_lohan(0,0).npy
      # Parameters of the LDCE algorithm
      stop_size: 1
      eps: 0.8
      max_eps: 50
      min_pts: 2
      min_members: 1
      rss_penalty: 0
      wall_penalty: 0
      cluster_penalty: 20
      area_mode: convex_hull

  model_params:
    type: BBOX  # (DNN, CNN, kNN, SVM) supported (require different parameters)
    first_neuron: 512
    hidden_layers: 3
    lr: 0.7
    batch_size: 32
    epochs: 200
    dropout: 0.5
    regularization_penalty: 0
    augmentation: 0

#
# List of pipelines that are executed
#
pipelines:
  - name: BBOX
    model_params:
      type: BBOX
      pretraining: center_loss
      epochs: 100
#  - name: BBOX_1
#    model_params:
#      type: BBOX
#      epochs: 1
#  - name: BBOX_2
#    model_params:
#      type: BBOX
#      epochs: 2
#  - name: BBOX_3
#    model_params:
#      type: BBOX
#      epochs: 3
#  - name: BBOX_4
#    model_params:
#      type: BBOX
#      epochs: 4
#  - name: BBOX_5
#    model_params:
#      type: BBOX
#      epochs: 5

# base directories for file storage
output:
  model_dir: evaluation/lohan_pretrain/output/
  summary_dir: evaluation/lohan_pretrain/summary/
  img_folder: evaluation/lohan/  # folder where the floor plan image is located (downloaded if not present)
import numpy as np
import tensorflow as tf

# NOTE: bbox_loss is expected to come from custom_loss (see the imports in
# BboxModel above); if this test lives in a separate module it additionally
# needs `from custom_loss import bbox_loss`.


def test_center_loss():
    init = tf.global_variables_initializer()

    # ground-truth positions (n, 2) and predicted boxes (n, 4)
    y_true = np.array([[4, 3], [9, 2], [3, 1], [2, 2], [7, 2]])
    y_pred = np.array(
        [[4, 3, 2, 3], [8, 0, 6, 3], [10, 2, 8, 4], [3, 1, 7, 3],
         [8, 1, 12, 4]])

    y_true = tf.constant(y_true, dtype=tf.float32)
    y_pred = tf.constant(y_pred, dtype=tf.float32)

    with tf.Session() as sess:
        # Run the initializer.
        sess.run(init)
        print(sess.run(bbox_loss(y_true, y_pred)))

        writer = tf.summary.FileWriter("output", sess.graph)
        writer.close()
def center_loss(y_true, y_pred):
    # NOTE: the body of this function is cut off in this commit view. A
    # reconstruction consistent with test_center_loss() above (y_true holds
    # 2-D positions, y_pred presumably holds [c_x, c_y, w, h] boxes) is the
    # mean squared distance between the predicted box center and the true
    # position:
    return tf.reduce_mean(tf.square(y_true - y_pred[:, :2]), axis=-1)
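Under this assumed body, the arrays from test_center_loss() give per-sample losses of 0, 2.5, 25, 1 and 1, which can be cross-checked in plain numpy:

# Quick numpy cross-check of the reconstructed center_loss above (illustrative)
import numpy as np
y_true = np.array([[4, 3], [9, 2], [3, 1], [2, 2], [7, 2]], dtype=float)
y_pred = np.array([[4, 3, 2, 3], [8, 0, 6, 3], [10, 2, 8, 4],
                   [3, 1, 7, 3], [8, 1, 12, 4]], dtype=float)
print(np.mean((y_true - y_pred[:, :2]) ** 2, axis=-1))  # -> [ 0.   2.5 25.   1.   1. ]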