diff --git a/04_TrainModels.py b/04_TrainModels.py index 30475161e16b1e0851fbe23e6ed728caa701e59c..a51b9fe7dd1ee93afeab845a1e62fe3db0e23c2d 100644 --- a/04_TrainModels.py +++ b/04_TrainModels.py @@ -1,3 +1,5 @@ +import numpy as np + # from models.RegressionModels import RegressionModels from models.LinearRegression import LinearRegressionModel @@ -6,19 +8,60 @@ models = { } for model_name in models: + print(f'Model: {model_name}') + model = models[model_name] - X_r_velocity, X_r_pressure = model.POD(10) + # Use elbow point for the number of POD modes + X_r_velocity, X_r_pressure = model.POD() model_velocity, model_pressure = model.train(X_r_velocity, X_r_pressure) model.save_model(model_velocity, model_pressure) -# predict_velocity_test, predict_pressure_test = linregMod.predict_test(model_velocity, model_pressure, rescale=False) + # Search for the best number of POD modes concerning the rmse (root mean square error) + # R = np.linspace(1, 25, 25, dtype=int) + R = np.linspace(1, 3, 3, dtype=int) + + mae_velocity_best, mae_pressure_best = model.mae_test(model_velocity, model_pressure) + rmse_velocity_best, rmse_pressure_best = model.rmse_test(model_velocity, model_pressure) + max_error_velocity_best, max_error_pressure_best = model.max_error_test(model_velocity, model_pressure) + + best_model_velocity, best_model_pressure = model_velocity, model_pressure + R_best = model.elbow_point(scaled=True) + + print(f'Finding the optimal number of nodes for model {model_name}') + for R_ in R: + X_r_velocity, X_r_pressure = model.POD(R_velocity=R_, R_pressure=R_) + model_velocity, model_pressure = model.train(X_r_velocity, X_r_pressure) + + mae_velocity_, mae_pressure_ = model.mae_test(model_velocity, model_pressure) + rmse_velocity_, rmse_pressure_ = model.rmse_test(model_velocity, model_pressure) + max_error_velocity_, max_error_pressure_ = model.max_error_test(model_velocity, model_pressure) -# predict_velocity_test_rescaled, predict_pressure_test_rescaled = linregMod.predict_test(model_velocity, model_pressure, rescale=True) + if rmse_velocity_ < rmse_velocity_best: + best_model_velocity = model_velocity + mae_velocity_best = mae_velocity_ + rmse_velocity_best = rmse_velocity_ + max_error_velocity_best = max_error_velocity_ + R_best = (R_, R_best[1]) -# mae_test_velocity, mae_test_pressure = linregMod.mae_test(model_velocity, model_pressure) + if rmse_pressure_ < rmse_pressure_best: + best_model_pressure = model_pressure + mae_pressure_best = mae_pressure_ + rmse_pressure_best = rmse_pressure_ + max_error_pressure_best = max_error_pressure_ + R_best = (R_best[0], R_) -# rmse_test_velocity, rmse_test_pressure = linregMod.rmse_test(model_velocity, model_pressure) + model.save_model(best_model_velocity, best_model_pressure, model_velocity_file=f'LinearRegressionModel_velocity_{R_best[0]}.pkl', model_pressure_file=f'LinearRegressionModel_pressure_{R_best[1]}.pkl') -# Linfty_test_velocity, Linfty_test_pressure = linregMod.Linfty_test(model_velocity, model_pressure) \ No newline at end of file + print(f'Optimal setup:') + print('---------------------------------') + print(f'Model: {model_name}') + print(f'\t \t Velocity \t Pressure') + print(f'MAE: \t {mae_velocity_best} \t {mae_pressure_best}') + print(f'RMSE: \t {rmse_velocity_best} \t {rmse_pressure_best}') + print(f'max error: \t {max_error_velocity_best} \t {max_error_pressure_best}') + print(f'Number of POD modes: \t {R_best[0]} \t {R_best[1]}') + print(f'Elbow point: \t {model.elbow_point(scaled=True)[0]} \t {model.elbow_point(scaled=True)[1]}') + print('---------------------------------') + print('\n \n') \ No newline at end of file diff --git a/05_EvaluateModels.py b/05_EvaluateModels.py new file mode 100644 index 0000000000000000000000000000000000000000..49c2966cb726c7d9cbd6726b76e357d209cf6eaf --- /dev/null +++ b/05_EvaluateModels.py @@ -0,0 +1,44 @@ +import matplotlib.pyplot as plt + +# from models.RegressionModels import RegressionModels +from models.LinearRegression import LinearRegressionModel + +models = { + 'linregMod': LinearRegressionModel() +} + +for model_name in models: + model = models[model_name] + + model_velocity, model_pressure = model.load_model() + + mae_test_velocity, mae_test_pressure = model.mae_test(model_velocity, model_pressure, axis='samples') + + rmse_test_velocity, rmse_test_pressure = model.rmse_test(model_velocity, model_pressure, axis='samples') + + max_error_test_velocity, max_error_test_pressure = model.max_error_test(model_velocity, model_pressure, axis='samples') + + # print(f'Model: {model_name}') + # print(f'MAE Velocity: {mae_test_velocity}') + # print(f'MAE Pressure: {mae_test_pressure}') + # print(f'RMSE Velocity: {rmse_test_velocity}') + # print(f'RMSE Pressure: {rmse_test_pressure}') + # print(f'max error Velocity: {max_error_test_velocity}') + # print(f'max error Pressure: {max_error_test_pressure}') + + fig, axs = plt.subplots(nrows=3, ncols=2) + axs[0,0].boxplot(mae_test_velocity) + axs[0,0].set_title('mae velocity') + axs[0,1].boxplot(mae_test_pressure) + axs[0,1].set_title('mae pressure') + axs[1,0].boxplot(rmse_test_velocity) + axs[1,0].set_title('rmse velocity') + axs[1,1].boxplot(rmse_test_pressure) + axs[1,1].set_title('rmse pressure') + axs[2,0].boxplot(max_error_test_velocity) + axs[2,0].set_title('max error velocity') + axs[2,1].boxplot(max_error_test_pressure) + axs[2,1].set_title('max error pressure') + + fig.tight_layout() + fig.savefig(f'Figures/{model_name}/errors_boxplot.jpg') \ No newline at end of file diff --git a/Figures/linregMod/errors_boxplot.jpg b/Figures/linregMod/errors_boxplot.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4c542ab97adbc088a9cbddb3a2a65192cec9e65b Binary files /dev/null and b/Figures/linregMod/errors_boxplot.jpg differ diff --git a/models/LinearRegression.py b/models/LinearRegression.py index e396adfdeac67e0b5d3b3185864dbca0948312a1..4913aeacba4c636d7c5f66c8a163bd29f6f1e59a 100644 --- a/models/LinearRegression.py +++ b/models/LinearRegression.py @@ -5,8 +5,11 @@ class LinearRegressionModel(RegressionModels): def __init__(self, ): super().__init__() - def POD(self, R, scaled=True): - return super().POD(R, scaled) + def elbow_point(self, scaled=True): + return super().elbow_point(scaled) + + def POD(self, R_velocity=None, R_pressure=None, scaled=True): + return super().POD(R_velocity, R_pressure, scaled) def train(self, X_r_velocity, X_r_pressure): model_velocity, model_pressure = LinearRegression(), LinearRegression() @@ -17,6 +20,9 @@ class LinearRegressionModel(RegressionModels): def save_model(self, model_velocity, model_pressure, model_velocity_file='LinearRegressionModel_velocity.pkl', model_pressure_file='LinearRegressionModel_pressure.pkl'): super().save_model(model_velocity, model_pressure, model_velocity_file, model_pressure_file) + + def load_model(self, model_velocity_file='LinearRegressionModel_velocity.pkl', model_pressure_file='LinearRegressionModel_pressure.pkl'): + return super().load_model(model_velocity_file, model_pressure_file) def predict_test(self, model_velocity, model_pressure, rescale=True): return super().predict_test(model_velocity, model_pressure, rescale) \ No newline at end of file diff --git a/models/RegressionModels.py b/models/RegressionModels.py index a7bf877bf640eaba7aa5f312e71084fb199b54fe..6393e2b1a47279aaef42a88bcbffd5ad53c15a4b 100644 --- a/models/RegressionModels.py +++ b/models/RegressionModels.py @@ -1,7 +1,8 @@ import pandas as pd import numpy as np import joblib -from sklearn.metrics import mean_absolute_error, root_mean_squared_error +from sklearn.metrics import mean_absolute_error, root_mean_squared_error, max_error +from kneed import KneeLocator class RegressionModels: def __init__(self): @@ -29,7 +30,28 @@ class RegressionModels: self.scaler_velocity = joblib.load('Data/train/scaler_velocity.pkl') self.scaler_pressure = joblib.load('Data/train/scaler_pressure.pkl') - def POD(self, R, scaled=True): + def elbow_point(self, scaled=True): + # Perform SVD + if scaled: + _, S_velocity, _ = np.linalg.svd(self.velocity_matrix_scaled_train) + _, S_pressure, _ = np.linalg.svd(self.pressure_matrix_scaled_train) + else: + _, S_velocity, _ = np.linalg.svd(self.velocity_matrix_train) + _, S_pressure, _ = np.linalg.svd(self.pressure_matrix_train) + + knee_locator_velocity = KneeLocator(range(len(S_velocity)), S_velocity, curve='convex', direction='decreasing') + knee_locator_pressure = KneeLocator(range(len(S_pressure)), S_pressure, curve='convex', direction='decreasing') + + elbow_point_velocity = knee_locator_velocity.knee + elbow_point_pressure = knee_locator_pressure.knee + + return elbow_point_velocity, elbow_point_pressure + + def POD(self, R_velocity=None, R_pressure=None, scaled=True): + if R_velocity is None: + R_velocity, _ = self.elbow_point(scaled=True) + if R_pressure is None: + _, R_pressure = self.elbow_point(scaled=True) # Perform SVD if scaled: U_velocity, S_velocity, V_velocity = np.linalg.svd(self.velocity_matrix_scaled_train) @@ -39,12 +61,12 @@ class RegressionModels: U_pressure, S_pressure, V_pressure = np.linalg.svd(self.pressure_matrix_train) # Extract reduced matrices - U_r_velocity = U_velocity[:, :R] - S_r_velocity = np.diag(S_velocity[:R]) - V_r_velocity = V_velocity[:R, :] - U_r_pressure = U_pressure[:, :R] - S_r_pressure = np.diag(S_pressure[:R]) - V_r_pressure = V_pressure[:R, :] + U_r_velocity = U_velocity[:, :R_velocity] + S_r_velocity = np.diag(S_velocity[:R_velocity]) + V_r_velocity = V_velocity[:R_velocity, :] + U_r_pressure = U_pressure[:, :R_pressure] + S_r_pressure = np.diag(S_pressure[:R_pressure]) + V_r_pressure = V_pressure[:R_pressure, :] # Reconstruct the matrix X_r_velocity = U_r_velocity @ (S_r_velocity @ V_r_velocity) @@ -57,6 +79,11 @@ class RegressionModels: joblib.dump(model_velocity, f'{model_path}/{model_velocity_file}') joblib.dump(model_pressure, f'{model_path}/{model_pressure_file}') + def load_model(self, model_velocity_file='LinearRegressionModel_velocity.pkl', model_pressure_file='LinearRegressionModel_pressure.pkl'): + model_path = 'models/trained_models' + model_velocity = joblib.load(f'{model_path}/{model_velocity_file}') + model_pressure = joblib.load(f'{model_path}/{model_pressure_file}') + return model_velocity, model_pressure def scale_(self, data_velocity, data_pressure): return self.scaler_velocity.transform(data_velocity), self.scaler_pressure.transform(data_pressure) @@ -74,14 +101,31 @@ class RegressionModels: ) return prediction_velocity.T, prediction_pressure.T - def mae_test(self, model_velocity, model_pressure): + def calculate_error_samples(self, metric, predict_velocity, predict_pressure): + n_samples = self.velocity_matrix_test.shape[1] + mae_velocity, mae_pressure = np.empty(n_samples), np.empty(n_samples) + for sample in range(n_samples): + mae_velocity[sample] = metric(self.velocity_matrix_test[:,sample], predict_velocity[:,sample]) + mae_pressure[sample] = metric(self.pressure_matrix_test[:,sample], predict_pressure[:,sample]) + return mae_velocity, mae_pressure + + def mae_test(self, model_velocity, model_pressure, axis='full'): predict_velocity, predict_pressure = self.predict_test(model_velocity, model_pressure, rescale=True) - return mean_absolute_error(self.velocity_matrix_test, predict_velocity), mean_absolute_error(self.pressure_matrix_test, predict_pressure) - - def rmse_test(self, model_velocity, model_pressure): + if axis=='full': # return scalar + return mean_absolute_error(self.velocity_matrix_test, predict_velocity), mean_absolute_error(self.pressure_matrix_test, predict_pressure) + elif axis=='samples': # return vector with error per sample + return self.calculate_error_samples(mean_absolute_error, predict_velocity, predict_pressure) + + def rmse_test(self, model_velocity, model_pressure, axis='full'): predict_velocity, predict_pressure = self.predict_test(model_velocity, model_pressure, rescale=True) - return root_mean_squared_error(self.velocity_matrix_test, predict_velocity), root_mean_squared_error(self.pressure_matrix_test, predict_pressure) + if axis=='full': # return scalar + return root_mean_squared_error(self.velocity_matrix_test, predict_velocity), root_mean_squared_error(self.pressure_matrix_test, predict_pressure) + elif axis=='samples': # return vector with error per sample + return self.calculate_error_samples(root_mean_squared_error, predict_velocity, predict_pressure) - def Linfty_test(self, model_velocity, model_pressure): + def max_error_test(self, model_velocity, model_pressure, axis='full'): predict_velocity, predict_pressure = self.predict_test(model_velocity, model_pressure, rescale=True) - return np.max(np.abs(self.velocity_matrix_test - predict_velocity)), np.max(np.abs(self.pressure_matrix_test - predict_pressure)) \ No newline at end of file + if axis=='full': # return scalar + return max_error(self.velocity_matrix_test - predict_velocity), max_error(self.pressure_matrix_test - predict_pressure) + elif axis=='samples': # return vector with error per sample + return self.calculate_error_samples(max_error, predict_velocity, predict_pressure) \ No newline at end of file diff --git a/models/__pycache__/LinearRegression.cpython-311.pyc b/models/__pycache__/LinearRegression.cpython-311.pyc index 079ac1173e16180be4bcb5c39e2641cc068eb135..afbf029f18d2fe32f1898bf93bb57b5d73e810db 100644 Binary files a/models/__pycache__/LinearRegression.cpython-311.pyc and b/models/__pycache__/LinearRegression.cpython-311.pyc differ diff --git a/models/__pycache__/RegressionModels.cpython-311.pyc b/models/__pycache__/RegressionModels.cpython-311.pyc index 5cc422cee7b42ae0ded58f42da400bdc38249924..c2365dbc51811a4fbb742f3660b6ee59b838d6d5 100644 Binary files a/models/__pycache__/RegressionModels.cpython-311.pyc and b/models/__pycache__/RegressionModels.cpython-311.pyc differ diff --git a/models/trained_models/LinearRegressionModel_pressure.pkl b/models/trained_models/LinearRegressionModel_pressure.pkl index 319adb429e1a7dc52224fb248580174f4c5b46e3..a09005c4123a47ef2c6716ba919efd6f7198f152 100644 Binary files a/models/trained_models/LinearRegressionModel_pressure.pkl and b/models/trained_models/LinearRegressionModel_pressure.pkl differ diff --git a/models/trained_models/LinearRegressionModel_pressure_4.pkl b/models/trained_models/LinearRegressionModel_pressure_4.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a09005c4123a47ef2c6716ba919efd6f7198f152 Binary files /dev/null and b/models/trained_models/LinearRegressionModel_pressure_4.pkl differ diff --git a/models/trained_models/LinearRegressionModel_velocity.pkl b/models/trained_models/LinearRegressionModel_velocity.pkl index d3edd84d3522351a71ee3cb5f060b36f41a82ebf..8781e04795b263614db758d4bb98f37144e7e9c4 100644 Binary files a/models/trained_models/LinearRegressionModel_velocity.pkl and b/models/trained_models/LinearRegressionModel_velocity.pkl differ diff --git a/models/trained_models/LinearRegressionModel_velocity_7.pkl b/models/trained_models/LinearRegressionModel_velocity_7.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8781e04795b263614db758d4bb98f37144e7e9c4 Binary files /dev/null and b/models/trained_models/LinearRegressionModel_velocity_7.pkl differ