diff --git a/src/gui_version/RandomForest_gui.py b/src/gui_version/RandomForest_gui.py index 1113c5775be328e006d3b6305a2ebcadef13e2d3..debdd6ef4893ff654ec63bb6513baf3d9ebb82a6 100644 --- a/src/gui_version/RandomForest_gui.py +++ b/src/gui_version/RandomForest_gui.py @@ -140,7 +140,7 @@ class prepare_data: classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) self.distibuish_encoding[feat] = 'ohe' else: - classes.append(list(set(df_sub[feat + '_encode'].tolist()))) + classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) self.distibuish_encoding[feat] = 'ordinal' self.categorical_classes[feat] = {} self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist] @@ -197,7 +197,7 @@ class prepare_data: if cat_feat.count(feat)>1: classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) else: - classes.append(list(set(df_sub[feat + '_encode'].tolist()))) + classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) self.categorical_classes[feat] = {} self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist] self.categorical_classes[feat]['num_cols'] = cat_feat.count(feat) @@ -240,8 +240,14 @@ class prepare_data: class RandomForest(prepare_data): + + """ + This class conducts the training of the Random Forest model and the + generation of the landslide susceptibility and hazard map. + """ def __init__(self, master, aim, parallel=False, log=None, retrain=None): + super().__init__(master, aim, log=log, retrain=retrain) self.aim = aim self.logger = log @@ -371,6 +377,13 @@ class RandomForest(prepare_data): """ Split a NumPy array into chunks without changing the number of columns. 
+ + Input: + pred: prediction dataset, varies depending on if the current run + is for model training or map generation + + Output: + None """ @@ -506,12 +519,22 @@ class RandomForest(prepare_data): not included in the training dataset are either set to no_value or nevertheless considered in the prediction. The surplus additional features are removed either way to achieve the same set of features - as in the training dataset + as in the training dataset. + + The prediction dataset is furthermore assessed if all features + that are included in the training dataset also appear in the prediction + dataset. If that is not the case, the training process is relaunched + with an adapted training dataset where the feature(s) that is/are + not contained in the prediction dataset are removed. The second + trained model will be stored in a separate folder which is named + <old_folder_name>_retrain. Input: - train_classes: dictionary containing for each categorical feature - all classes and the number of total classes - contained in the training dataset + train_classes: dictionary containing for each categorical feature + all classes and the number of total classes + contained in the training dataset + training_features: Complete feature names of the features + contained in the training dataset Output: None @@ -539,6 +562,7 @@ class RandomForest(prepare_data): self.master.update() self.logger.error('Error: Categorical feature ' + feat + ' not in prediction dataset') + self.logger.error('Error: cannot proceed with mapping') self.error = True self.retrain = True self.features_not_in_training.append(feat) diff --git a/src/plain_scripts/RandomForest.py b/src/plain_scripts/RandomForest.py index 8f48ed54d4480cd21bf499d7417ae9ca336b3522..02d800230a9fba9b742f0aa04b7d3119fdcf2c0b 100644 --- a/src/plain_scripts/RandomForest.py +++ b/src/plain_scripts/RandomForest.py @@ -107,7 +107,7 @@ class prepare_data: classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) 
self.distibuish_encoding[feat] = 'ohe' else: - classes.append(list(set(df_sub[feat + '_encoded'].tolist()))) + classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) self.distibuish_encoding[feat] = 'ordinal' self.categorical_classes[feat] = {} self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist] @@ -171,7 +171,7 @@ class prepare_data: if cat_feat.count(feat)>1: classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) else: - classes.append(list(set(df_sub[feat + '_encoded'].tolist()))) + classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) self.categorical_classes[feat] = {} self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist] self.categorical_classes[feat]['num_cols'] = cat_feat.count(feat)