diff --git a/src/plain_scripts/RandomForest.py b/src/plain_scripts/RandomForest.py index 02d800230a9fba9b742f0aa04b7d3119fdcf2c0b..edfed30702c5f19691b48cace7e39a841b8bdb09 100644 --- a/src/plain_scripts/RandomForest.py +++ b/src/plain_scripts/RandomForest.py @@ -25,12 +25,12 @@ class prepare_data: used in the Random Forest classifier. """ - def __init__(self, aim, logger, retrain): + def __init__(self, aim, logger): invalid = False self.aim = aim self.logger = logger - self.retrain = retrain + if aim == 'train_test': print('Train the model') invalid = False @@ -53,7 +53,7 @@ class prepare_data: self.split_training_testing() elif aim == 'prediction': self.import_features() # Import prediction dataset - + def import_features(self): """ @@ -67,61 +67,29 @@ class prepare_data: else: path_pred = settings.path_pred - if path_pred.split('.')[-1] == 'csv': - self.features = pd.read_csv(path_pred) - - elif path_pred.split('.')[-1] == 'nc': - ds = nc.Dataset(path_pred) - pred = ds['Result'][:, :].data - pred_features = ds['features'][:].data - self.feature_list = char_to_string(pred_features) - - if 'xcoord' in self.feature_list and 'ycoord' in self.feature_list: - self.features = pd.DataFrame(pred, columns=self.feature_list) - else: - self.features = pd.DataFrame(pred, columns=['xcoord', 'ycoord']+self.feature_list) - - self.dropped = ds['Dropped'][:].data - self.dropped = [int(x) for x in self.dropped] + ds = nc.Dataset(path_pred) + pred = ds['Result'][:, :].data + pred_features = ds['features'][:].data + self.feature_list = char_to_string(pred_features) + self.features = pd.DataFrame(pred, columns=self.feature_list) + self.dropped = ds['Dropped'][:].data + self.dropped = [int(x) for x in self.dropped] + # Save the prediction coordinates in the prediction dataset self.xy['ycoord'] = self.features['ycoord'] self.xy['xcoord'] = self.features['xcoord'] - - # Remove all features that shall not be included in - # prediction from DataFrame (see settings!) 
- if len(settings.not_included_pred_data) > 0: - for dataset in settings.not_included_pred_data: - self.features = self.features.drop(dataset, axis=1) - - # Determine which classes are contained in the categorical features - # It is distinguished between one-hot and ordinal encoded features - self.categorical_classes = {} - cat_subset = [feat for feat in self.features.columns.tolist() if '_encoded' in feat] - df_sub = self.features[cat_subset] - cat_feat = ['_'.join(col.split('_')[:len(col.split('_'))-1]) for col in df_sub.columns.tolist()] - self.distibuish_encoding = {} - for feat in list(set(cat_feat)): - classes = [] - if cat_feat.count(feat)>1: - classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) - self.distibuish_encoding[feat] = 'ohe' - else: - classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) - self.distibuish_encoding[feat] = 'ordinal' - self.categorical_classes[feat] = {} - self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist] - self.categorical_classes[feat]['num_cols'] = cat_feat.count(feat) - + + self.features = self.features.drop(['xcoord', 'ycoord'], axis=1) self.feature_list = list(self.features.columns) - self.features_org = self.features.copy() + self.features = np.array(self.features) self.logger.info('Features for prediction were imported') self.logger.info('The following ' + str(len(self.feature_list)) + ' features are included in the prediction dataset: ' + str(self.feature_list)) - + def import_features_labels(self): """ @@ -133,6 +101,7 @@ class prepare_data: self.features = pd.read_csv(settings.path_train + 'training.csv') else: self.features = pd.read_csv(settings.path_train) + # Extract and remove labels from training dataset self.labels = np.array(self.features[self.label_name]).reshape( [np.shape(self.features[self.label_name])[0], 1]) @@ -142,50 +111,17 @@ class prepare_data: self.xy['ycoord'] = self.features['ycoord'] self.xy['xcoord'] = self.features['xcoord'] - # Drop ID from training data - self.features = self.features.drop('ID', axis=1) - self.features = self.features.drop(['xcoord', 'ycoord'], axis=1) - - # Remove all features that shall not be included in - # training from DataFrame (see settings!) 
- - if self.retrain: - features_to_remove = pd.read_csv(settings.path_ml + settings.model_to_save + '/feature_mismatch_training.csv')['to_drop'].to_list() - not_included_train_data = settings.not_included_train_data + features_to_remove - else: - not_included_train_data = settings.not_included_train_data - - - if len(not_included_train_data) > 0: - for dataset in not_included_train_data: - self.features = self.features.drop(dataset, axis=1) - - # Determine which classes are contained in the categorical features - # It is distinguished between one-hot and ordinal encoded features - self.categorical_classes = {} - cat_subset = [feat for feat in self.features.columns.tolist() if '_encoded' in feat] - df_sub = self.features[cat_subset] - cat_feat = ['_'.join(col.split('_')[:-2]) for col in df_sub.columns.tolist()] - for feat in list(set(cat_feat)): - classes = [] - if cat_feat.count(feat)>1: - classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) - else: - classes.append([f.split('_')[-2] for f in df_sub.columns.tolist() if feat in f]) - self.categorical_classes[feat] = {} - self.categorical_classes[feat]['classes'] = [item for sublist in classes for item in sublist] - self.categorical_classes[feat]['num_cols'] = cat_feat.count(feat) - - + self.features = self.features.drop(['xcoord', 'ycoord', 'ID'], axis=1) self.feature_list = list(self.features.columns) + self.features = np.array(self.features) + self.logger.info('Features for training were imported') self.logger.info('The following ' + str(len(self.feature_list)) + ' features are included in the training dataset: ' + str(self.feature_list)) - self.features = np.array(self.features) def split_training_testing(self): - + """ Splits the training data into training and validation data. 
""" @@ -196,33 +132,31 @@ class prepare_data: test_size=self.test_size, random_state=settings.random_seed, stratify=self.labels) + print('Data split') self.logger.info('Training data split in training and test dataset') - - + + class RandomForest(prepare_data): - def __init__(self, aim, parallel=False, log=None, retrain=None): + def __init__(self, aim, parallel=False, log=None): - super().__init__(aim, log, retrain) + super().__init__(aim, log) self.aim = aim self.parallel = parallel - self.retrain = retrain + self.logger = log self.num_chunks = 10 + # Random Forest settings self.criterion = settings.criterion self.n_estimators = settings.num_trees self.max_depth = settings.depth self.model_dir = settings.model_database_dir - if self.retrain: - self.model_to_load = settings.model_to_load + '_retrain' - self.model_to_save = settings.model_to_save + '_retrain' - else: - self.model_to_load = settings.model_to_load - self.model_to_save = settings.model_to_save + self.model_to_load = settings.model_to_load + self.model_to_save = settings.model_to_save self.output_dir = None if aim == 'train_test': @@ -240,11 +174,10 @@ class RandomForest(prepare_data): print('Prediction is performed') self.create_output_dir() self.load_model() - if not self.error: - self.predict() - self.extract_pos_neg_predictions() - self.reshape_prediction() - self.save_prediction() + self.predict() + self.extract_pos_neg_predictions() + self.reshape_prediction() + self.save_prediction() def define(self): @@ -375,8 +308,7 @@ class RandomForest(prepare_data): 'roc_tpr': self.tpr, 'roc_auc': self.roc_auc, 'accuracy': self.acc, - 'fbeta': self.fbeta, - 'categories': self.categorical_classes + 'fbeta': self.fbeta } with open(settings.model_database_dir @@ -386,77 +318,6 @@ class RandomForest(prepare_data): self.logger.info('Parameters are saved') - def adapt_categorical_features(self, train_classes, training_features): - - """ - The encoded features in the training and prediction dataset are - compared regarding the contained classes. Depending on the user - input, instances in the prediction dataset with classes that are - not included in the training dataset are either set to no_value or - nevertheless considered in the prediction. 
The surplus additional - features are removed either way to achieve the same set of features - as in the training dataset - """ - - self.instances_to_drop = [] - self.features_not_in_training = [] - - for feat in [val for val in training_features if '_encode' in val]: - if feat not in self.feature_list: - print('Error: cannot proceed with mapping') - print('Error: Categorical feature ' + feat + ' not in prediction dataset') - self.logger.error('Error: Categorical feature ' + feat + ' not in prediction dataset') - - self.error = True - self.retrain = True - self.features_not_in_training.append(feat) - - if len(self.features_not_in_training) > 0: - pd.DataFrame(self.features_not_in_training, columns=['to_drop']).to_csv(self.model_dir + self.model_to_load + 'feature_mismatch_training.csv', index=False) - - if not self.retrain: - if list(set([val for val in training_features if '_encode' in val])) != list(set(self.feature_list)): - for feat in list(set(['_'.join(val.split('_')[:-2]) for val in self.feature_list if '_encode' in val])): - if feat in list(self.distibuish_encoding.keys()): - if self.distibuish_encoding[feat] == 'ohe': - if (train_classes[feat]['num_cols'] < self.categorical_classes[feat]['num_cols']) or (set(train_classes[feat]['classes']) != set(self.categorical_classes[feat]['classes'])): - print(feat + ': Prediction dataset contains more or other classes than training dataset') - - self.logger.warning(feat + ': Prediction dataset contains more classes than training dataset') - self.logger.info('Apply user defined handling approach') - - common_elements = set(train_classes[feat]['classes']).intersection(set(self.categorical_classes[feat]['classes'])) - - if self.properties_map['keep']: - if len(common_elements) == 0: - print('Error: no common classes for ' + feat + ' in training and prediction dataset') - self.logger.error('Error: no common classes for ' + feat + ' in training and prediction dataset') - self.error = True - else: - to_drop = [feat + '_' + str(f) + '_encode' for f in self.categorical_classes[feat]['classes'] if f not in common_elements] - self.features = self.features.drop(to_drop, axis=1) - self.feature_list = self.features.columns.tolist() - elif self.properties_map['remove_instances']: - to_drop_col = [feat + '_' + str(f) + '_encode' for f in self.categorical_classes[feat]['classes'] if f not in common_elements] - to_drop_row = [] - for col in to_drop_col: - to_drop_row = to_drop_row + self.features.index[self.features[col] == 1].tolist() - self.features = self.features.drop(to_drop_col, axis=1) - - print('Not matching features have been removed') - self.logger.info('Not matching features have been removed') - - self.feature_list = self.features.columns.tolist() - self.instances_to_drop = self.instances_to_drop + to_drop_row - - print('Instances to consider during mapping have been adapted') - self.logger.info('Instances to consider during mapping have been adapted') - - print('Categorical features have been handled and hamonised') - self.logger.info('Categorical features have been handled and hamonised') - self.logger.info('Remaining features: ' + str(self.feature_list)) - - def load_model(self): """ @@ -478,63 +339,7 @@ class RandomForest(prepare_data): + '/model_params.pkl', 'rb') as f: params = pkl.load(f) features = params['features'] - self.error = False - self.adapt_categorical_features(params['categories'], features) - - if not self.error: - if len(self.feature_list) == len(features): - if set(self.feature_list) != set(features): - - print('Error: Not 
all features of the model are contained in the prediction dataset') - self.logger.error('Error: Not all features of the model are contained in the prediction dataset') - - self.error = True - elif self.feature_list != features: - - print('The order or features differs. Prediction features are reordered') - self.logger.info('The order or features differs. Prediction features are reordered') - - self.features = self.features[features] - if self.features.columns.tolist() != features: - print('There is still something wrong with the order of the features!') - - elif self.feature_list == features: - - print('Prediction and training dataset have the same order') - self.logger.info('Prediction and training dataset have the same order') - elif len(self.feature_list) < len(features): - - print('Error: Not all features of the model are contained in the prediction dataset') - self.logger.error('Error: Not all features of the model are contained in the prediction dataset') - - self.error = True - elif len(self.feature_list) > len(features): - if set(features).issubset(self.feature_list): - to_drop = list(set(self.feature_list)-set(features)) - self.features = self.features.drop(to_drop, axis=1) - self.feature = self.features[features] - if self.features.columns.tolist() != features: - print('There is still something wrong with the order of the features!') - self.error = True - else: - print('Features in the prediction dataset which were not used for training were removed') - print('Features in the prediction dataset were sorted to match the training features') - - self.logger.warning('Features in the prediction dataset which were not used for training were removed') - self.logger.info('Features left: ' + str(self.feature_list)) - self.logger.info('Features in the prediction dataset were sorted to match the training features') - else: - Label(self.master, text='Error: Not all features of the model are contained in the prediction dataset').grid( - row=self.row, column=1) - self.row = self.row + 1 - self.master.update() - - self.logger.error('Error: Not all features of the model are contained in the prediction dataset') - - self.error = True - if not self.error: - self.feature_list = self.features.columns.tolist() - self.features = self.features.to_numpy() + self.logger.info('Model loaded from ' + self.model_dir @@ -572,9 +377,8 @@ class RandomForest(prepare_data): Reshape the individual predictions into a map. 
""" - dropped = list(set(self.dropped + self.instances_to_drop)) arr_xy = np.array(self.xy) - arr_xy[dropped, :] = settings.no_value#*np.shape(arr_xy)[1] + arr_xy[self.dropped, :] = settings.no_value#*np.shape(arr_xy)[1] result = np.reshape(list(arr_xy[:, 2]), (len(list(set(self.xy['ycoord']))), diff --git a/src/plain_scripts/check_user_input.py b/src/plain_scripts/check_user_input.py index 768d71cb640e50abb37180767bd2925342d63fe3..dd336ba013567b5e15cf7391ccea90cd88e0b487 100644 --- a/src/plain_scripts/check_user_input.py +++ b/src/plain_scripts/check_user_input.py @@ -14,16 +14,16 @@ class check_general_settings(): if training_dataset or map_generation: if os.path.isdir(path_train): - save_path = path_train + 'check_user_input.log' + save_path = path_train + '/check_user_input.log' else: - save_path = os.path.dirname(path_train) + 'check_user_input.log' + save_path = os.path.dirname(path_train) + '/check_user_input.log' elif prediction_dataset: if os.path.isdir(path_pred): - save_path = path_pred + 'check_user_input.log' + save_path = path_pred + '/check_user_input.log' else: - save_path = os.path.dirname(path_pred) + 'check_user_input.log' + save_path = os.path.dirname(path_pred) + '/check_user_input.log' else: - save_path = 'check_user_input.log' + save_path = '/check_user_input.log' if os.path.exists(save_path): os.remove(save_path) diff --git a/src/plain_scripts/compatibility_of_input_datasets.py b/src/plain_scripts/compatibility_of_input_datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..255869e669b384d04b44030b4381f7f7b65795ef --- /dev/null +++ b/src/plain_scripts/compatibility_of_input_datasets.py @@ -0,0 +1,259 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Jan 29 13:20:59 2025 + +@author: aedrich +""" + +import numpy as np +import pandas as pd +import netCDF4 as nc +import pickle as pkl +import os +import logging +import settings +import re + +from sklearn.model_selection import train_test_split +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import mean_squared_error, f1_score, roc_curve, auc, fbeta_score +from joblib import delayed, Parallel +from tkinter import Label + +from utilities.ncfile_generation import generate_basic_ncfile +from utilities.strings_for_ncfile import char_to_string, features_to_char + + +class comparison_training_prediction_dataset: + + def __init__(self, logger): + + self.logger = logger + self.error = False + + self.import_prediction_dataset() + self.import_training_dataset() + self.compare_features() + if not self.error: + self.additional_instances_to_drop() + self.save_prediction_dataset() + self.save_training_dataset() + + def import_prediction_dataset(self): + + ds = nc.Dataset(settings.path_pred) + pred = ds['Result'][:, :].data + pred_features = ds['features'][:].data + self.feature_list = char_to_string(pred_features) + + if 'xcoord' in self.feature_list and 'ycoord' in self.feature_list: + self.pred = pd.DataFrame(pred, columns=self.feature_list) + else: + self.pred = pd.DataFrame(pred, columns=['xcoord', 'ycoord']+self.feature_list) + + self.xy = pd.DataFrame() + self.xy['ycoord'] = self.pred['ycoord'] + self.xy['xcoord'] = self.pred['xcoord'] + + self.idx = ds['Dropped'][:].data + self.idx = [int(x) for x in self.idx] + + if len(settings.not_included_pred_data) > 0: + for dataset in settings.not_included_pred_data: + if dataset in self.pred.columns.tolist(): + self.pred = self.pred.drop(dataset, axis=1) + + self.logger.info('Prediction dataset imported') + 
diff --git a/src/plain_scripts/compatibility_of_input_datasets.py b/src/plain_scripts/compatibility_of_input_datasets.py
new file mode 100644
index 0000000000000000000000000000000000000000..255869e669b384d04b44030b4381f7f7b65795ef
--- /dev/null
+++ b/src/plain_scripts/compatibility_of_input_datasets.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Jan 29 13:20:59 2025
+
+@author: aedrich
+"""
+
+import numpy as np
+import pandas as pd
+import netCDF4 as nc
+import pickle as pkl
+import os
+import logging
+import settings
+import re
+
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import mean_squared_error, f1_score, roc_curve, auc, fbeta_score
+from joblib import delayed, Parallel
+from tkinter import Label
+
+from utilities.ncfile_generation import generate_basic_ncfile
+from utilities.strings_for_ncfile import char_to_string, features_to_char
+
+
+class comparison_training_prediction_dataset:
+
+    def __init__(self, logger):
+
+        self.logger = logger
+        self.error = False
+
+        self.import_prediction_dataset()
+        self.import_training_dataset()
+        self.compare_features()
+        if not self.error:
+            self.additional_instances_to_drop()
+            self.save_prediction_dataset()
+            self.save_training_dataset()
+
+    def import_prediction_dataset(self):
+
+        ds = nc.Dataset(settings.path_pred)
+        pred = ds['Result'][:, :].data
+        pred_features = ds['features'][:].data
+        self.feature_list = char_to_string(pred_features)
+
+        if 'xcoord' in self.feature_list and 'ycoord' in self.feature_list:
+            self.pred = pd.DataFrame(pred, columns=self.feature_list)
+        else:
+            self.pred = pd.DataFrame(pred, columns=['xcoord', 'ycoord']+self.feature_list)
+
+        self.xy = pd.DataFrame()
+        self.xy['ycoord'] = self.pred['ycoord']
+        self.xy['xcoord'] = self.pred['xcoord']
+
+        self.idx = ds['Dropped'][:].data
+        self.idx = [int(x) for x in self.idx]
+
+        if len(settings.not_included_pred_data) > 0:
+            for dataset in settings.not_included_pred_data:
+                if dataset in self.pred.columns.tolist():
+                    self.pred = self.pred.drop(dataset, axis=1)
+
+        self.logger.info('Prediction dataset imported')
+        self.logger.info('The following ' + str(len(self.pred.columns.tolist()))
+                         + ' features are included in the prediction dataset: '
+                         + str(self.pred.columns.tolist()))
+
+    def import_training_dataset(self):
+
+        # Import training dataset as csv file
+        self.train = pd.read_csv(settings.path_train)
+        # Extract and remove labels from training dataset
+        self.labels = np.array(
+            self.train['label']).reshape(
+                [np.shape(self.train['label'])[0], 1])
+
+        self.xy_train = pd.DataFrame()
+        self.xy_train['ID'] = self.train['ID']
+        self.xy_train['label'] = self.train['label']
+        self.xy_train['ycoord'] = self.train['ycoord']
+        self.xy_train['xcoord'] = self.train['xcoord']
+
+        self.train = self.train.drop(['xcoord', 'ycoord', 'ID', 'label'], axis=1)
+
+        if len(settings.not_included_train_data) > 0:
+            for dataset in settings.not_included_train_data:
+                if dataset in self.train.columns.tolist():
+                    self.train = self.train.drop(dataset, axis=1)
+
+        self.logger.info('Training dataset imported')
+        self.logger.info('The following ' + str(len(self.train.columns.tolist()))
+                         + ' features are included in the training dataset: '
+                         + str(self.train.columns.tolist()))
+
+    def compare_features(self):
+
+        """
+        It is assessed if all features in the training dataset also appear
+        in the prediction dataset. If that is not the case, the features
+        that are not contained in the prediction dataset are removed from
+        the training dataset and training is launched on this adapted
+        training dataset, so that both datasets share an identical set of
+        features.
+
+        If more features appear in the prediction dataset, the additional
+        features are removed.
+
+        """
+
+        self.logger.info('Features are compared between training and prediction dataset')
+
+        if set(self.train.columns) == set(self.pred.columns):
+            self.logger.info('Features are identical in both training and prediction dataset')
+            self.pred = self.pred[self.train.columns]
+
+            self.logger.info('Potentially varying order of features has been fixed')
+            self.error = False
+
+        else:
+            self.logger.warning('Features are not identical in the training and prediction dataset')
+
+            extra_in_pred = set(self.pred.columns) - set(self.train.columns)
+            extra_in_train = set(self.train.columns) - set(self.pred.columns)
+
+            if len(extra_in_pred) > 0 and len(extra_in_train) == 0:
+                self.logger.warning('More features in prediction dataset, additional features are removed')
+
+                self.pred = self.pred[self.train.columns]
+                self.error = False
+
+            elif len(extra_in_train) > 0 and len(extra_in_pred) == 0:
+                self.logger.warning('More features in training dataset, additional features are removed')
+
+                self.train = self.train[self.pred.columns]
+                self.error = False
+
+            elif len(extra_in_train) > 0 and len(extra_in_pred) > 0:
+                self.logger.warning('There are mismatching features in both datasets')
+
+                self.common_columns = self.train.columns.intersection(self.pred.columns)
+
+                if len(self.common_columns.tolist()) == 0:
+                    self.logger.error('Error: No common columns in training and prediction dataset')
+                    self.error = True
+
+                elif len(self.common_columns.tolist()) < 6:
+                    self.logger.warning('Warning: only ' + str(len(self.common_columns.tolist())) + ' common columns in training and prediction dataset')
+                    self.error = False
+
+                    self.train = self.train[self.common_columns]
+                    self.pred = self.pred[self.common_columns]
+
+                else:
+                    self.logger.info(str(len(self.common_columns.tolist())) + ' common columns in training and prediction dataset')
+                    self.error = False
+
+                    self.train = self.train[self.common_columns]
+                    self.pred = self.pred[self.common_columns]
+            else:
+                self.logger.error('Error: Unknown issue detected. Check features manually!')
+                self.error = True
+
+        self.logger.info('Feature comparison completed')
+
+    def additional_instances_to_drop(self):
+
+        """
+        All instances that have a value of zero in all columns of a categorical
+        feature are identified and appended to the list of instances for which
+        a reliable prediction is not possible.
+
+        Input:
+            idx: Previously identified instances for which prediction is not
+                 possible, list
+            pred: prediction dataset, pandas DataFrame
+
+        Output:
+            idx: Updated list of instances for which prediction is not
+                 possible, list
+
+        """
+
+        self.logger.info('Start identification of instances that are not represented by at least one categorical feature')
+
+        columns = self.pred.columns
+        # Regular expression to match "<feature>_<value>_encoded"
+        pattern = re.compile(r"^(.*?)(_?\d+)?_encoded$")
+        encoded_features = {pattern.match(col).group(1) for col in columns if pattern.match(col)}
+
+        self.logger.info('Identified encoded features: ' + str(encoded_features))
+        count = 0
+        for feature in encoded_features:
+
+            feature_cols = [col for col in self.pred.columns if col.startswith(feature) and col.endswith("_encoded")]
+            all_zero_rows = (self.pred[feature_cols] == 0).all(axis=1)
+            all_zero_rows = self.pred.index[all_zero_rows].tolist()
+            self.idx = list(set(self.idx + all_zero_rows))
+            count = count + len(all_zero_rows)
+
+        self.logger.info(str(count) + ' instances have been identified that are not represented by at least one categorical feature')
+
+    def save_prediction_dataset(self):
+
+        """
+        Save prediction dataset and information on dropped rows as nc-file
+        """
+
+        self.pred = pd.concat([self.xy, self.pred], axis=1)
+        pred = self.pred.to_numpy()
+        char_features = features_to_char(self.pred.columns)
+
+        outfile = settings.path_pred
+        self.logger.info('Prediction dataset is saved to ' + outfile)
+
+        if os.path.exists(outfile):
+            os.remove(outfile)
+
+        ds = generate_basic_ncfile(outfile, crs=None)
+        ds.createDimension('lat', (np.shape(pred)[0]))
+        ds.createDimension('lon', (np.shape(pred)[1]))
+        ds.createDimension('ix', (len(self.idx)))
+        ds.createDimension('feat', len(char_features))
+        result = ds.createVariable('Result', 'f4', ('lat', 'lon'))
+        dropped = ds.createVariable('Dropped', 'u8', 'ix')
+        Features = ds.createVariable('features', 'S1', 'feat')
+        result[:, :] = pred
+        dropped[:] = np.array(self.idx)
+        Features[:] = char_features
+        ds.close()
+
+    def save_training_dataset(self):
+
+        """
+        Save dataframe as csv.
+        """
+
+        self.logger.info('Saving of training data in progress')
+
+        outfile = settings.path_train
+
+        # If outfile exists already, delete
+        if os.path.exists(outfile):
+            os.remove(outfile)
+
+        self.train = pd.concat([self.xy_train, self.train], axis=1)
+
+        # Save dataframe as csv
+        self.train.to_csv(outfile, sep=',', index=False)
+        self.logger.info('Training dataset saved')
+
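The instance check in additional_instances_to_drop hinges on the regular expression ^(.*?)(_?\d+)?_encoded$, which strips an optional class suffix from encoded column names, plus an all-zero test across each feature's columns. A self-contained sketch with toy column names, assumed to follow the <feature>_<class>_encoded convention used in the patch:

import re
import pandas as pd

pattern = re.compile(r"^(.*?)(_?\d+)?_encoded$")

columns = ['geology_1_encoded', 'geology_2_encoded', 'landuse_encoded', 'slope']
encoded_features = {pattern.match(c).group(1) for c in columns if pattern.match(c)}
assert encoded_features == {'geology', 'landuse'}

# Rows in which every one-hot column of a feature is zero belong to no class
# seen in training; they are flagged as undroppable-to-predict instances.
df = pd.DataFrame({'geology_1_encoded': [1, 0], 'geology_2_encoded': [0, 0]})
all_zero = (df == 0).all(axis=1)
assert df.index[all_zero].tolist() == [1]
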
diff --git a/src/plain_scripts/settings copy.py b/src/plain_scripts/settings copy.py
new file mode 100644
index 0000000000000000000000000000000000000000..30ee799a15006c6db9168f649d2eb7b75b79ca15
--- /dev/null
+++ b/src/plain_scripts/settings copy.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+    This is a template file for settings.py
+    Either duplicate and rename or fill out and rename.
+    More information on the individual meaning and what to consider can be
+    found in the user manual
+"""
+
+import logging
+import json
+import types
+
+def export_variables(logger):
+
+    variables = globals()
+    # Filter out non-serializable objects
+    defined_vars = {}
+    for k, v in variables.items():
+        if not k.startswith('__') and not callable(v) and not isinstance(v, types.ModuleType):
+            try:
+                # Test if the value is JSON serializable
+                json.dumps(v)
+                defined_vars[k] = v
+            except (TypeError, OverflowError):
+                # Skip non-serializable values
+                pass
+    # Convert the dictionary to a JSON string
+    vars_json = json.dumps(defined_vars, indent=4)
+    logger.info("Exported variables: %s", vars_json)
+
+# Mandatory parameters
+days = 2
+approach = 'statistical'
+
+# Steps
+training_dataset = False # Boolean, if training dataset shall be created
+preprocessing = 'no_interpolation' # Defines preprocessing approach: 'cluster', 'interpolation', 'no_interpolation'
+train_from_scratch = True
+train_delete = None
+
+prediction_dataset = False # Boolean, if prediction dataset shall be created
+pred_from_scratch = True
+pred_delete = None
+
+map_generation = True # Boolean, if mapping shall be performed
+
+# General
+
+crs = 'wgs84' # Coordinate reference system, string
+no_value = -999 # No data value, integer, suggestion -999
+random_seed = 42 # Random seed, integer
+resolution = 25 # Resolution in m of the final map, integer, all datasets will be interpolated to this resolution
+path_ml = '/Volumes/LaCie/2nd_Paper/entire_swiss_for_paper/maps/' # Path to where shire framework related parameters/files will be stored
+data_summary_path = None # Path to the data summary file, string, relevant only for training/prediction dataset generation
+key_to_include_path = None # Path to keys_to_include file, string, relevant only for training/prediction dataset generation
+
+# Training dataset generation
+
+size = None # Size of the validation dataset, float number between 0 and 1
+path_train = f'/Volumes/LaCie/2nd_Paper/entire_swiss_for_paper/training_datasets/{days}/training_statistical_{days}d.csv' # Path to where the training dataset is/shall be stored
+ohe = None # One-hot encoding, bool
+
+path_landslide_database = None # Path to where the landslide database is stored, string
+ID = 'ID' # Name of the column containing landslide ID, string
+landslide_database_x = 'xcoord' # Name of the column containing longitude values, string
+landslide_database_y = 'ycoord' # Name of the column containing latitude values, string
+
+path_nonls_locations = None # Path to where the non-landslide database is stored, string
+num_nonls = None # Number of non-landslide locations to include in the training dataset, integer
+nonls_database_x = None # Name of the column containing longitude values, string
+nonls_database_y = None # Name of the column containing latitude values, string
+
+#cluster = False # Use clustering for training dataset generation, bool
+#interpolation = False # Use interpolation for training dataset generation, bool
+
+# Prediction dataset generation
+
+bounding_box = None # Coordinates of the edges of the bounding box of the area of interest, list, [<ymax>, <ymin>, <xmin>, <xmax>]
+path_pred = None # Path to directory where the prediction dataset is/shall be stored
+
+# Map generation
+
+RF_training = True # Train the RF, bool
+RF_prediction = True # Make a prediction using the RF, bool
+
+not_included_pred_data = ['xcoord', 'ycoord'] # List of features in the prediction dataset not to be considered in prediction
+not_included_train_data = [] # List of features in the training dataset not to be considered in model training
+
+num_trees = 100 # Number of trees in the Random Forest, integer
+criterion = 'gini' # Criterion for the Random Forest, string
+depth = 20 # Maximum depth of the trees in the Random Forest, integer
+
+model_to_save = f'/Volumes/LaCie/2nd_Paper/entire_swiss_for_paper/maps/{approach}/RF_{days}' # Folder name for storage of the RF results, string
+model_to_load = f'/Volumes/LaCie/2nd_Paper/entire_swiss_for_paper/maps/{approach}/RF_{days}' # Folder where RF model is stored, string, identical to model_to_save if training and prediction are done at the same time
+model_database_dir = path_ml # Directory where models should be stored
+parallel = True # Boolean, true if prediction data shall be split to predict in parallel
+
+keep_cat_features = False # bool, true if categorical features shall be kept even if some instances in the prediction dataset have classes not covered by the training dataset
+remove_instances = True # bool, true if instances in the prediction dataset shall be removed if they have different classes than the instances in the training dataset
\ No newline at end of file
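The path entries in this template rely on f-string interpolation, so days and approach must be assigned before any path that embeds them; a plain string containing {days} would be written out literally. A toy check of the pattern, with a placeholder path rather than the template's own:

days = 2
approach = 'statistical'

# f-strings evaluate at definition time, so the values above are baked in here.
model_to_save = f'/some/path/maps/{approach}/RF_{days}'
assert model_to_save == '/some/path/maps/statistical/RF_2'
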
diff --git a/src/plain_scripts/shire.py b/src/plain_scripts/shire.py
index a9c19b0e1fc9cea998a9aee2f6d325e7df3fea53..c4a97250872ec7f32ed58b442d24f1925906cf35 100644
--- a/src/plain_scripts/shire.py
+++ b/src/plain_scripts/shire.py
@@ -9,6 +9,8 @@ from create_training_data import create_training_data
 from create_prediction_data import create_prediction_data
 from RandomForest import RandomForest
 from check_user_input import check_general_settings
+from compatibility_of_input_datasets import comparison_training_prediction_dataset
+
 from utilities.initialise_log import save_log

 """
@@ -78,20 +80,26 @@ else:
     print('Map will be generated')
     logger.info('Map generation started')

-    if settings.parallel:
-        print('Prediction will run in parallel')
-        logger.info('Prediction will run in parallel')
-    if settings.RF_training:
-        logger.info('Random Forest training is launched')
-        s = RandomForest('train_test', parallel=settings.parallel, log=logger)
-        logger = s.logger
-    if settings.RF_prediction:
-        logger.info('Random Forest prediction in launched')
-        s = RandomForest('prediction', parallel=settings.parallel, log=logger)
-        logger = s.logger
-
-    print('Map successfully created')
-    logger.info('Map successfully created')
+    print('Training and prediction dataset will be assessed for compatibility')
+    logger.info('Training and prediction dataset will be assessed for compatibility')
+
+    s = comparison_training_prediction_dataset(logger)
+
+    if not s.error:
+        if settings.parallel:
+            print('Prediction will run in parallel')
+            logger.info('Prediction will run in parallel')
+        if settings.RF_training:
+            logger.info('Random Forest training is launched')
+            s = RandomForest('train_test', parallel=settings.parallel, log=logger)
+            logger = s.logger
+        if settings.RF_prediction:
+            logger.info('Random Forest prediction is launched')
+            s = RandomForest('prediction', parallel=settings.parallel, log=logger)
+            logger = s.logger
+
+        print('Map successfully created')
+        logger.info('Map successfully created')

 for handler in logger.handlers:
     handler.close()
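Taken together, the revised shire.py runs the compatibility check first and gates both Random Forest stages on its error flag. A minimal sketch of that control flow, assuming settings and a configured logger as in the patch; the explicit log message in the else branch is illustrative and not part of the patch:

import logging

import settings
from RandomForest import RandomForest
from compatibility_of_input_datasets import comparison_training_prediction_dataset

logger = logging.getLogger('shire')

# Harmonise training and prediction datasets before any model is touched.
s = comparison_training_prediction_dataset(logger)

if not s.error:
    if settings.RF_training:
        # Train on the harmonised training dataset written by the check
        s = RandomForest('train_test', parallel=settings.parallel, log=logger)
        logger = s.logger
    if settings.RF_prediction:
        # Predict on the harmonised prediction dataset
        s = RandomForest('prediction', parallel=settings.parallel, log=logger)
        logger = s.logger
    logger.info('Map successfully created')
else:
    logger.error('Datasets are incompatible, map generation skipped')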