#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This is a template file for settings.py
Either duplicate and rename or fill out and rename.
More information on the individual meaning and what to consider can be
found in the user manual
"""
import logging
import json
import types
def export_variables(logger):
    """Collect all JSON-serializable module-level settings and log them.

    Iterates over this module's global namespace, skipping dunder names,
    callables (functions/classes) and imported modules, keeps only values
    that a ``json.dumps`` probe accepts, and logs the surviving settings
    as pretty-printed JSON at INFO level.

    Parameters
    ----------
    logger : logging.Logger
        Logger used to emit the exported variables.
    """
    defined_vars = {}
    for name, value in globals().items():
        # Dunders, functions/classes and modules are never settings values.
        if name.startswith('__') or callable(value) or isinstance(value, types.ModuleType):
            continue
        try:
            # Probe serializability per value so one bad entry does not
            # abort the whole export.  ValueError covers circular
            # references, which json.dumps raises in addition to
            # TypeError/OverflowError for unsupported values.
            json.dumps(value)
        except (TypeError, OverflowError, ValueError):
            continue
        defined_vars[name] = value
    # Serialize the filtered settings once, pretty-printed for readability.
    vars_json = json.dumps(defined_vars, indent=4)
    logger.info("Exported variables: %s", vars_json)
# Mandatory parameters
# Steps
training_dataset = # Boolean, if a training dataset shall be created
preprocessing = # Defines preprocessing approach: 'cluster', 'interpolation', 'no_interpolation'
train_from_scratch = # NOTE(review): undocumented in template; presumably bool, true if the training dataset shall be built from scratch -- confirm against user manual
train_delete = None # NOTE(review): undocumented in template; purpose unclear from here -- confirm against user manual
prediction_dataset = # Boolean, if a prediction dataset shall be created
pred_from_scratch = # NOTE(review): undocumented in template; presumably bool, analogous to train_from_scratch -- confirm against user manual
pred_delete = None # NOTE(review): undocumented in template; purpose unclear from here -- confirm against user manual
map_generation = # Boolean, if mapping shall be performed
# General
crs = # Coordinate reference system, string
no_value = # No data value, integer, suggestion -999
random_seed = # Random seed, integer
resolution = # Resolution in m of the final map, integer, all datasets will be interpolated to this resolution
path_ml = # Path to where shire framework related parameters/files will be stored
data_summary_path = # Path to the data summary file, string, relevant only for training/prediction dataset generation
key_to_include_path = # Path to keys_to_include file, string, relevant only for training/prediction dataset generation
# Training dataset generation
size = # Size of the validation dataset, float number between 0 and 1
path_train = # Path to directory where the training dataset is/shall be stored
ohe = # One-hot encoding, bool
path_landslide_database = # Path to where the landslide database is stored, string
ID = # Name of the column containing the landslide ID, string
landslide_database_x = # Name of the column containing longitude values, string
landslide_database_y = # Name of the column containing latitude values, string
path_nonls_locations = # Path to where the non-landslide database is stored, string
num_nonls = # Number of non-landslide locations to include in the training dataset, integer
nonls_database_x = # Name of the column containing longitude values, string
nonls_database_y = # Name of the column containing latitude values, string
cluster = # Use clustering for training dataset generation, bool
interpolation = # Use interpolation for training dataset generation, bool
# Prediction dataset generation
bounding_box = # Coordinates of the edges of the bounding box of the area of interest, list, [<ymax>, <ymin>, <xmin>, <xmax>]
path_pred = # Path to directory where the prediction dataset is/shall be stored
# Map generation
RF_training = # Train the RF, bool
RF_prediction = # Make a prediction using the RF, bool
not_included_pred_data = ['xcoord', 'ycoord'] # List of features in the prediction dataset not to be considered in prediction
not_included_train_data = [] # List of features in the training dataset not to be considered in model training
num_trees = # Number of trees in the Random Forest, integer
criterion = # Criterion for the Random Forest, string
depth = # Number of nodes of the RF, integer
model_to_save = # Folder name for storage of the RF results, string
model_to_load = # Folder where RF model is stored, string, identical to model_to_save if training and prediction is done at the same time
model_database_dir = # Directory where models should be stored
parallel = # Boolean, true if prediction data shall be split to predict in parallel
keep_cat_features = # bool, true if categorical features shall be kept even if some instances in the prediction dataset have classes not covered by the training dataset
remove_instances = # bool, true if instances in the prediction dataset shall be removed if they have different classes than the instances in the training dataset