diff --git a/wrapper.py b/wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..98ac995d92ed8ca3da405827b5ede064f045811b
--- /dev/null
+++ b/wrapper.py
@@ -0,0 +1,321 @@
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+from sklearn import svm
+from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, cross_val_score
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
+from sklearn.metrics import precision_recall_fscore_support, roc_auc_score
+from sklearn.inspection import permutation_importance
+import logging
+logging.basicConfig(filename='log.txt', level=logging.INFO)
+
+from chgen.functions import run_dbscan, get_intersections, full_intersections, get_correlated, run_classifier, find_features
+
+def find_loss(combo, df, df_name, eps=0.5, min_samples=4, not_noise=0.9):
+    '''
+    Wrapper function to detect outliers using DBSCAN
+
+    Args:
+        combo: a list, pair of parameters of interest
+        df: dataframe with values
+        df_name: string, name of the dataframe to process
+        eps: float, DBSCAN parameter defining the neighborhood of a point
+        min_samples: int, minimal number of points in the neighborhood of a point
+        not_noise: float, minimal (relative) share of instances of the dataframe required to count as a cluster
+    Returns:
+        bools: a boolean list, True - a point belongs to a cluster, False - an outlier
+        combo: string, pair of parameters of interest
+        pat_loss: int, number of points considered outliers
+        eps: float, final value of the eps parameter
+        smpl: int, final value of the min_samples parameter
+    '''
+    points = df.loc[:, combo]
+    points = np.array(points)
+
+    bools, eps, smpl = run_dbscan(points=points, xy=combo, df_name=df_name, eps=eps, min_samples=min_samples, not_noise=not_noise)
+
+    # count patient loss
+    pats = points[bools]
+    pat_loss = df.shape[0] - len(pats)
+
+    combo = ', '.join(combo)
+
+    return bools, combo, pat_loss, eps, smpl
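+
+# Illustrative usage sketch (kept as a comment so the module stays import-only; the
+# dataframe, file name and column pair below are hypothetical, not part of the package):
+#
+#   df_a = pd.read_csv('hospital_a.csv')
+#   bools, combo, pat_loss, eps, smpl = find_loss(['age', 'bmi'], df_a, 'df_a')
+#   df_a_clustered = df_a[bools]   # keep only points assigned to a cluster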
+
+def compute_intersections(combo, df_1, df_2, df_names, bools_1 = [], bools_2 = [], first_fixed = None, size = []):
+    '''
+    Wrapper function to calculate intersections of two sets of points (with or without outliers) of the same dimension
+
+    Args:
+        combo: a list, a combination of parameters
+        df_1: dataframe whose sample size will be altered (resampled); only the parameters of interest are used
+        df_2: dataframe whose sample size remains the same; only the parameters of interest are used
+        df_names: a list of strings, hospitals' names (e.g. ['df_1','df_2']), in the same order as the to_boot and fixed arguments
+        bools_1, bools_2: lists of boolean values, True - a point is in the cluster, False - a point is an outlier
+        first_fixed: boolean, determines which dataframe keeps its length (True = the first one, False = the second one);
+                     if None, both datasets are taken at full length
+        size: a list of integers for resampling
+    Returns:
+        df: a pandas dataframe, a table with intersection values for a combination of parameters and dataframe cases
+    '''
+
+    try:
+        points_1 = df_1.loc[:, combo]
+        points_2 = df_2.loc[:, combo]
+        points_1 = np.array(points_1)
+        points_2 = np.array(points_2)
+
+        if (len(bools_1) != 0) and (len(bools_2) != 0):
+            # keep only clustered points (w/o outliers) for the current combo
+            points_1 = points_1[bools_1]
+            points_2 = points_2[bools_2]
+
+        if first_fixed == True:
+            df = get_intersections(to_boot=points_2, fixed=points_1, size=size, df_names=df_names[::-1], pair=combo)
+        elif first_fixed == False:
+            df = get_intersections(to_boot=points_1, fixed=points_2, size=size, df_names=df_names, pair=combo)
+        else:  # default case - compute intersections with all datapoints
+            df = full_intersections(df_1=points_1, df_2=points_2, df_names=df_names, pair=combo)
+        return df
+
+    except Exception:
+        logging.exception(f'Problem: {combo}')
+        df = pd.DataFrame()
+        return df
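+
+# Illustrative usage sketch (a comment-only example; dataframes, names and sizes are
+# hypothetical): the bools returned by find_loss can be passed in so that outliers are
+# excluded before the intersection is computed.
+#
+#   bools_a, *_ = find_loss(['age', 'bmi'], df_a, 'df_a')
+#   bools_b, *_ = find_loss(['age', 'bmi'], df_b, 'df_b')
+#   overlap = compute_intersections(['age', 'bmi'], df_a, df_b, ['df_a', 'df_b'],
+#                                   bools_1=bools_a, bools_2=bools_b,
+#                                   first_fixed=True, size=[100, 200])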
+
+def classify(df_1, df_2, target, df_name, corr_coef):
+    '''
+    Computes a report with the results of classification using four classifiers and two methods. A model tries to
+    distinguish between the two dataframes and to find the features that are most important for that task.
+
+    Args:
+        df_1, df_2: pandas dataframes, dataframes of interest
+        target: string, dependent feature
+        df_name: name of the dataframe (in the target variable) to be set to 1 in the classification problem
+        corr_coef: float, threshold of acceptable correlation; if exceeded, one of the correlated features is dropped
+    Returns:
+        result: a dataframe with the results of classification
+    '''
+
+    # find common parameters for both dfs
+    common_params = sorted(list(set(df_1.columns).intersection(df_2.columns)))
+    logging.info(f'Number of common parameters: {len(common_params)}')
+    logging.info(f'Common parameters: {common_params}')
+
+    # keep only the common parameters (pre-checked to be present for >70% of patients)
+    df_1 = df_1[common_params]
+    df_2 = df_2[common_params]
+
+    df = pd.concat([df_1, df_2])
+
+    correlated = get_correlated(df.drop(columns=[target]), corr_coef)
+    df.drop(columns=correlated, inplace=True)
+
+    all_features = df.drop(columns=[target]).copy()
+    label = df[target]
+    # split df
+    X_train, X_test, y_train, y_test = train_test_split(all_features, label, test_size=0.2, stratify=label)
+    logging.info(" Shapes: " + str(X_train.shape) + " " + str(y_train.shape) + " " + str(X_test.shape) + " " + str(y_test.shape))
+
+    # scale features
+    scaler = StandardScaler()
+    X_train = scaler.fit_transform(X_train)
+    X_test = scaler.transform(X_test)
+
+    df_train = pd.DataFrame(X_train, columns=all_features.columns)
+    df_test = pd.DataFrame(X_test, columns=all_features.columns)
+
+    logging.info(" Unique values: df_train: " + str(y_train.value_counts()) + " df_test: " + str(y_test.value_counts()))
+
+    # order: LR, RF, SVM, Ada
+    clfs = [LogisticRegression(), RandomForestClassifier(), svm.SVC(), AdaBoostClassifier()]
+    parameters = [{'penalty': ["l2"], "C": np.linspace(0.001, 2, 20), "class_weight": ['balanced'], "solver": ["liblinear"]},  # LR
+                  {'criterion': np.array(['gini']), 'n_estimators': [100], 'min_samples_leaf': np.arange(2, 5), 'max_depth': np.arange(2, 5), 'min_samples_split': np.arange(2, 5), "class_weight": ['balanced']},  # RF
+                  {"C": np.linspace(0.1, 3, 100), "class_weight": ["balanced"], "kernel": ["rbf"], "gamma": ["auto"], 'probability': [True]},  # SVC
+                  {'n_estimators': [50]}]  # Ada
+
+    # column names of the dataframe to save
+    dct = {
+        'Classifier': {},
+        'Test_score': {},
+        'Score_value': {},
+        'Iteration_omission': {},
+        'Model_roc_auc_omission': {},
+        'Features_omission': {},
+        'Importance_omission': {},
+        'Rank_omission': {},
+        'Iteration_permutation': {},
+        'Model_roc_auc_permutation': {},
+        'Features_permutation': {},
+        'Importance_permutation': {},
+        'Rank_permutation': {}
+    }
+
+    # row counters: k - classifier name and omission score, c - test scores, p - permutation rows,
+    # l - omission rows, idx_o/idx_p - iteration and model-score rows of the two methods
+    k, c, p, l, idx_o, idx_p = 0, 0, 0, 0, 0, 0
+    it = 1
+    rank = 1
+
+    new_y_train = y_train.apply(lambda x: 1 if x == df_name else 0)  # set the chosen dataframe to 1
+    new_y_test = y_test.apply(lambda x: 1 if x == df_name else 0)
+
+    for classifier, param in zip(clfs, parameters):
+        dct['Classifier'][k] = str(classifier).split("(")[0]
+        cv = StratifiedKFold(n_splits=5, shuffle=True)
+        clf = GridSearchCV(classifier, param, scoring="roc_auc", cv=cv, n_jobs=-1)
+        clf.fit(df_train, new_y_train)
+
+        if "Logistic" in str(classifier):
+            model = LogisticRegression(**clf.best_params_)
+            logging.info("LR Best params: " + str(clf.best_params_))
+        elif "Random" in str(classifier):
+            model = RandomForestClassifier(**clf.best_params_, n_jobs=-1)
+            logging.info("RF Best params: " + str(clf.best_params_))
+        elif "SVC" in str(classifier):
+            model = svm.SVC(**clf.best_params_)
+            logging.info("SVC Best params: " + str(clf.best_params_))
+        elif "Ada" in str(classifier):
+            model = AdaBoostClassifier(**clf.best_params_)
+            logging.info("Ada Best params: " + str(clf.best_params_))
+
+        logging.info(f" MODEL: {model}")
+        model.fit(df_train, new_y_train)
+
+        ### PREDICTION ANALYSIS
+        logging.info("\n-----------------------PREDICTION---------------------------")
+        df_pred = model.predict(df_test)
+        df_probs = model.predict_proba(df_test)[:, 1]  # for roc_auc score
+
+        # results measurement
+        roc_auc = roc_auc_score(new_y_test, df_probs)
+        precision, recall, f1_score, support = precision_recall_fscore_support(new_y_test, df_pred, average='binary')
+        dct['Test_score'][c] = "ROC_AUC"
+        dct['Score_value'][c] = round(roc_auc, 5)
+        c = c + 1
+        dct['Test_score'][c] = "Precision"
+        dct['Score_value'][c] = round(precision, 5)
+        c = c + 1
+        dct['Test_score'][c] = "Recall"
+        dct['Score_value'][c] = round(recall, 5)
+        c = c + 1
+        dct['Test_score'][c] = "f1_score"
+        dct['Score_value'][c] = round(f1_score, 5)
+        c = c + 1
+
+        logging.info(" ROC_AUC: " + str(roc_auc))
+        logging.info(" Precision: " + str(precision))
+        logging.info(" Recall: " + str(recall))
+        logging.info(" f1_score: " + str(f1_score))
+
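+        # Permutation importance: features whose permuted importance mean exceeds two standard
+        # deviations above zero are recorded (ranked) and then dropped; the procedure is repeated
+        # on the remaining features for as long as the cross-validated ROC AUC stays above chance (0.5).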
+        ### PERMUTATION ANALYSIS
+        logging.info("\n-----------------------PERMUTATION--------------------------")
+        perm_features_to_drop = []
+        score_perm = cross_val_score(model, df_train, new_y_train, scoring="roc_auc", cv=StratifiedKFold(n_splits=5, shuffle=True))
+        mean_score_perm = np.mean(score_perm)
+        logging.info(f"Perm Score: {score_perm}, mean: {mean_score_perm}")
+        if mean_score_perm > 0.5:
+            dct['Iteration_permutation'][idx_p] = it
+            dct['Model_roc_auc_permutation'][idx_p] = round(mean_score_perm, 5)
+            res = permutation_importance(model, df_train, new_y_train, n_repeats=10, scoring="roc_auc")
+            means, stds = res.importances_mean, res.importances_std
+            logging.info(f"Permutation means: {means}")
+            logging.info(f"Permutation stds: {stds}")
+            rank_curr = rank
+            curr_it_p = it  # initialised here so the counter exists even if no feature passes the threshold
+            for mean in means.argsort()[::-1]:
+                if means[mean] - 2 * stds[mean] > 0:
+                    dct['Iteration_permutation'][idx_p] = curr_it_p
+                    perm_features_to_drop.append(df_train.columns[mean])
+                    dct['Features_permutation'][p] = df_train.columns[mean]
+                    dct['Importance_permutation'][p] = f"{means[mean]:.5f}"
+                    dct['Rank_permutation'][p] = rank_curr
+                    rank_curr = rank_curr + 1
+                    logging.info(f"{df_train.columns[mean]}: " + f"{means[mean]:.5f}" + f" +/- {stds[mean]:.8f}")
+                    p = p + 1
+            curr_it_p += 1
+            idx_p = p
+            logging.info(f"Permuted features to drop: {perm_features_to_drop}")
+            X_perm_new = df_train.drop(columns=perm_features_to_drop)
+            # loop permutation importance further w/o the features found in the first iteration
+            while (len(perm_features_to_drop) != 0) & (len(X_perm_new.columns) != 0) & (mean_score_perm > 0.5):
+                perm_features_to_drop = []
+                model.fit(X_perm_new, new_y_train)
+                score_perm = cross_val_score(model, X_perm_new, new_y_train, scoring="roc_auc", cv=StratifiedKFold(n_splits=5, shuffle=True))
+                mean_score_perm = np.mean(score_perm)
+                logging.info(f"Perm Score: {score_perm}, mean: {mean_score_perm}")
+
+                if mean_score_perm > 0.5:
+                    dct['Model_roc_auc_permutation'][idx_p] = round(mean_score_perm, 5)
+                    res = permutation_importance(model, X_perm_new, new_y_train, n_repeats=10, scoring="roc_auc")
+                    means, stds = res.importances_mean, res.importances_std
+                    logging.info(f"Permutation means: {means}")
+                    logging.info(f"Permutation stds: {stds}")
+                    for mean in means.argsort()[::-1]:
+                        if means[mean] - 2 * stds[mean] > 0:
+                            dct['Iteration_permutation'][idx_p] = curr_it_p
+                            perm_features_to_drop.append(X_perm_new.columns[mean])
+                            dct['Features_permutation'][p] = X_perm_new.columns[mean]
+                            dct['Importance_permutation'][p] = f"{means[mean]:.5f}"
+                            dct['Rank_permutation'][p] = rank_curr
+                            rank_curr = rank_curr + 1
+                            logging.info(f"{X_perm_new.columns[mean]}: " + f"{means[mean]:.5f}" + f" +/- {stds[mean]:.8f}")
+                            p = p + 1
+                    curr_it_p += 1
+                    idx_p = p
+                    logging.info(f"Permuted features to drop: {perm_features_to_drop}")
+                    X_perm_new = X_perm_new.drop(columns=perm_features_to_drop)
+
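+        # Omission analysis: find_features fits the classifier, records the currently most important
+        # features, and the loop below then omits them and refits on the remaining features, repeating
+        # until no important features are returned or the model ROC AUC drops to chance (0.5).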
+        ### OMISSION ANALYSIS
+        logging.info("\n-------------------------OMISSION----------------------------")
+        df_omission = df_train.copy()
+        df_omission[target] = new_y_train.values
+        iter_1 = find_features(df_omission, target, classifier, param, scoring="roc_auc")
+        best_parameters = iter_1[3]
+        # df without the features obtained in the first iteration
+        all_features_current = df_omission.drop(columns=iter_1[2])
+        output = []
+        curr_it = it
+        rank_curr = rank
+        # doubled for LR as it may happen that no features have importance > 0
+        if iter_1[0] > 0.5:
+            dct['Iteration_omission'][idx_o] = curr_it
+            dct['Model_roc_auc_omission'][k] = round(iter_1[0], 5)
+
+        while (not iter_1[2].empty) & (iter_1[0] > 0.5):
+
+            if all_features_current.shape[1] != 0:
+                dct['Iteration_omission'][idx_o] = curr_it
+                curr_it = curr_it + 1
+                dct['Model_roc_auc_omission'][k] = round(iter_1[0], 5)
+                for el_2, el_1 in zip(iter_1[2], iter_1[1]):
+                    dct['Features_omission'][l] = el_2
+                    dct['Importance_omission'][l] = round(el_1, 5)
+                    dct['Rank_omission'][l] = rank_curr
+                    rank_curr = rank_curr + 1
+                    l = l + 1
+                idx_o, k = l, l
+                output.append(iter_1[0:3])
+                logging.info(iter_1[0:3])
+                logging.info("______________________________________________")
+                # iterate without the important features from the previous iteration
+                iter_1 = find_features(all_features_current, target, classifier, param, scoring="roc_auc")[0:3]
+                if not iter_1[2].empty:
+                    all_features_current = all_features_current.drop(columns=iter_1[2])
+                else:
+                    logging.info("done")
+                    break
+        logging.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
+        k, c, p, l, idx_o, idx_p = max(c, p, l), max(c, p, l), max(c, p, l), max(c, p, l), max(c, p, l), max(c, p, l)
+
+    # table with test-set scores and feature importances computed on the training set
+    result = pd.DataFrame(dct)
+    result.sort_index(ascending=True, inplace=True)
+    result = result.replace(np.nan, " ")
+
+    return result
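+
+
+# Illustrative end-to-end sketch (hypothetical file names, column labels and corr_coef value;
+# adapt them to the actual data - they are not part of the package).
+if __name__ == '__main__':
+    df_a = pd.read_csv('hospital_a.csv')    # placeholder input files
+    df_b = pd.read_csv('hospital_b.csv')
+    df_a['hospital'] = 'df_a'               # dependent feature marking the origin of each row
+    df_b['hospital'] = 'df_b'
+    report = classify(df_a, df_b, target='hospital', df_name='df_a', corr_coef=0.9)
+    report.to_csv('classification_report.csv', index=False)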