cut_and_interpolate_gui.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import pickle
import os
import netCDF4 as nc

from LatLon23 import LatLon, Latitude, Longitude
from scipy.interpolate import interp2d, interp1d

from utilities.import_raw_dataset import import_raw_dataset
from utilities.import_format import import_tif, import_nc
from utilities.ncfile_generation import generate_basic_ncfile

class cut_and_interpolate:

    """
        This class imports a dataset, cuts it to the desired extent and
        interpolates it to the desired resolution.
        The settings are taken from the user input through the gui.
    """

    def __init__(self, key=None, path=None, no_data_value=None,
                 categorical=None, several=None, several_same=None,
                 first=None, bb=None, cluster=False, for_prediction=False,
                 prop_settings=None, prop=None, path_properties=None):

        """
            Input:
                key: key belonging to the dataset (data_summary.csv)
                path: path where the dataset is stored (data_summary.csv)
                no_data_value: value representing no data (data_summary.csv)
                categorical: boolean if dataset contains categorical
                            information (data_summary.csv)
                several: boolean if class is called several times
                        e.g. in a for loop over several datasets
                several_same: boolean if all datasets have the same
                            spatial extent and resolution
                first: boolean,
                        only important if several or several_same is True
                bb: list of bounding boxes, format list ymax, ymin, xmin, xmax
                cluster: boolean, determines whether several sections of the
                        dataset are interpolated in a loop
                for_pediction: boolean,
                            results used for creating the prediction dataset
                prop_settings: dictionary containing general settings
                prop: dictionary containing necessary
                    information for cutting and interpolation
                path_properties: path to separate file storing information
                                on applied cutting extent and
                                interpolation vectors
        """

        # Import cutting and interpolation information if this is not the
        # first dataset of several to be cut and interpolated
        if several and not first:
            with open(path_properties, 'rb') as handle:
                self.properties = pickle.load(handle)
        self.path = path

        self.key = key
        self.cluster = cluster
        self.for_prediction = for_prediction
        self.prop = prop
        self.prop_settings = prop_settings
        
        # Define bounding box
        if cluster:
            self.bb_ges = bb
            self.to_cluster = True
        elif self.for_prediction:
            self.to_cluster = False
            self.bb = [self.prop['north'], self.prop['south'],
                       self.prop['west'], self.prop['east']]
            if several and not first:
                self.bb = [self.properties['interp_vectors']['y'][0],
                           self.properties['interp_vectors']['y'][-1],
                           self.properties['interp_vectors']['x'][0],
                           self.properties['interp_vectors']['x'][-1]]
        else:
            self.to_cluster = False
            if several and not first:
                self.bb = [self.properties['interp_vectors']['y'][0],
                           self.properties['interp_vectors']['y'][-1],
                           self.properties['interp_vectors']['x'][0],
                           self.properties['interp_vectors']['x'][-1]]
            else:
                self.bb = bb

        self.path_properties = path_properties

        if no_data_value != 'None':
            self.no_data = no_data_value.split(',')
            self.no_data = [float(val) for val in self.no_data]
        else:
            self.no_data = no_data_value

        self.categorical = categorical
        self.several = several
        self.several_same = several_same
        self.first = first

        # Define limits to determine interpolation approach for dataset
        self.limit_org = 50000000
        self.limit_interp = 7500000
        self.size = 200
        self.overlap = 100

        self.data, self.x_org, self.y_org = import_raw_dataset(self.path, self.no_data, prop_settings['no_value'])   # Import raw datasets
    
        # If training locations are clustered
        if self.to_cluster:
            
            self.x_raw = self.x_org
            self.y_raw = self.y_org
            self.data_raw = self.data

            def parallized_interpolation(num):

                # Interpolate the cut dataset
                a = self.interpolate_dataset(
                    self.subsets[num],
                    self.y_orgs[num],
                    self.x_orgs[num],
                    self.ds['Longitude' + str(num)][:].data,
                    self.ds['Latitude' + str(num)][:].data)

                # Save the interpolated dataset in the nc file/Update the cut
                # and interpolated dataset for the 2nd and following datasets
                if self.first_dataset:
                    result = self.ds.createVariable(
                        'Result' + str(num),
                        'f4',
                        ('lat' + str(num), 'lon' + str(num)))

                    result[:, :] = a
                else:
                    self.ds['Result' + str(num)][:, :] = a

            self.subsets = []
            self.x_orgs = []
            self.y_orgs = []
            self.cuttables = []

            self.first_dataset = False

            # Iterate over all bounding boxes of
            # the clustered training locations
            for count, self.bb in enumerate(self.bb_ges):

                self.x_org = self.x_raw
                self.y_org = self.y_raw
                self.data = self.data_raw

                # Check that all bounding boxes are
                # covered by the extent of the dataset
                self.compare_extends()

                self.cuttables.append(self.cuttable)
                # Cut the original dataset to the
                # currently considered bounding box
                self.cut_to_boundingbox()
                # Store cut properties to be used in the interpolation
                self.subsets.append(self.data)
                self.x_orgs.append(self.x_org)
                self.y_orgs.append(self.y_org)

                if not os.path.isfile('tmp.nc') or self.first_dataset:

                    if count == 0:
                        # Open temporarty file to store the
                        # interpolated subsets of the dataset
                        self.ds = generate_basic_ncfile('tmp.nc')
                        self.first_dataset = True

                    # Determine the x and y vectors for interpolation
                    self.determine_reference_vectors()
                    # Saving the interpolation vectors to the temporary file
                    self.ds.createDimension('lat' + str(count), len(self.y))
                    self.ds.createDimension('lon' + str(count), len(self.x))
                    longitude = self.ds.createVariable(
                        'Longitude' + str(count),
                        'f4',
                        'lon' + str(count))
                    latitude = self.ds.createVariable(
                        'Latitude' + str(count),
                        'f4',
                        'lat' + str(count))

                    longitude[:] = self.x
                    latitude[:] = self.y

                elif (os.path.isfile('tmp.nc')
                      and not self.first_dataset and count == 0):
                    # If it's not the first dataset to be cut, open the nc file
                    self.ds = nc.Dataset('tmp.nc', mode='a')

            self.one_go, self.as_chunks, self.as_cols = True, False, False

            # Final decision whether cutting and interpolation is possible
            if False in self.cuttables:
                self.cuttable = False
            else:
                self.cuttable = True

            # Interpolate all subsets in parallel
            #Parallel(n_jobs=5, backend='threading', timeout=999999)
            #(delayed(parallized_interpolation)(num) 
            # for num in range(len(self.bb_ges)))
            for num in range(len(self.bb_ges)):
                parallized_interpolation(num)
            self.ds.close()

        elif self.for_prediction:

            def test_parallel_interpolation(i):

                ref = self.interpolate_dataset(
                    np.array(chunks_old[i]),
                    np.array(np.linspace(
                        self.y_org[pixels_old[i][0]],
                        self.y_org[pixels_old[i][1]],
                        abs(pixels_old[i][1]-pixels_old[i][0]))),
                    np.array(np.linspace(
                        self.x_org[pixels_old[i][2]],
                        self.x_org[pixels_old[i][3]],
                        abs(pixels_old[i][3]-pixels_old[i][2]))),
                    self.x_final[i],
                    self.y_final[i])

                return ref
            self.compare_extends()

            # If bounding box is within limits of dataset
            if self.cuttable:
                self.cut_to_boundingbox()   # Cut to the bounding box

                # Determine interpolation vectors
                self.determine_reference_vectors()
                # Depending on dataset size determine interpolation approach
                self.determine_interpolation_approach()

                if self.one_go:
                    # Interpolate dataset
                    self.array = self.interpolate_dataset(
                                        self.data,
                                        self.y_org,
                                        self.x_org,
                                        self.x,
                                        self.y)

                # If original dataset has to be split into chunks
                elif self.as_chunks:
                    # Split the dataset into chunks
                    chunks_old, pixels_old = self.split_into_chunks()
                    # Determine interpolation vectors for each chunk
                    self.determine_new_vector()

                    #ref_tmp = Parallel(n_jobs=5,
                    #                   backend='threading',
                    #                   timeout=999999)
                    #(delayed(test_parallel_interpolation)(num)
                    # for num in range(len(self.x_final)))
                    ref_tmp = []
                    for num in range(len(self.x_final)):
                        ref_tmp.append(test_parallel_interpolation(num))
                    # Combine the individual interpolated
                    # chunks into one dataset
                    self.array = self.reshape_chunks(ref_tmp)

                elif self.as_cols:

                    self.split_into_chunks()  # Split the dataset into chunks

                    ref_tmp = []
                    # Go through all chunks and interpolate them individually
                    for i in range(len(self.x_final)):
                        ref = self.interpolate_dataset(self.data,
                                                       self.y_org,
                                                       self.x_org,
                                                       self.x_final[i],
                                                       self.y_final[i])
                        ref_tmp.append(list(ref))
                    # Combine the individual interpolated
                    # chunks into one dataset
                    self.array = self.reshape_chunks(ref_tmp)

                # If a path is provided, the cutting and interpolation
                # information is saved in a pickle file
                if self.path_properties is not None:
                    with open(self.path_properties, 'wb') as handle:
                        pickle.dump(self.properties, handle)

        else:
            # Check if bounding box is covered by limits of dataset
            self.compare_extends()

            # If bounding box is within limits of dataset
            if self.cuttable:

                self.cut_to_boundingbox()   # Cut to the bounding box
                # Determine interpolation vectors
                self.determine_reference_vectors()
                # Depending on dataset size determine interpolation approach
                self.determine_interpolation_approach()

                # If interpolation can be done in one go
                if self.one_go:

                    # Interpolate dataset
                    self.array = self.interpolate_dataset(self.data,
                                                          self.y_org,
                                                          self.x_org,
                                                          self.x,
                                                          self.y)

                # If original dataset has to be split into chunks
                elif self.as_chunks:
                    # Split the dataset into chunks
                    chunks_old, pixels_old = self.split_into_chunks()
                    # Determine interpolation vectors for each chunk
                    self.determine_new_vector()

                    ref_tmp = []
                    # Go through all chunks and interpolate them individually
                    for i in range(len(chunks_old)):
                        ref = self.interpolate_dataset(
                            np.array(chunks_old[i]),
                            np.array(np.linspace(
                                self.y_org[pixels_old[i][0]],
                                self.y_org[pixels_old[i][1]],
                                abs(pixels_old[i][1]-pixels_old[i][0]))),
                            np.array(np.linspace(
                                self.x_org[pixels_old[i][2]],
                                self.x_org[pixels_old[i][3]],
                                abs(pixels_old[i][3]-pixels_old[i][2]))),
                            self.x_final[i],
                            self.y_final[i])
                        ref_tmp.append(list(ref))
                    # Combine the individual interpolated
                    # chunks into one dataset
                    self.array = self.reshape_chunks(ref_tmp)

                elif self.as_cols:

                    self.split_into_chunks()  # Split the dataset into chunks

                    ref_tmp = []
                    # Go through all chunks and interpolate them individually
                    for i in range(len(self.x_final)):
                        ref = self.interpolate_dataset(self.data,
                                                       self.y_org,
                                                       self.x_org,
                                                       self.x_final[i],
                                                       self.y_final[i])
                        ref_tmp.append(list(ref))
                    # Combine the individual interpolated
                    # chunks into one dataset
                    self.array = self.reshape_chunks(ref_tmp)

                # If a path is provided, the cutting and interpolation
                # information is saved in a pickle file
                if self.path_properties is not None:
                    with open(self.path_properties, 'wb') as handle:
                        pickle.dump(self.properties, handle)

    def compare_extends(self):

        """
            Determine if the bounding box to which the dataset shall be cut is
            completely covered by the dataset.
            If not, the execution of the script will be aborted.
        """

        self.cuttable = True
        self.left_too_short = False
        self.right_too_short = False
        self.bottom_too_short = False
        self.top_too_short = False
        y, x = [], []
        for coord in [self.y_org[0], self.y_org[-1], self.bb[0], self.bb[1]]:

            if coord >= 0:
                y.append(90 + coord)

            if coord < 0:
                y.append(90 - abs(coord))

        for coord in [self.x_org[0], self.x_org[-1], self.bb[2], self.bb[3]]:

            if coord >= 0:
                x.append(180 + coord)

            if coord < 0:
                x.append(180 - abs(coord))

        if y[2] > y[0]:
            self.top_too_short = True
        if y[3] < y[1]:
            self.bottom_too_short = True
        if x[2] < x[0]:
            self.left_too_short = True
        if x[3] > x[1]:
            self.right_too_short = True

        if (self.bottom_too_short or self.top_too_short
                or self.left_too_short or self.right_too_short):
            self.cuttable = False
            self.array = None
            self.x = None
            self.y = None

        return self.cuttable

    def cut_to_boundingbox(self):

        """
            Cut the dataset to the bounding box
        """
            
        if self.several_same and not self.first:

            # Load the indices of the bounding box from the properties file
            self.top = self.properties['boundaries']['top']
            self.bottom = self.properties['boundaries']['bottom']
            self.left = self.properties['boundaries']['left']
            self.right = self.properties['boundaries']['right']
            
        else:
            # If several datasets shall be interpolated after another and the
            # current run is the first dataset
            if (self.several and self.first) or (self.several_same and self.first):
                # Open empty dictionary to store the cutting and
                # interpolation information in
                self.properties = {}
            
            # Determine if the coordinate vectors
            # contain both pos and neg values
            if (all(val >= 0 for val in self.x_org)
                    or all(val <= 0 for val in self.x_org)):

                # Determine pixel index of left and right edge of bounding box
                self.left = int((np.abs(self.x_org - self.bb[2])).argmin())
                self.right = int((np.abs(self.x_org - self.bb[3])).argmin())

            else:

                if self.bb[2] <= 0:
                    tmp = [x for x in self.x_org if x <= 0]
                else:
                    tmp = [x for x in self.x_org if x >= 0]

                self.left = list(self.x_org).index(
                    tmp[int((np.abs(np.array(tmp) - self.bb[2])).argmin())])

                if self.bb[3] <= 0:
                    tmp = [x for x in self.x_org if x <= 0]
                else:
                    tmp = [x for x in self.x_org if x >= 0]

                self.right = list(self.x_org).index(
                    tmp[int((np.abs(np.array(tmp) - self.bb[3])).argmin())])

            if (all(val >= 0 for val in self.y_org)
                    or all(val <= 0 for val in self.y_org)):

                # Determine pixel index of top and bottom edge of bounding box
                self.top = int((np.abs(self.y_org - self.bb[0])).argmin())
                self.bottom = int((np.abs(self.y_org - self.bb[1])).argmin())

            else:

                if self.bb[0] <= 0:
                    tmp = [y for y in self.y_org if y <= 0]
                else:
                    tmp = [y for y in self.y_org if y >= 0]

                self.top = list(self.y_org).index(
                    tmp[int((np.abs(np.array(tmp) - self.bb[0])).argmin())])

                if self.bb[1] <= 0:
                    tmp = [y for y in self.y_org if y <= 0]
                else:
                    tmp = [y for y in self.y_org if y >= 0]

                self.bottom = list(self.y_org).index(
                    tmp[int((np.abs(np.array(tmp) - self.bb[1])).argmin())])

            # Add pixel in all directions to account for rounding issues

            if not self.for_prediction:
                if self.left-100 >= 0:
                    self.left = self.left - 100
                if self.top-100 >= 0:
                    self.top = self.top - 100
                if self.bottom+100 <= np.shape(self.data)[0]:
                    self.bottom = self.bottom + 100
                if self.right+100 <= np.shape(self.data)[1]:
                    self.right = self.right + 100
            
            if self.several_same and self.first:
                # Store the indices to be used again with the next dataset
                self.properties['boundaries'] = {}
                self.properties['boundaries']['top'] = self.top
                self.properties['boundaries']['bottom'] = self.bottom
                self.properties['boundaries']['left'] = self.left
                self.properties['boundaries']['right'] = self.right 

        # Cut the dataset and x, y vectors to the determined extent
        self.data = self.data[self.top:self.bottom, self.left:self.right]

        self.x_org = self.x_org[self.left:self.right]
        self.y_org = self.y_org[self.top:self.bottom]

    def determine_reference_vectors(self):

        """
            Determine interpolation vectors x and y.
        """

        # If several datasets shall be interpolated after another and the
        # current run is the first dataset
        if self.several and self.first:

            # Determine distance in meters in x and y
            # direction between bounds of dataset
            point1_x = LatLon(Latitude(self.y_org[0]),
                              Longitude(self.x_org[0]))
            point2_x = LatLon(Latitude(self.y_org[0]),
                              Longitude(self.x_org[-1]))
            distance_x = point1_x.distance(point2_x)*1000

            point1_y = LatLon(Latitude(self.y_org[0]),
                              Longitude(self.x_org[0]))
            point2_y = LatLon(Latitude(self.y_org[-1]),
                              Longitude(self.x_org[0]))
            distance_y = point1_y.distance(point2_y)*1000

            # Determine interpolation vector with desired resolution
            self.x = np.linspace(
                self.x_org[0],
                self.x_org[-1],
                int(distance_x/self.prop_settings['resolution']))
            self.y = np.linspace(
                self.y_org[0],
                self.y_org[-1],
                int(distance_y/self.prop_settings['resolution']))

            # Store interpolation vector in properties file
            self.properties['interp_vectors'] = {}
            self.properties['interp_vectors']['x'] = self.x
            self.properties['interp_vectors']['y'] = self.y

        # If only one dataset shall be interpolated
        elif not self.several:

            # Determine distance in meters in x and y
            # direction between bounds of dataset
            point1_x = LatLon(Latitude(self.y_org[0]),
                              Longitude(self.x_org[0]))
            point2_x = LatLon(Latitude(self.y_org[0]),
                              Longitude(self.x_org[-1]))
            distance_x = point1_x.distance(point2_x)*1000

            point1_y = LatLon(Latitude(self.y_org[0]),
                              Longitude(self.x_org[0]))
            point2_y = LatLon(Latitude(self.y_org[-1]),
                              Longitude(self.x_org[0]))
            distance_y = point1_y.distance(point2_y)*1000

            # Determine interpolation vector with desired resolution
            self.x = np.linspace(
                self.x_org[0],
                self.x_org[-1],
                int(distance_x/self.prop_settings['resolution']))
            self.y = np.linspace(
                self.y_org[0],
                self.y_org[-1],
                int(distance_y/self.prop_settings['resolution']))

        # If several datasets shall be interpolated after another and the
        # current run is not the first dataset
        elif self.several and not self.first:

            self.x = np.array(self.properties['interp_vectors']['x'])
            self.y = np.array(self.properties['interp_vectors']['y'])
    
    def determine_new_vector(self):

        """
            Determine interpolation vectors for the chunks.
        """

        # For each chunk determine the original x and y vectors
        x_ref = [[self.x_org[self.x_limits[i][0]],
                  self.x_org[self.x_limits[i][1]]]
                 for i in range(len(self.x_limits))]
        y_ref = [[self.y_org[self.y_limits[i][0]],
                  self.y_org[self.y_limits[i][1]]]
                 for i in range(len(self.y_limits))]

        self.x_final = []
        self.y_final = []

        for j in range(np.shape(x_ref)[0]):
            ind_min_x = int((np.abs(self.x - x_ref[j][0])).argmin())
            ind_max_x = int((np.abs(self.x - x_ref[j][1])).argmin())
            
            self.x_final.append(self.x[ind_min_x:ind_max_x])
            
        for j in range(np.shape(y_ref)[0]):
            ind_min_y = int((np.abs(self.y - y_ref[j][0])).argmin())
            ind_max_y = int((np.abs(self.y - y_ref[j][1])).argmin())
            
            self.y_final.append(self.y[ind_min_y:ind_max_y])

    def split_into_chunks(self):

        """
            Split the dataset into chunks for interpolation
        """

        # If the dataset needs to be split into chunks
        if self.as_chunks:

            y_len, x_len = np.shape(self.data)[0], np.shape(self.data)[1]

            # Split in equal sized chunks and treat the bottom and right
            # differently that have different shape than the equal sized chunks

            plus_y = self.data.shape[0] % self.size
            plus_x = self.data.shape[1] % self.size

            # Number of equal sized chunks in x and y direction
            num_y = int(self.data.shape[0] / self.size)
            num_x = int(self.data.shape[1] / self.size)

            # If final columns and row too small to be called individual
            # chunks, combine with second to last row and column
            if plus_y < 2/3*self.size:
                num_y = num_y - 1

            if plus_x < 2/3*self.size:
                num_x = num_x - 1

            self.num_y = num_y
            self.num_x = num_x

            chunks = []  # Store the chunks
            pixels = []  # Store the pixel limits to acces original coordinates
            count = 0

            # Store the coord limits to acces original coordinates
            self.x_limits = []
            self.y_limits = []

            # Save the chunks in a list
            count_ges = 0
            tmpy = 0
            for i in range(num_y):
                tmpx = 0
                for j in range(num_x):
                    if ((i+1)*self.size-1+self.overlap <= self.data.shape[0]) and ((j+1)*self.size-1+self.overlap <= self.data.shape[1]):
                        chunks.append(
                            list(self.data[i*self.size:(i+1)*self.size-1+self.overlap,
                                           j*self.size:(j+1)*self.size-1+self.overlap]))
                        pixels.append(
                            [i*self.size, (i+1)*self.size-1+self.overlap,
                             j*self.size, (j+1)*self.size-1+self.overlap])
                        
                        self.x_limits.append([j*self.size, (j+1)*self.size-1+self.overlap])
                        self.y_limits.append([i*self.size, (i+1)*self.size-1+self.overlap])
                        
                    elif ((i+1)*self.size-1+self.overlap > self.data.shape[0]) and ((j+1)*self.size-1+self.overlap <= self.data.shape[1]):
                        chunks.append(
                            list(self.data[i*self.size:,
                                           j*self.size:(j+1)*self.size-1+self.overlap]))
                        pixels.append(
                            [i*self.size, np.shape(self.data)[0]-1,
                             j*self.size, (j+1)*self.size-1+self.overlap])
                    elif ((j+1)*self.size-1+self.overlap > self.data.shape[1]) and ((i+1)*self.size-1+self.overlap <= self.data.shape[0]):   
                        chunks.append(
                                list(self.data[i*self.size:(i+1)*self.size-1+self.overlap,
                                               j*self.size:]))
                        pixels.append(
                            [i*self.size, (i+1)*self.size-1+self.overlap,
                             j*self.size, np.shape(self.data)[1]-1])
                    elif ((j+1)*self.size-1+self.overlap > self.data.shape[1]) and ((i+1)*self.size-1+self.overlap > self.data.shape[0]):
                        chunks.append(
                                list(self.data[i*self.size:,
                                               j*self.size:]))
                        pixels.append(
                            [i*self.size, np.shape(self.data)[0]-1,
                             j*self.size, np.shape(self.data)[1]-1])
                tmpy = tmpy + 1

            # Chunks most bottom column
            tmpx = 0
            for j in range(num_x):
                if ((j+1)*self.size-1+self.overlap <= self.data.shape[1]):
                    chunks.append(
                        list(self.data[(num_y)*self.size:-1,
                                       j*self.size:(j+1)*self.size-1+self.overlap]))
                    pixels.append(
                        [(num_y)*self.size, np.shape(self.data)[0]-1,
                         j*self.size, (j+1)*self.size-1+self.overlap])
                    self.x_limits.append([j*self.size, (j+1)*self.size-1+self.overlap])
                    self.y_limits.append([(num_y)*self.size, np.shape(self.data)[0]-1])
                else:
                    chunks.append(
                        list(self.data[(num_y)*self.size:-1,
                                       j*self.size:]))
                    pixels.append(
                        [(num_y)*self.size, np.shape(self.data)[0]-1,
                         j*self.size, np.shape(self.data)[1]-1])
                    self.x_limits.append([j*self.size, (j+1)*self.size-1])

            # Chunks most right column
            tmpy = 0
            for j in range(num_y):
                if ((j+1)*self.size-1+self.overlap <= self.data.shape[0]):
                    chunks.append(
                        list(self.data[j*self.size:(j+1)*self.size-1+self.overlap,
                                       (num_x)*self.size:-1]))
                    pixels.append(
                        [j*self.size, (j+1)*self.size-1+self.overlap,
                         (num_x)*self.size, x_len-1])
                    self.y_limits.append([j*self.size, (j+1)*self.size-1+self.overlap])
                    self.x_limits.append([(num_x)*self.size, x_len-1])
                else:
                    chunks.append(
                        list(self.data[j*self.size:-1,
                                       (num_x)*self.size:-1]))
                    pixels.append(
                        [j*self.size, np.shape(self.data)[0]-1,
                         (num_x)*self.size, x_len-1])
                    self.y_limits.append([j*self.size, (j+1)*self.size-1])

            # Chunk bottom right
            chunks.append(
                list(self.data[num_y*self.size:-1,
                               num_x*self.size:-1]))
            pixels.append(
                [num_y*self.size, y_len-1,
                 num_x*self.size, x_len-1])

            # Save corner indices for the chunks
            self.x_limits.append([num_x*self.size, x_len-1])
            self.y_limits.append([num_y*self.size, y_len-1])

            return chunks, pixels

        # If dataset is interpolated columns-wise
        elif self.as_cols:

            chunks, pixels = None, None
            self.x_limits = [[], [], [], [], [], [], [], []]

            # Determine columns to be interpolated in each chunk
            i = 0
            while i <= len(self.x):
                for j in range(len(self.x_limits)):
                    if i+j <= len(self.x)-1:
                        self.x_limits[j].append(i + j)
                i = i + j + 1

            # Determine the coordinates in the interpolation vector
            self.x_final = [[], [], [], [], [], [], [], []]
            self.y_final = []

            for i in range(len(self.x_limits)):
                for j in self.x_limits[i]:
                    self.x_final[i].append(self.x[j])
                self.y_final.append(list(self.y))

    def determine_interpolation_approach(self):

        """
            Depending on the siz of the original dataset and the size of the
            dataset after the interpolation, the computational
            power  might be exceeded and the dataset needs to be
            split up to be interpolated.

            Different cases are covered in this function and depending
            on the sizes, the approach is determined.
            Approaches:
                one_go: dataset before and after interpolation small
                        enough to be interpolated in one go
                as_chunks: dataset before interpolation already so large that
                            it needs to be split into chunks which then
                            area interpolated independently
                as_cols: dataset after interpolation so large,
                        that interpolation is done columnwise

        """

        # If several datasets shall be interpolated after another and the
        # current run is the first dataset
        if (self.several and self.first) or not self.several:
             if len(self.x_org) < 2*self.size and len(self.y_org) < 2*self.size:
                self.one_go, self.as_chunks, self.as_cols = True, False, False
             else:
                # Assessment of the size of the dataset before and after
                # interpolation and comparison with manually defined limit
                # to decide for interpolation approach
                if ((len(self.x) * len(self.y) < self.limit_interp)
                        and (len(self.x_org) * len(self.y_org) < self.limit_org)):
                    self.one_go, self.as_chunks, self.as_cols = True, False, False
    
                elif len(self.x_org) * len(self.y_org) >= self.limit_org:
                    self.one_go, self.as_chunks, self.as_cols = False, True, False
    
                elif (len(self.x) * len(self.y) > self.limit_interp):
                    self.one_go, self.as_chunks, self.as_cols = False, False, True
                
                if self.several and self.first:
                    # Store the interpolation approach in the properties file
                    self.properties['interp_approach'] = {}
                    self.properties['interp_approach']['one_go'] = self.one_go
                    self.properties['interp_approach']['as_chunks'] = self.as_chunks
                    self.properties['interp_approach']['as_cols'] = self.as_cols

        # If several datasets shall be interpolated after another and the
        # current run is not the first dataset
        elif self.several and not self.first:

            # Load the interpolation approach from the properties file
            self.one_go = self.properties['interp_approach']['one_go']
            self.as_chunks = self.properties['interp_approach']['as_chunks']
            self.as_cols = self.properties['interp_approach']['as_cols']
    
    def interpolate_dataset(self, data, y, x, x_new, y_new):

        """
            Interpolate dataset. Categorical data is interpolated using
            nearest neighbor first into x direction then into y direction

            Input:
                data: data to interpolate, depending on the interpolation
                    appraoch the whole dataset or a chunk
                y: original y vector
                x: original x vector
                x_new: interpolation vector x
                y_new: interpolation vector y

            Return:
                data_interp: interpolated data
        """

        # Interpolation vectors
        x_new = np.array(x_new)
        y_new = np.array(y_new)
        
        # Make sure that no data values do not corrupt the interpolation
        data = data.astype(float)
        data[data == self.prop_settings['no_value']] = np.nan

        
        if self.categorical==False:
            data = np.flipud(data)
            if self.prop_settings['no_value'] != None:
                nan_map = np.zeros_like(data)
                nan_map[np.isnan(data)] = 1
            filled_z = data.copy()
            filled_z[np.isnan(data)] = 0
            # Interpolation
            f = interp2d(x, np.flip(y), filled_z, kind='linear')
            data_interp = f(x_new, y_new)
            if self.prop_settings['no_value'] != None:
                f_nan = interp2d(x, np.flip(y), nan_map, kind='linear')
                nan_new = f_nan(x_new, y_new)

            # Set all by nan values affected pixels to no data value
            data_interp[nan_new > 0] = self.prop_settings['no_value']
            
            return np.flipud(data_interp)

        # If data is categorical
        elif self.categorical==True:
            # Define empty arrays to be filled
            if self.prop_settings['no_value'] != None:
                nan_map = np.zeros_like(data)
                nan_map[np.isnan(data)] = 1
            filled_z = data.copy()
            filled_z[np.isnan(data)] = 0
            
            data_interp_x = np.zeros((len(y), len(x_new)))
            nan_interp_x = np.zeros((len(y), len(x_new)))

            # Interpolate first in x direction
            for i in range(len(y)):

                tmp = filled_z[i, :]
                f = interp1d(x, tmp, kind='nearest', fill_value="extrapolate")
                data_interp_x[i, :] = f(x_new)

                if self.prop_settings['no_value'] != None:
                    tmp = nan_map[i, :]
                    f = interp1d(x, tmp, kind='nearest', fill_value="extrapolate")
                    nan_interp_x[i, :] = f(x_new)
                    nan_interp = np.zeros((len(y_new), len(x_new)))

            # Define empty arrays to be filled
            data_interp = np.zeros((len(y_new), len(x_new)))        

            # Then interpolate in y direction
            for i in range(len(x_new)):

                tmp = data_interp_x[:, i]
                f = interp1d(y, tmp, kind='nearest', fill_value="extrapolate")
                data_interp[:, i] = f(y_new)
                if self.prop_settings['no_value'] != None:
                    tmp = nan_interp_x[:, i]
                    f = interp1d(y, tmp, kind='nearest', fill_value="extrapolate")
                    nan_interp[:, i] = f(y_new)

                    # Set all by nan values affected pixels to no data value
                    data_interp[nan_interp > 0] = self.prop_settings['no_value']
            
            return data_interp
    
    def reshape_chunks(self, chunks):

        """
            Interpolated chunks are attached to form the interpolated dataset.
            The chunks overlap and for categorical features, only one version
            is used. For continuous features, the overlapping parts are averaged.
            
            Input:
                chunks: interpolated chunks, list of lists
        """
             
        if self.as_chunks:           
            array = np.zeros((len(self.y), len(self.x)))
            aa = np.zeros((len(self.y), len(self.x)))
            test = np.zeros((len(self.y), len(self.x)))
            
            shape_x, shape_y = [], []
            for chunk in chunks:
                shape_x.append(np.shape(np.array(chunk))[1])
                shape_y.append(np.shape(np.array(chunk))[0])
            
            count = 0
            for count, chunk in enumerate(chunks):            
                    xt = int((np.abs(self.x - self.x_final[count][0])).argmin())
                    yt = int((np.abs(self.y - self.y_final[count][0])).argmin())
                    
                    tmp = np.array(chunks[count])
                    tmp1 = array[yt:yt+shape_y[count], xt:xt+shape_x[count]]
                    aa[yt:yt+shape_y[count], xt:xt+shape_x[count]] = tmp
                    
                    mask = (tmp1 == 0) | (tmp1 == -999) | (tmp == -999)
    
                    if not self.categorical:
                        # Calculate the element-wise average only where mask is False
                        average_array = np.zeros_like(tmp, dtype=float)  # Initialize array for the result
                        average_array[~mask] = (tmp[~mask] + tmp1[~mask]) / 2
                        
                        # Assign elements from arr2 where arr1 is equal to zero
                        average_array[mask] = tmp[mask]
                        
                        array[yt:yt+shape_y[count], xt:xt+shape_x[count]] = average_array
                        
                        tmp = np.ones_like(tmp, dtype=float)*count + 1
                        tmp1 = test[yt:yt+shape_y[count], xt:xt+shape_x[count]]
                        
                        mask = (tmp1 == 0)
        
                        # Calculate the element-wise average only where mask is False
                        average_array = np.zeros_like(tmp, dtype=float)  # Initialize array for the result
                        average_array[~mask] = (tmp[~mask] + tmp1[~mask]) / 2
                        
                        # Assign elements from arr2 where arr1 is equal to zero
                        average_array[mask] = tmp[mask]
                        
                        test[yt:yt+shape_y[count], xt:xt+shape_x[count]] = average_array
                        
                    elif self.categorical:
                        
                        average_array = np.zeros_like(tmp, dtype=float)  # Initialize array for the result
                        average_array[~mask] = (tmp[~mask] + tmp1[~mask]) / 2
                        
                        # Assign elements from arr2 where arr1 is equal to zero
                        average_array[mask] = tmp[mask]
                        
                        array[yt:yt+shape_y[count], xt:xt+shape_x[count]] = tmp
                        test[yt:yt+shape_y[count], xt:xt+shape_x[count]] = average_array
            self.test = test.copy()
        elif self.as_cols:
            # Final array to be filled
            array = np.zeros((len(self.y), len(self.x)))

            # Insert the columns of the individual
            # chunks into the final dataset
            for i in range(len(chunks)):
                array[:, self.x_limits[i]] = np.array(chunks[i])
        
        return array