diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..32f3e756f764c1065686e88b16ae1635dc45ce33
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,46 @@
+cmake_minimum_required(VERSION 3.2)
+
+project(CompactNSearch)
+
+# Visual studio solution directories.
+set_property(GLOBAL PROPERTY USE_FOLDERS on)
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+if (UNIX)
+find_package(OpenMP)
+if (OPENMP_FOUND)
+	set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+endif(OPENMP_FOUND)
+endif (UNIX)
+
+SET(CMAKE_DEBUG_POSTFIX "_d")
+
+set (HEADER_FILES 
+		include/Config.h
+		include/CompactNSearch.h
+		include/PointSet.h
+		include/DataStructures.h)
+
+include_directories("include")
+
+add_library(CompactNSearch
+		${HEADER_FILES}
+		src/CompactNSearch.cpp
+)
+
+install(FILES "include/CompactNSearch" ${HEADER_FILES}
+	DESTINATION include/)
+
+install(TARGETS CompactNSearch
+	RUNTIME DESTINATION bin
+	LIBRARY DESTINATION lib
+	ARCHIVE DESTINATION lib)
+
+option(BUILD_DEMO "Build example of how to use this library."
+		ON)
+if(BUILD_DEMO)
+	add_subdirectory(demo)
+endif(BUILD_DEMO)
+
diff --git a/LICENSE b/LICENSE
index c912e1a249e576435f2bd5f620f51b2fad3458b5..d41d0c82d55bff659497baa635137cff068c51f1 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
-MIT License
+The MIT License (MIT)
 
-Copyright (c) 2016 Interactive Computer Graphics 
+Copyright (c) 2016-present, CompactNSearch contributors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -19,3 +19,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..223bbd2faa4e5c7d5c81a8da185830687dbdc65f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,75 @@
+# CompactNSearch
+
+**CompactNSearch** is a C++ library for **parallel** computation of neighboring points in a **fixed radius** in a **three-dimensional point cloud**. The implemented algorithm is a variant of the *Compact Hashing* approach proposed by Ihmsen et al. [IABT11]. The neighborhood information can be efficiently updated when the points spatially move. Moreover, the library offers the possibility to reorder the points (and other array-stored per point information) according to a space-filling Z curve to improve the cache efficiency in later queries or accesses.
+
+The library was used to generate all results of the SPH-based fluid simulations presented by Bender and Koschier in [BK15, BK16].
+
+**Author**: Dan Koschier, **License**: MIT
+
+## Libraries using CompactNSearch
+* [PBD] - A C++ library for physically-based simulation of rigid bodies, deformables, cloth and fluids using Position-Based Dynamics
+* [SPlisHSPlasH] - A C++ library for the physically-based simulation of fluids using Smoothed Particle Hydrodynamics (cf. video) (Coming soon)
+
+[![Video](https://img.youtube.com/vi/POnmzzhc5E0/0.jpg)](https://www.youtube.com/watch?v=POnmzzhc5E0)
+
+## Build Instructions
+
+This project is based on [CMake](https://cmake.org/). Simply generate project, Makefiles, etc. using [CMake](https://cmake.org/) and compile the project with the compiler of your choice. The code was tested with the following configurations:
+- Windows 10 64-bit, CMake 3.7, Visual Studio 2015
+- Debian 8 64-bit, CMake 3.7, GCC 4.9.2.
+
+## Usage
+A data structure to perform a neighborhood search can be created by calling the constructor given a fixed search radius ```r```.
+```c++
+CompactNSearch::NeighborhoodSearch nsearch(r);
+```
+An arbitrary number of point clouds can then be added to the data structure using the method ```add_point_set```. The library expects the point positions to be contiguously stored in an array-like structure. The method will return a unique id associated with the initialized point set.
+```c++
+std::vector<std::array<Real, 3>> positions;
+// ... Fill array with 3 * n real numbers representing three-dimensional point positions.
+unsigned int point_set_id = nsearch.add_point_set(positions.front().data(), positions.size());
+nsearch.find_neighbors();
+```
+In order to generate the neighborhood information simply execute the following command
+```c++
+nsearch.find_neighbors();
+```
+Finally, the neighborhood information can be accessed as follows
+```c++
+PointSet const& ps = nsearch.point_set(point_set_id);
+for (int i = 0; i < ps.n_points(); ++i)
+{
+	for (int j = 0; j < ps.n_neighbors(i); ++j)
+	{
+    	// Return PointID of the jth neighbor of the ith particle.
+	    PointID const& pid = ps.neighbor(i, j);
+	    // ...
+	    // Do whatever you want with the point id. The id contains two indices. 
+	    // The first field pid.point_set_id represents the unique point set id returnd by add_point_set.
+	    // The second field pid.point_id stands for the index of the neighboring particle within 
+	    // the containing point set.
+	    // ...
+	}
+}
+```
+
+Besides the basic functionality the library offers to compute a rule for reordering the points according to a space-filling Z curve. The reordering will improve the performance of future neighborhood queries and accesses. The rule can be computed via
+```c++
+nsearch.z_sort();
+```
+Please note that the actual reordering must invoked by the user by
+```c++
+ps.sort_field(positions.data());
+```
+Assuming that there is additional information stored per-point (e.g. velocity, color, mass etc.) the information **must** also be reorded using the same method to maintain consistency. Subsequently, the ```find_neighbors``` function has to be invoked again to update the neighborhood information.
+
+Another self-explaining (benchmark) [demo](demo/main.cpp) is contained in the project.
+
+## References
+
+* [IABT11] M. Ihmsen, N. Akinci, M. Becker and M. Teschner, 2011. "A Parallel SPH Implementation on Multi-Core CPUs", Computer Graphics Forum 30, 1, 99-112.
+* [BK15] J. Bender and D. Koschier 2015. "Divergence-Free Smoothed Particle Hydrodynamics", ACM SIGGRAPH / Eurographics Symposium on Computer Animation, 1-9
+* [BK16] J. Bender and D. Koschier, 2016. "Divergence-Free SPH for Incompressible and Viscous Fluids", IEEE Transactions on Visualization and Computer Graphics.
+
+[PBD]: <https://github.com/InteractiveComputerGraphics/PositionBasedDynamics>
+[SPlisHSPlasH]: <https://github.com/InteractiveComputerGraphics/SPlisHSPlasH>
\ No newline at end of file
diff --git a/demo/CMakeLists.txt b/demo/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7afc12b622c3feb3873aae508e1f4fde1c76f9ce
--- /dev/null
+++ b/demo/CMakeLists.txt
@@ -0,0 +1,11 @@
+
+
+add_definitions(-D_USE_MATH_DEFINES)
+
+include_directories("../include")
+
+add_executable(Demo
+    main.cpp
+)
+
+target_link_libraries(Demo CompactNSearch)
diff --git a/demo/main.cpp b/demo/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..db6f91b3fdaeff47f6e53bed2090603f5057b9f9
--- /dev/null
+++ b/demo/main.cpp
@@ -0,0 +1,212 @@
+
+#include <CompactNSearch>
+
+#include <iostream>
+#include <vector>
+#include <array>
+#include <cmath>
+#include <limits>
+#include <chrono>
+#include <random>
+
+#include <omp.h>
+
+using namespace CompactNSearch;
+
+std::vector<std::array<Real, 3>> positions;
+
+std::size_t const N = 150;
+Real const r_omega = 0.15;
+Real const r_omega2 = r_omega * r_omega;
+Real const radius = 2.0 * (2.0 * r_omega / static_cast<Real>(N-1));
+
+std::size_t const N_enright_steps = 50;
+
+Real
+compute_average_number_of_neighbors(NeighborhoodSearch const& nsearch)
+{
+	unsigned long res = 0;
+	auto const& d = nsearch.point_set(0);
+	for (int i = 0; i < d.n_points(); ++i)
+	{
+		res += static_cast<unsigned long>(d.n_neighbors(i));
+	}
+	return static_cast<Real>(res) / static_cast<Real>(d.n_points());
+}
+
+Real
+compute_average_distance(NeighborhoodSearch const& nsearch)
+{
+	unsigned long long res = 0;
+	auto const& d = nsearch.point_set(0);
+	unsigned long long count = 0;
+	for (int i = 0; i < d.n_points(); ++i)
+	{
+		std::size_t nn = d.n_neighbors(i);
+		for (int j = 0; j < nn; ++j)
+		{
+			CompactNSearch::PointID const& k = d.neighbor(i, j);
+			res += std::abs(i - static_cast<int>(k.point_id));
+			count++;
+		}
+	}
+	return static_cast<Real>(res) / static_cast<Real>(count);
+}
+
+std::vector<std::vector<unsigned int>>
+brute_force_search()
+{
+	std::vector<std::vector<unsigned int>> brute_force_neighbors(positions.size());
+	for (int i = 0; i < positions.size(); ++i)
+	{
+		std::vector<unsigned int>& neighbors = brute_force_neighbors[i];
+		for (int j = 0; j < positions.size(); ++j)
+		{
+			if (i == j)
+				continue;
+			std::array<Real, 3> const& xa = positions[i];
+			std::array<Real, 3> const& xb = positions[j];
+			Real l2 =
+					(xa[0] - xb[0])*(xa[0] - xb[0]) +
+					(xa[1] - xb[1])*(xa[1] - xb[1]) +
+					(xa[2] - xb[2])*(xa[2] - xb[2]);
+			if (l2 <= radius * radius)
+			{
+				neighbors.push_back(j);
+			}
+		}
+	}
+	return std::move(brute_force_neighbors);
+}
+
+void
+compare_with_bruteforce_search(NeighborhoodSearch const& nsearch)
+{
+	auto brute_force_neighbors = brute_force_search();
+	PointSet const& d0 = nsearch.point_set(0);
+	for (int i = 0; i < N; ++i)
+	{
+		auto const& bfn = brute_force_neighbors[i];
+		if (bfn.size() != d0.n_neighbors(i))
+		{
+			std::cerr << "ERROR: Not the same number of neighbors." << std::endl;
+		}
+		for (int j = 0; j < d0.n_neighbors(i); ++j)
+		{
+			if (std::find(bfn.begin(), bfn.end(), d0.neighbor(i, j).point_id) == bfn.end())
+			{
+				std::cerr << "ERROR: Neighbor not found in brute force list." << std::endl;
+			}
+		}
+	}
+}
+
+std::array<Real, 3>
+enright_velocity_field(std::array<Real, 3> const& x)
+{
+	Real sin_pi_x_2 = std::sin(M_PI * x[0]);
+	Real sin_pi_y_2 = std::sin(M_PI * x[1]);
+	Real sin_pi_z_2 = std::sin(M_PI * x[2]);
+	sin_pi_x_2 *= sin_pi_x_2;
+	sin_pi_y_2 *= sin_pi_y_2;
+	sin_pi_z_2 *= sin_pi_z_2;
+
+	Real sin_2_pi_x = std::sin(2.0 * M_PI * x[0]);
+	Real sin_2_pi_y = std::sin(2.0 * M_PI * x[1]);
+	Real sin_2_pi_z = std::sin(2.0 * M_PI * x[2]);
+	return {{
+			2.0 * sin_pi_x_2 * sin_2_pi_y * sin_2_pi_z,
+			-sin_2_pi_x * sin_pi_y_2 * sin_2_pi_z,
+			-sin_2_pi_x * sin_2_pi_y * sin_pi_z_2}};
+
+}
+
+void
+advect()
+{
+#ifdef _MSC_VER
+	concurrency::parallel_for_each
+#else
+	__gnu_parallel::for_each
+#endif
+	(positions.begin(), positions.end(), [&](std::array<Real, 3>& x)
+	{
+		std::array<Real, 3> v = enright_velocity_field(x);
+		x[0] += 0.005 * v[0];
+		x[1] += 0.005 * v[1];
+		x[2] += 0.005 * v[1];
+	}
+	);
+}
+
+int main(int argc, char* argv[])
+{
+	Real min_x = std::numeric_limits<Real>::max();
+	Real max_x = std::numeric_limits<Real>::min();
+	positions.reserve(N * N * N);
+	for (unsigned int i = 0; i < N; ++i)
+	{
+		for (unsigned int j = 0; j < N; ++j)
+		{
+			for (unsigned int k = 0; k < N; ++k)
+			{
+				std::array<Real, 3> x = {{
+						r_omega * (2.0 * static_cast<Real>(i) / static_cast<Real>(N-1)-1.0),
+						r_omega * (2.0 * static_cast<Real>(j) / static_cast<Real>(N-1)-1.0),
+						r_omega * (2.0 * static_cast<Real>(k) / static_cast<Real>(N-1)-1.0)}};
+
+				Real l2  = x[0] * x[0] + x[1] * x[1] + x[2] * x[2];
+				if (l2 < r_omega2)
+				{
+					x[0] += 0.35;
+					x[1] += 0.35;
+					x[2] += 0.35;
+					positions.push_back(x);
+					if (min_x > x[0])
+					{
+						min_x = x[0];
+					}
+					if (max_x < x[0])
+					{
+						max_x = x[0];
+					}
+				}
+			}
+		}
+	}
+	std::random_shuffle(positions.begin(), positions.end());
+
+	NeighborhoodSearch nsearch(radius, true);
+	nsearch.add_point_set(positions.front().data(), positions.size(), true, true);
+	nsearch.find_neighbors();
+
+	std::cout << "#Points                                = " << positions.size() << std::endl;
+	std::cout << "Search radius                          = " << radius << std::endl;
+	std::cout << "Min x                                  = " << min_x << std::endl;
+	std::cout << "Max x                                  = " << max_x << std::endl;
+	std::cout << "Average number of neighbors            = " << compute_average_number_of_neighbors(nsearch) << std::endl;
+	std::cout << "Average index distance prior to z-sort = " << compute_average_distance(nsearch) << std::endl;
+
+	nsearch.z_sort();
+	for (auto const& d : nsearch.point_sets())
+	{
+		d.sort_field(positions.data());
+	}
+	nsearch.find_neighbors();
+	//compare_with_bruteforce_search(nsearch);
+
+	std::cout << "Average index distance after z-sort    = " << compute_average_distance(nsearch) << std::endl;
+
+	std::cout << "Moving points:" << std::endl;
+	for (int i = 0; i < N_enright_steps; ++i)
+	{
+		std::cout << "Enright step " << i << ". ";
+		advect();
+		auto t0 = std::chrono::high_resolution_clock::now();
+		nsearch.find_neighbors();
+		std::cout << "Neighborhood search took " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - t0).count() << "ms" << std::endl;
+		//compare_with_bruteforce_search(nsearch);
+	}
+
+	return 0;
+}
diff --git a/extern/libmorton/LICENSE b/extern/libmorton/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..33a07df35e1accc99dcf1b5a847faf374e4f2862
--- /dev/null
+++ b/extern/libmorton/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016 Jeroen Baert
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/extern/libmorton/README.md b/extern/libmorton/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..53160f8455560f3583706c6aa4e4423ca712e819
--- /dev/null
+++ b/extern/libmorton/README.md
@@ -0,0 +1,32 @@
+# Libmorton
+
+ * Libmorton is a **C++ header-only library** with methods to efficiently encode/decode between 64, 32 and 16-bit morton codes and coordinates, in 2D and 3D. *Morton order* is also known as *Z-order* or *[the Z-order curve](https://en.wikipedia.org/wiki/Z-order_curve)*.
+ * Libmorton is a **lightweight and portable** library - in its most basic form it only depends on standard C++ headers. Architecture-specific optimizations are implemented incrementally.
+ * This library is under active development. SHIT MIGHT BREAK.
+ * More info and some benchmarks in these blogposts:
+   * http://www.forceflow.be/2016/01/18/libmorton-a-library-for-morton-order-encoding-decoding/
+   * http://www.forceflow.be/2013/10/07/morton-encodingdecoding-through-bit-interleaving-implementations/
+
+## Usage
+Just include *libmorton/morton.h*. This will always have functions that point to the most efficient way to encode/decode Morton codes. If you want to test out alternative (and possibly slower) methods, you can find them in *libmorton/morton2D.h* and *libmorton/morton3D.h*.
+
+<pre>
+// ENCODING 2D / 3D morton codes, of length 32 and 64 bits
+inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y);
+inline uint_fast64_t morton2D_64_encode(const uint_fast32_t x, const uint_fast32_t y);
+inline uint_fast32_t morton3D_32_encode(const uint_fast16_t x, const uint_fast16_t y, const uint_fast16_t z);
+inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const uint_fast32_t z);
+// DECODING 2D / 3D morton codes, of length 32 and 64 bits
+inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y);
+inline void morton2D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y);
+inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y, uint_fast16_t& z);
+inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z);
+</pre>
+
+## Testing
+The *test* folder contains tools I use to test correctness and performance of the libmorton implementation. This section is under heavy re-writing, but might contain some useful code for advanced usage.
+
+## TODO
+ * Write better test suites
+ * Implement BMI2 instruction set detection and optimization for newer Intel CPU's
+ * More testing
diff --git a/extern/libmorton/libmorton/include/morton.h b/extern/libmorton/libmorton/include/morton.h
new file mode 100644
index 0000000000000000000000000000000000000000..015a3c7e8c87507d402dca862b65c1988a979eee
--- /dev/null
+++ b/extern/libmorton/libmorton/include/morton.h
@@ -0,0 +1,32 @@
+#pragma once
+
+// This file will always contain pointers to the fastest Morton encoding/decoding implementation
+// IF you just want to use the fastest method to encode/decode morton codes, include this
+
+#include "morton2D.h"
+#include "morton3D.h"
+
+//// ENCODE
+//inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y);
+//inline uint_fast64_t morton2D_64_encode(const uint_fast32_t x, const uint_fast32_t y);
+//inline uint_fast32_t morton3D_32_encode(const uint_fast16_t x, const uint_fast16_t y, const uint_fast16_t z);
+//inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const uint_fast32_t z);
+//
+//// DECODE
+//inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y);
+//inline void morton2D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y);
+//inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y, uint_fast16_t& z);
+//inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z);
+
+// Functions under this are stubs which will always point to fastest implementation at the moment
+//-----------------------------------------------------------------------------------------------
+
+#define morton2D_32_encode m2D_e_sLUT<uint_fast32_t, uint_fast16_t>
+#define morton2D_64_encode m2D_e_sLUT<uint_fast64_t, uint_fast32_t>
+#define morton2D_32_decode m2D_d_sLUT<uint_fast32_t, uint_fast16_t>
+#define morton2D_64_decode m2D_d_sLUT<uint_fast64_t, uint_fast32_t>
+
+#define morton3D_32_encode m3D_e_sLUT<uint_fast32_t, uint_fast16_t>
+#define morton3D_64_encode m3D_e_sLUT<uint_fast64_t, uint_fast32_t>
+#define morton3D_32_decode m3D_d_sLUT<uint_fast32_t, uint_fast16_t>
+#define morton3D_64_decode m3D_d_sLUT<uint_fast64_t, uint_fast32_t>
\ No newline at end of file
diff --git a/extern/libmorton/libmorton/include/morton2D.h b/extern/libmorton/libmorton/include/morton2D.h
new file mode 100644
index 0000000000000000000000000000000000000000..cfeab9770d3c3e577b12114b9148c19cca35e885
--- /dev/null
+++ b/extern/libmorton/libmorton/include/morton2D.h
@@ -0,0 +1,256 @@
+#pragma once
+
+// Libmorton - Methods to encode/decode 64-bit morton codes from/to 32-bit (x,y) coordinates
+// Warning: morton.h will always point to the functions that use the fastest available method.
+
+#include <stdint.h>
+#include <math.h>
+#include "morton2D_LUTs.h"
+#include "morton_common.h"
+
+using namespace std;
+
+// Encode methods
+template<typename morton, typename coord> inline morton m2D_e_sLUT(const coord x, const coord y);
+template<typename morton, typename coord> inline morton m2D_e_sLUT_ET(const coord x, const coord y);
+template<typename morton, typename coord> inline morton m2D_e_LUT(const coord x, const coord y);
+template<typename morton, typename coord> inline morton m2D_e_LUT_ET(const coord x, const coord y);
+template<typename morton, typename coord> inline morton m2D_e_magicbits(const coord x, const coord y);
+template<typename morton, typename coord> inline morton m2D_e_for(const coord x, const coord y);
+template<typename morton, typename coord> inline morton m2D_e_for_ET(const coord x, const coord y);
+
+// Decode methods
+template<typename morton, typename coord> inline void m2D_d_sLUT(const morton m, coord& x, coord& y);
+template<typename morton, typename coord> inline void m2D_d_sLUT_ET(const morton m, coord& x, coord& y);
+template<typename morton, typename coord> inline void m2D_d_LUT(const morton m, coord& x, coord& y);
+template<typename morton, typename coord> inline void m2D_d_LUT_ET(const morton m, coord& x, coord& y);
+template<typename morton, typename coord> inline void m2D_d_magicbits(const morton m, coord& x, coord& y);
+template<typename morton, typename coord> inline void m2D_d_for(const morton m, coord& x, coord& y);
+
+// ENCODE 2D Morton code : Pre-shifted LookUpTable (sLUT)
+template<typename morton, typename coord>
+inline morton m2D_e_sLUT(const coord x, const coord y) {
+	morton answer = 0;
+	const static morton EIGHTBITMASK = 0x000000FF;
+	for (unsigned int i = sizeof(coord); i > 0; --i) {
+		unsigned int shift = (i - 1) * 8;
+		answer =
+			answer << 16 | 
+			Morton2D_encode_y_256[(y >> shift) & EIGHTBITMASK] |
+			Morton2D_encode_x_256[(x >> shift) & EIGHTBITMASK];
+	}
+	return answer;
+}
+
+// ENCODE 2D Morton code : LookUpTable (LUT)
+template<typename morton, typename coord>
+inline morton m2D_e_LUT(const coord x, const coord y) {
+	morton answer = 0;
+	const static morton EIGHTBITMASK = 0x000000FF;
+	for (unsigned int i = sizeof(coord); i > 0; --i) {
+		unsigned int shift = (i - 1) * 8; 
+		answer =
+			answer << 16 |
+			(Morton2D_encode_x_256[(y >> shift) & EIGHTBITMASK] << 1) |
+			(Morton2D_encode_x_256[(x >> shift) & EIGHTBITMASK]);
+	}
+	return answer;
+}
+
+// HELPER METHOD for Early Termination LUT Encode
+template<typename morton, typename coord>
+inline morton compute2D_ET_LUT_encode(const coord c, const coord *LUT) {
+	const static morton EIGHTBITMASK = 0x000000FF;
+	unsigned long maxbit = 0;
+	morton answer = 0;
+	if (findFirstSetBit<coord>(c, &maxbit) == 0) { return 0; }
+	unsigned int i = 0;
+	while (maxbit >= i) {
+		answer |= (LUT[(c >> i) & EIGHTBITMASK]) << i * 2;
+		i += 8;
+	}
+	return answer;
+}
+
+// ENCODE 2D Morton code : Pre-shifted LUT (Early termination version)
+// This version tries to terminate early when there are no more bits to process
+// Figuring this out is probably too costly in most cases.
+template<typename morton, typename coord>
+inline morton m2D_e_sLUT_ET(const coord x, const coord y) {
+	morton answer_x = compute2D_ET_LUT_encode<morton, coord>(x, Morton2D_encode_x_256);
+	morton answer_y = compute2D_ET_LUT_encode<morton, coord>(y, Morton2D_encode_y_256);
+	return answer_y | answer_x;
+}
+
+// ENCODE 2D Morton code : LUT (Early termination version)
+template<typename morton, typename coord>
+inline morton m2D_e_LUT_ET(const coord x, const coord y) {
+	morton answer_x = compute2D_ET_LUT_encode<morton, coord>(x, Morton2D_encode_x_256);
+	morton answer_y = compute2D_ET_LUT_encode<morton, coord>(y, Morton2D_encode_x_256);
+	return (answer_y << 1) | answer_x;
+}
+
+// HELPER METHOD for Magic bits encoding - split by 2
+template<typename morton, typename coord>
+inline morton morton2D_SplitBy2Bits(const coord a) {
+	const morton* masks = (sizeof(morton) <= 4) ? reinterpret_cast<const morton*>(magicbit2D_masks32) : reinterpret_cast<const morton*>(magicbit2D_masks64);
+	morton x = a;
+	if (sizeof(morton) > 4) { x = (x | x << 32) & masks[0]; }
+	x = (x | x << 16) & masks[1];
+	x = (x | x << 8) & masks[2];
+	x = (x | x << 4) & masks[3];
+	x = (x | x << 2) & masks[4];
+	x = (x | x << 1) & masks[5];
+	return x;
+}
+
+// ENCODE 2D Morton code : Magic bits
+template<typename morton, typename coord>
+inline morton m2D_e_magicbits(const coord x, const coord y) {
+	return morton2D_SplitBy2Bits<morton, coord>(x) | (morton2D_SplitBy2Bits<morton, coord>(y) << 1);
+}
+
+// ENCODE 2D Morton code : For Loop
+template<typename morton, typename coord>
+inline morton m2D_e_for(const coord x, const coord y){
+	morton answer = 0;
+	unsigned int checkbits = floor(sizeof(morton) * 4.0f);
+	for (unsigned int i = 0; i <= checkbits; ++i) {
+		morton mshifted = static_cast<morton>(0x1) << i; // Here we need to cast 0x1 to 64bits, otherwise there is a bug when morton code is larger than 32 bits
+		unsigned int shift = 2 * i;
+		answer |=
+			  ((x & mshifted) << shift)
+			| ((y & mshifted) << (shift + 1));
+	}
+	return answer;
+}
+
+// ENCODE 2D Morton code : For Loop (Early termination version)
+template<typename morton, typename coord>
+inline morton m2D_e_for_ET(const coord x, const coord y) {
+	morton answer = 0;
+	unsigned long x_max = 0, y_max = 0;
+	unsigned int checkbits = sizeof(morton) * 4;
+	findFirstSetBit<morton>(x, &x_max);
+	findFirstSetBit<morton>(y, &y_max);
+	checkbits = min(static_cast<unsigned long>(checkbits), max(x_max, y_max) + 1ul);
+	for (unsigned int i = 0; i <= checkbits; ++i) {
+		morton m_shifted = static_cast<morton>(0x1) << i; // Here we need to cast 0x1 to 64bits, otherwise there is a bug when morton code is larger than 32 bits
+		unsigned int shift = 2 * i;
+		answer |= ((x & m_shifted) << shift)
+			| ((y & m_shifted) << (shift + 1));
+	}
+	return answer;
+}
+
+// HELPER METHODE for LUT decoding
+template<typename morton, typename coord>
+inline coord morton2D_DecodeCoord_LUT256(const morton m, const uint_fast8_t *LUT, const unsigned int startshift) {
+	morton a = 0;
+	morton EIGHTBITMASK = 0x000000ff;
+	unsigned int loops = sizeof(morton);
+	for (unsigned int i = 0; i < loops; ++i) {
+		a |= (LUT[(m >> ((i * 8) + startshift)) & EIGHTBITMASK] << (4 * i));
+	}
+	return static_cast<coord>(a);
+}
+
+// DECODE 2D Morton code : Shifted LUT
+template<typename morton, typename coord>
+inline void m2D_d_sLUT(const morton m, coord& x, coord& y) {
+	x = morton2D_DecodeCoord_LUT256<morton, coord>(m, Morton2D_decode_x_256, 0);
+	y = morton2D_DecodeCoord_LUT256<morton, coord>(m, Morton2D_decode_y_256, 0);
+}
+
+// DECODE 2D 64-bit morton code : LUT
+template<typename morton, typename coord>
+inline void m2D_d_LUT(const morton m, coord& x, coord& y) {
+	x = morton2D_DecodeCoord_LUT256<morton, coord>(m, Morton2D_decode_x_256, 0);
+	y = morton2D_DecodeCoord_LUT256<morton, coord>(m, Morton2D_decode_x_256, 1);
+}
+
+// DECODE 2D Morton code : Shifted LUT (early termination)
+template<typename morton, typename coord>
+inline void m2D_d_sLUT_ET(const morton m, coord& x, coord& y) {
+	x = 0; y = 0;
+	morton EIGHTBITMASK = 0x000000ff;
+	unsigned long firstbit_location = 0;
+	if (!findFirstSetBit<morton>(m, &firstbit_location)) { return; }
+	unsigned int i = 0;
+	unsigned int shiftback = 0;
+	while (firstbit_location >= i) {
+		morton m_shifted = (m >> i) & EIGHTBITMASK;
+		x |= Morton2D_decode_x_256[m_shifted] << shiftback;
+		y |= Morton2D_decode_y_256[m_shifted] << shiftback;
+		shiftback += 4;
+		i += 8;
+	}
+}
+
+// DECODE 2D Morton code : LUT (early termination)
+template<typename morton, typename coord>
+inline void m2D_d_LUT_ET(const morton m, coord& x, coord& y) {
+	x = 0; y = 0;
+	morton EIGHTBITMASK = 0x000000ff;
+	unsigned long firstbit_location = 0;
+	if (!findFirstSetBit<morton>(m, &firstbit_location)) { return; }
+	unsigned int i = 0;
+	unsigned int shiftback = 0;
+	while (firstbit_location >= i) {
+		morton m_shifted = (m >> i) & EIGHTBITMASK;
+		x |= Morton2D_decode_x_256[(m >> i) & EIGHTBITMASK] << shiftback;
+		y |= Morton2D_decode_x_256[(m >> (i+1)) & EIGHTBITMASK] << shiftback;
+		shiftback += 4;
+		i += 8;
+	}
+}
+
+// HELPER method for Magicbits decoding
+template<typename morton, typename coord>
+static inline coord morton2D_GetSecondBits(const morton m) {
+	morton* masks = (sizeof(morton) <= 4) ? reinterpret_cast<morton*>(magicbit2D_masks32) : reinterpret_cast<morton*>(magicbit2D_masks64);
+	morton x = m & masks[4];
+	x = (x ^ (x >> 2)) & masks[3];
+	x = (x ^ (x >> 4)) & masks[2];
+	x = (x ^ (x >> 8)) & masks[1];
+	if (sizeof(morton) > 4) x = (x ^ (x >> 16)) & masks[0];
+	return static_cast<coord>(x);
+}
+
+// DECODE 2D Morton code : Magic bits
+// This method splits the morton codes bits by using certain patterns (magic bits)
+template<typename morton, typename coord>
+inline void m2D_d_magicbits(const morton m, coord& x, coord& y) {
+	x = morton2D_GetSecondBits<morton, coord>(m);
+	y = morton2D_GetSecondBits<morton, coord>(m >> 1);
+}
+
+
+// DECODE 2D morton code : For loop
+template<typename morton, typename coord>
+inline void m2D_d_for(const morton m, coord& x, coord& y) {
+	x = 0; y = 0;
+	unsigned int checkbits = sizeof(morton) * 4;
+	for (unsigned int i = 0; i <= checkbits; ++i) {
+		morton selector = 1;
+		unsigned int shift_selector = 2 * i;
+		x |= (m & (selector << shift_selector)) >> i;
+		y |= (m & (selector << (shift_selector + 1))) >> (i + 1);
+	}
+}
+
+// DECODE 3D Morton code : For loop (Early termination version)
+template<typename morton, typename coord>
+inline void m2D_d_for_ET(const morton m, coord& x, coord& y) {
+	x = 0; y = 0;
+	float defaultbits = sizeof(morton) * 4;
+	unsigned long firstbit_location = 0;
+	if (!findFirstSetBit<morton>(m, &firstbit_location)) return;
+	unsigned int checkbits = static_cast<unsigned int>(min(defaultbits, firstbit_location / 2.0f));
+	for (unsigned int i = 0; i <= checkbits; ++i) {
+		morton selector = 1;
+		unsigned int shift_selector = 2 * i;
+		x |= (m & (selector << shift_selector)) >> i;
+		y |= (m & (selector << (shift_selector + 1))) >> (i + 1);
+	}
+}
diff --git a/extern/libmorton/libmorton/include/morton2D_LUTs.h b/extern/libmorton/libmorton/include/morton2D_LUTs.h
new file mode 100644
index 0000000000000000000000000000000000000000..a9217390c2b2037ddf50d989c94a35fb827d5e6f
--- /dev/null
+++ b/extern/libmorton/libmorton/include/morton2D_LUTs.h
@@ -0,0 +1,118 @@
+#pragma once
+
+#include <stdint.h>
+
+// Magicbits masks
+static uint_fast32_t magicbit2D_masks32[6] = { 0xFFFFFFFF, 0x0000FFFF, 0x00FF00FF, 0x0F0F0F0F, 0x33333333, 0x55555555 };
+static uint_fast64_t magicbit2D_masks64[6] = { 0x00000000FFFFFFFF, 0x0000FFFF0000FFFF, 0x00FF00FF00FF00FF,
+0x0F0F0F0F0F0F0F0F, 0x3333333333333333, 0x5555555555555555 };
+
+static const uint_fast16_t Morton2D_encode_x_256[256] =
+{
+	0, 1, 4, 5, 16, 17, 20, 21,
+	64, 65, 68, 69, 80, 81, 84, 85,
+	256, 257, 260, 261, 272, 273, 276, 277,
+	320, 321, 324, 325, 336, 337, 340, 341,
+	1024, 1025, 1028, 1029, 1040, 1041, 1044, 1045,
+	1088, 1089, 1092, 1093, 1104, 1105, 1108, 1109,
+	1280, 1281, 1284, 1285, 1296, 1297, 1300, 1301,
+	1344, 1345, 1348, 1349, 1360, 1361, 1364, 1365,
+	4096, 4097, 4100, 4101, 4112, 4113, 4116, 4117,
+	4160, 4161, 4164, 4165, 4176, 4177, 4180, 4181,
+	4352, 4353, 4356, 4357, 4368, 4369, 4372, 4373,
+	4416, 4417, 4420, 4421, 4432, 4433, 4436, 4437,
+	5120, 5121, 5124, 5125, 5136, 5137, 5140, 5141,
+	5184, 5185, 5188, 5189, 5200, 5201, 5204, 5205,
+	5376, 5377, 5380, 5381, 5392, 5393, 5396, 5397,
+	5440, 5441, 5444, 5445, 5456, 5457, 5460, 5461,
+	16384, 16385, 16388, 16389, 16400, 16401, 16404, 16405,
+	16448, 16449, 16452, 16453, 16464, 16465, 16468, 16469,
+	16640, 16641, 16644, 16645, 16656, 16657, 16660, 16661,
+	16704, 16705, 16708, 16709, 16720, 16721, 16724, 16725,
+	17408, 17409, 17412, 17413, 17424, 17425, 17428, 17429,
+	17472, 17473, 17476, 17477, 17488, 17489, 17492, 17493,
+	17664, 17665, 17668, 17669, 17680, 17681, 17684, 17685,
+	17728, 17729, 17732, 17733, 17744, 17745, 17748, 17749,
+	20480, 20481, 20484, 20485, 20496, 20497, 20500, 20501,
+	20544, 20545, 20548, 20549, 20560, 20561, 20564, 20565,
+	20736, 20737, 20740, 20741, 20752, 20753, 20756, 20757,
+	20800, 20801, 20804, 20805, 20816, 20817, 20820, 20821,
+	21504, 21505, 21508, 21509, 21520, 21521, 21524, 21525,
+	21568, 21569, 21572, 21573, 21584, 21585, 21588, 21589,
+	21760, 21761, 21764, 21765, 21776, 21777, 21780, 21781,
+	21824, 21825, 21828, 21829, 21840, 21841, 21844, 21845
+};
+
+static const uint_fast16_t Morton2D_encode_y_256[256] =
+{
+	0, 2, 8, 10, 32, 34, 40, 42,
+	128, 130, 136, 138, 160, 162, 168, 170,
+	512, 514, 520, 522, 544, 546, 552, 554,
+	640, 642, 648, 650, 672, 674, 680, 682,
+	2048, 2050, 2056, 2058, 2080, 2082, 2088, 2090,
+	2176, 2178, 2184, 2186, 2208, 2210, 2216, 2218,
+	2560, 2562, 2568, 2570, 2592, 2594, 2600, 2602,
+	2688, 2690, 2696, 2698, 2720, 2722, 2728, 2730,
+	8192, 8194, 8200, 8202, 8224, 8226, 8232, 8234,
+	8320, 8322, 8328, 8330, 8352, 8354, 8360, 8362,
+	8704, 8706, 8712, 8714, 8736, 8738, 8744, 8746,
+	8832, 8834, 8840, 8842, 8864, 8866, 8872, 8874,
+	10240, 10242, 10248, 10250, 10272, 10274, 10280, 10282,
+	10368, 10370, 10376, 10378, 10400, 10402, 10408, 10410,
+	10752, 10754, 10760, 10762, 10784, 10786, 10792, 10794,
+	10880, 10882, 10888, 10890, 10912, 10914, 10920, 10922,
+	32768, 32770, 32776, 32778, 32800, 32802, 32808, 32810,
+	32896, 32898, 32904, 32906, 32928, 32930, 32936, 32938,
+	33280, 33282, 33288, 33290, 33312, 33314, 33320, 33322,
+	33408, 33410, 33416, 33418, 33440, 33442, 33448, 33450,
+	34816, 34818, 34824, 34826, 34848, 34850, 34856, 34858,
+	34944, 34946, 34952, 34954, 34976, 34978, 34984, 34986,
+	35328, 35330, 35336, 35338, 35360, 35362, 35368, 35370,
+	35456, 35458, 35464, 35466, 35488, 35490, 35496, 35498,
+	40960, 40962, 40968, 40970, 40992, 40994, 41000, 41002,
+	41088, 41090, 41096, 41098, 41120, 41122, 41128, 41130,
+	41472, 41474, 41480, 41482, 41504, 41506, 41512, 41514,
+	41600, 41602, 41608, 41610, 41632, 41634, 41640, 41642,
+	43008, 43010, 43016, 43018, 43040, 43042, 43048, 43050,
+	43136, 43138, 43144, 43146, 43168, 43170, 43176, 43178,
+	43520, 43522, 43528, 43530, 43552, 43554, 43560, 43562,
+	43648, 43650, 43656, 43658, 43680, 43682, 43688, 43690
+};
+
+static const uint_fast8_t Morton2D_decode_x_256[256]{
+	0,1,0,1,2,3,2,3,0,1,0,1,2,3,2,3,
+	4,5,4,5,6,7,6,7,4,5,4,5,6,7,6,7,
+	0,1,0,1,2,3,2,3,0,1,0,1,2,3,2,3,
+	4,5,4,5,6,7,6,7,4,5,4,5,6,7,6,7,
+	8,9,8,9,10,11,10,11,8,9,8,9,10,11,10,11,
+	12,13,12,13,14,15,14,15,12,13,12,13,14,15,14,15,
+	8,9,8,9,10,11,10,11,8,9,8,9,10,11,10,11,
+	12,13,12,13,14,15,14,15,12,13,12,13,14,15,14,15,
+	0,1,0,1,2,3,2,3,0,1,0,1,2,3,2,3,
+	4,5,4,5,6,7,6,7,4,5,4,5,6,7,6,7,
+	0,1,0,1,2,3,2,3,0,1,0,1,2,3,2,3,
+	4,5,4,5,6,7,6,7,4,5,4,5,6,7,6,7,
+	8,9,8,9,10,11,10,11,8,9,8,9,10,11,10,11,
+	12,13,12,13,14,15,14,15,12,13,12,13,14,15,14,15,
+	8,9,8,9,10,11,10,11,8,9,8,9,10,11,10,11,
+	12,13,12,13,14,15,14,15,12,13,12,13,14,15,14,15
+};
+
+static const uint_fast8_t Morton2D_decode_y_256[256]{
+	0,0,1,1,0,0,1,1,2,2,3,3,2,2,3,3,
+	0,0,1,1,0,0,1,1,2,2,3,3,2,2,3,3,
+	4,4,5,5,4,4,5,5,6,6,7,7,6,6,7,7,
+	4,4,5,5,4,4,5,5,6,6,7,7,6,6,7,7,
+	0,0,1,1,0,0,1,1,2,2,3,3,2,2,3,3,
+	0,0,1,1,0,0,1,1,2,2,3,3,2,2,3,3,
+	4,4,5,5,4,4,5,5,6,6,7,7,6,6,7,7,
+	4,4,5,5,4,4,5,5,6,6,7,7,6,6,7,7,
+	8,8,9,9,8,8,9,9,10,10,11,11,10,10,11,11,
+	8,8,9,9,8,8,9,9,10,10,11,11,10,10,11,11,
+	12,12,13,13,12,12,13,13,14,14,15,15,14,14,15,15,
+	12,12,13,13,12,12,13,13,14,14,15,15,14,14,15,15,
+	8,8,9,9,8,8,9,9,10,10,11,11,10,10,11,11,
+	8,8,9,9,8,8,9,9,10,10,11,11,10,10,11,11,
+	12,12,13,13,12,12,13,13,14,14,15,15,14,14,15,15,
+	12,12,13,13,12,12,13,13,14,14,15,15,14,14,15,15
+};
diff --git a/extern/libmorton/libmorton/include/morton3D.h b/extern/libmorton/libmorton/include/morton3D.h
new file mode 100644
index 0000000000000000000000000000000000000000..3ca80a2751667432bfaf3b3d37adb188e22819c5
--- /dev/null
+++ b/extern/libmorton/libmorton/include/morton3D.h
@@ -0,0 +1,277 @@
+#pragma once
+
+// Libmorton - Methods to encode/decode 64-bit morton codes from/to 32-bit (x,y,z) coordinates
+// Warning: morton.h will always point to the functions that use the fastest available method.
+
+#include <stdint.h>
+#include <math.h>
+#include "morton3D_LUTs.h"
+#include "morton_common.h"
+
+using namespace std;
+
+// AVAILABLE METHODS FOR ENCODING
+template<typename morton, typename coord> inline morton m3D_e_sLUT(const coord x, const coord y, const coord z);
+template<typename morton, typename coord> inline morton m3D_e_sLUT_ET(const coord x, const coord y, const coord z);
+template<typename morton, typename coord> inline morton m3D_e_LUT(const coord x, const coord y, const coord z);
+template<typename morton, typename coord> inline morton m3D_e_LUT_ET(const coord x, const coord y, const coord z);
+template<typename morton, typename coord> inline morton m3D_e_magicbits(const coord x, const coord y, const coord z);
+template<typename morton, typename coord> inline morton m3D_e_for(const coord x, const coord y, const coord z);
+template<typename morton, typename coord> inline morton m3D_e_for_ET(const coord x, const coord y, const coord z);
+
+// AVAILABLE METHODS FOR DECODING
+template<typename morton, typename coord> inline void m3D_d_sLUT(const morton m, coord& x, coord& y, coord& z);
+template<typename morton, typename coord> inline void m3D_d_sLUT_ET(const morton m, coord& x, coord& y, coord& z);
+template<typename morton, typename coord> inline void m3D_d_LUT(const morton m, coord& x, coord& y, coord& z);
+template<typename morton, typename coord> inline void m3D_d_LUT_ET(const morton m, coord& x, coord& y, coord& z);
+template<typename morton, typename coord> inline void m3D_d_magicbits(const morton m, coord& x, coord& y, coord& z);
+template<typename morton, typename coord> inline void m3D_d_for(const morton m, coord& x, coord& y, coord& z);
+template<typename morton, typename coord> inline void m3D_d_for_ET(const morton m, coord& x, coord& y, coord& z);
+
+// ENCODE 3D Morton code : Pre-Shifted LookUpTable (sLUT)
+template<typename morton, typename coord>
+inline morton m3D_e_sLUT(const coord x, const coord y, const coord z) {
+	morton answer = 0;
+	const static morton EIGHTBITMASK = 0x000000FF;
+	for (unsigned int i = sizeof(coord); i > 0; --i) {
+		unsigned int shift = (i - 1) * 8;
+		answer =
+			answer << 24 |
+			(Morton3D_encode_z_256[(z >> shift) & EIGHTBITMASK] |
+				Morton3D_encode_y_256[(y >> shift) & EIGHTBITMASK] |
+				Morton3D_encode_x_256[(x >> shift) & EIGHTBITMASK]);
+	}
+	return answer;
+}
+
+// ENCODE 3D Morton code : LookUpTable (LUT)
+template<typename morton, typename coord>
+inline morton m3D_e_LUT(const coord x, const coord y, const coord z) {
+	morton answer = 0;
+	const static morton EIGHTBITMASK = 0x000000FF;
+	for (unsigned int i = sizeof(coord); i > 0; --i) {
+		unsigned int shift = (i - 1) * 8;
+		answer =
+			answer << 24 |
+			(Morton3D_encode_x_256[(z >> shift) & EIGHTBITMASK] << 2) |
+			(Morton3D_encode_x_256[(y >> shift) & EIGHTBITMASK] << 1) |
+			Morton3D_encode_x_256[(x >> shift) & EIGHTBITMASK];
+	}
+	return answer;
+}
+
+// HELPER METHOD for ET LUT encode
+template<typename morton, typename coord>
+inline morton compute3D_ET_LUT_encode(const coord c, const coord *LUT) {
+	const static morton EIGHTBITMASK = 0x000000FF;
+	unsigned long maxbit = 0;
+	morton answer = 0;
+	if (findFirstSetBit<coord>(c, &maxbit) == 0) { return 0; }
+	for (int i = ceil((maxbit + 1) / 8.0f) ; i >= 0; --i){
+		unsigned int shift = i* 8;
+		answer = answer << 24 | (LUT[(c >> shift) & EIGHTBITMASK]);
+	}
+	return answer;
+}
+
+// ENCODE 3D Morton code : Pre-shifted LookUpTable (LUT) (Early Termination version)
+// This version tries to terminate early when there are no more bits to process
+// Figuring this out is probably too costly in most cases.
+template<typename morton, typename coord>
+inline morton m3D_e_sLUT_ET(const coord x, const coord y, const coord z) {
+	morton answer_x = compute3D_ET_LUT_encode<morton, coord>(x, Morton3D_encode_x_256);
+	morton answer_y = compute3D_ET_LUT_encode<morton, coord>(y, Morton3D_encode_y_256);
+	morton answer_z = compute3D_ET_LUT_encode<morton, coord>(z, Morton3D_encode_z_256);
+	return answer_z | answer_y | answer_x;
+}
+
+// ENCODE 3D Morton code : LookUpTable (LUT) (Early termination version)
+// This version tries to terminate early when there are no more bits to process
+// Figuring this out is probably too costly in most cases.
+template<typename morton, typename coord>
+inline morton m3D_e_LUT_ET(const coord x, const coord y, const coord z) {
+	morton answer_x = compute3D_ET_LUT_encode<morton, coord>(x, Morton3D_encode_x_256);
+	morton answer_y = compute3D_ET_LUT_encode<morton, coord>(y, Morton3D_encode_x_256);
+	morton answer_z = compute3D_ET_LUT_encode<morton, coord>(z, Morton3D_encode_x_256);
+	return (answer_z << 2) | (answer_y << 1) | answer_x;
+}
+
+// HELPER METHOD: Magic bits encoding (helper method)
+template<typename morton, typename coord>
+static inline morton morton3D_SplitBy3bits(const coord a) {
+	const morton* masks = (sizeof(morton) <= 4) ? reinterpret_cast<const morton*>(magicbit3D_masks32) : reinterpret_cast<const morton*>(magicbit3D_masks64);
+	morton x = a;
+	x = x & masks[0];
+	x = (x | x << 16) & masks[1];
+	x = (x | x << 8)  & masks[2];
+	x = (x | x << 4)  & masks[3];
+	x = (x | x << 2)  & masks[4];
+	return x;
+}
+
+// ENCODE 3D Morton code : Magic bits method
+// This method uses certain bit patterns (magic bits) to split bits in the coordinates
+template<typename morton, typename coord>
+inline morton m3D_e_magicbits(const coord x, const coord y, const coord z){
+	return morton3D_SplitBy3bits<morton, coord>(x) | (morton3D_SplitBy3bits<morton, coord>(y) << 1) | (morton3D_SplitBy3bits<morton, coord>(z) << 2);
+}
+
+// ENCODE 3D Morton code : For loop
+// This is the most naive way of encoding coordinates into a morton code
+template<typename morton, typename coord>
+inline morton m3D_e_for(const coord x, const coord y, const coord z){
+	morton answer = 0;
+	unsigned int checkbits = static_cast<unsigned int>(floor((sizeof(morton) * 8.0f / 3.0f)));
+	for (unsigned int i = 0; i < checkbits; ++i) {
+		morton mshifted= static_cast<morton>(1) << i; // Here we need to cast 0x1 to 64bits, otherwise there is a bug when morton code is larger than 32 bits
+		unsigned int shift = 2 * i; // because you have to shift back i and forth 3*i
+		answer |= ((x & mshifted) << shift)
+				| ((y & mshifted) << (shift + 1))
+				| ((z & mshifted) << (shift + 2));
+	}
+	return answer;
+}
+
+// ENCODE 3D Morton code : For loop (Early termination version)
+// In case of the for loop, figuring out when to stop early has huge benefits.
+template<typename morton, typename coord>
+inline morton m3D_e_for_ET(const coord x, const coord y, const coord z) {
+	morton answer = 0;
+	unsigned long x_max = 0, y_max = 0, z_max = 0;
+	unsigned int checkbits = static_cast<unsigned int>(floor((sizeof(morton) * 8.0f / 3.0f)));
+	findFirstSetBit<morton>(x, &x_max);
+	findFirstSetBit<morton>(y, &y_max);
+	findFirstSetBit<morton>(z, &z_max);
+	checkbits = min((unsigned long)checkbits, max(z_max, max(x_max, y_max)) + (unsigned long) 1);
+	for (unsigned int i = 0; i < checkbits; ++i) {
+		morton m_shifted = static_cast<morton>(1) << i; // Here we need to cast 0x1 to 64bits, otherwise there is a bug when morton code is larger than 32 bits
+		unsigned int shift = 2 * i;
+		answer |= ((x & m_shifted) << shift)
+			| ((y & m_shifted) << (shift + 1))
+			| ((z & m_shifted) << (shift + 2));
+	}
+	return answer;
+}
+
+
+// HELPER METHOD for LUT decoding
+// todo: wouldn't this be better with 8-bit aligned decode LUT?
+template<typename morton, typename coord>
+inline coord morton3D_DecodeCoord_LUT256(const morton m, const uint_fast8_t *LUT, const unsigned int startshift) {
+	morton a = 0;
+	morton NINEBITMASK = 0x000001ff;
+	unsigned int loops = (sizeof(morton) <= 4) ? 4 : 7; // ceil for 32bit, floor for 64bit
+	for (unsigned int i = 0; i < loops; ++i){
+		a |= (LUT[(m >> ((i * 9) + startshift)) & NINEBITMASK] << (3 * i));
+	}
+	return static_cast<coord>(a);
+}
+
+// DECODE 3D Morton code : Shifted LUT
+template<typename morton, typename coord>
+inline void m3D_d_sLUT(const morton m, coord& x, coord& y, coord& z) {
+	x = morton3D_DecodeCoord_LUT256<morton, coord>(m, Morton3D_decode_x_512, 0);
+	y = morton3D_DecodeCoord_LUT256<morton, coord>(m, Morton3D_decode_y_512, 0);
+	z = morton3D_DecodeCoord_LUT256<morton, coord>(m, Morton3D_decode_z_512, 0);
+}
+
+// DECODE 3D Morton code : LUT
+template<typename morton, typename coord>
+inline void m3D_d_LUT(const morton m, coord& x, coord& y, coord& z) {
+	x = morton3D_DecodeCoord_LUT256<morton, coord>(m, Morton3D_decode_x_512, 0);
+	y = morton3D_DecodeCoord_LUT256<morton, coord>(m, Morton3D_decode_x_512, 1);
+	z = morton3D_DecodeCoord_LUT256<morton, coord>(m, Morton3D_decode_x_512, 2);
+}
+
+// DECODE 3D Morton code : Shifted LUT (Early termination version)
+template<typename morton, typename coord>
+inline void m3D_d_sLUT_ET(const morton m, coord& x, coord& y, coord& z){
+	x = 0; y = 0; z = 0;
+	morton NINEBITMASK = 0x000001ff;
+	unsigned long firstbit_location = 0;
+	if (!findFirstSetBit<morton>(m, &firstbit_location)) { return; }
+	unsigned int i = 0;
+	unsigned int shiftback = 0;
+	while (firstbit_location >= i) {
+		morton m_shifted = (m >> i) & NINEBITMASK;
+		x |= Morton3D_decode_x_512[m_shifted] << shiftback;
+		y |= Morton3D_decode_y_512[m_shifted] << shiftback;
+		z |= Morton3D_decode_z_512[m_shifted] << shiftback;
+		shiftback += 3;
+		i += 9;
+	}
+	return;
+}
+
+// DECODE 3D Morton code : LUT (Early termination version)
+template<typename morton, typename coord>
+inline void m3D_d_LUT_ET(const morton m, coord& x, coord& y, coord& z){
+	x = 0; y = 0; z = 0;
+	morton NINEBITMASK = 0x000001ff;
+	unsigned long firstbit_location = 0;
+	if (!findFirstSetBit<uint_fast64_t>(m, &firstbit_location)) { return; }
+	unsigned int i = 0;
+	unsigned int shiftback = 0;
+	while (i <= firstbit_location) {
+		x = x | Morton3D_decode_x_512[(m >> i) & NINEBITMASK] << shiftback;
+		y = y | Morton3D_decode_x_512[(m >> (i+1)) & NINEBITMASK] << shiftback;
+		z = z | Morton3D_decode_x_512[(m >> (i+2)) & NINEBITMASK] << shiftback;
+		i += 9;
+		shiftback += 3;
+	}
+	return;
+}
+
+// HELPER METHOD for Magic bits decoding
+template<typename morton, typename coord>
+static inline coord morton3D_GetThirdBits(const morton m) {
+	morton* masks = (sizeof(morton) <= 4) ? reinterpret_cast<morton*>(magicbit3D_masks32) : reinterpret_cast<morton*>(magicbit3D_masks64);
+	morton x = m & masks[4];
+	x = (x ^ (x >> 2)) & masks[3];
+	x = (x ^ (x >> 4)) & masks[2];
+	x = (x ^ (x >> 8)) & masks[1];
+	x = (x ^ (x >> 16)) & masks[0];
+	return static_cast<coord>(x);
+}
+
+// DECODE 3D Morton code : Magic bits
+// This method splits the morton codes bits by using certain patterns (magic bits)
+template<typename morton, typename coord>
+inline void m3D_d_magicbits(const morton m, coord& x, coord& y, coord& z){
+	x = morton3D_GetThirdBits<morton, coord>(m);
+	y = morton3D_GetThirdBits<morton, coord>(m >> 1);
+	z = morton3D_GetThirdBits<morton, coord>(m >> 2);
+}
+
+// DECODE 3D Morton code : For loop
+template<typename morton, typename coord>
+inline void m3D_d_for(const morton m, coord& x, coord& y, coord& z){
+	x = 0; y = 0; z = 0;
+	unsigned int checkbits = static_cast<unsigned int>(floor((sizeof(morton) * 8.0f / 3.0f)));
+	for (unsigned int i = 0; i <= checkbits; ++i) {
+		morton selector = 1;
+		unsigned int shift_selector = 3 * i;
+		unsigned int shiftback = 2 * i;
+		x |= (m & (selector << shift_selector)) >> (shiftback);
+		y |= (m & (selector << (shift_selector + 1))) >> (shiftback + 1);
+		z |= (m & (selector << (shift_selector + 2))) >> (shiftback + 2);
+	}
+}
+
+// DECODE 3D Morton code : For loop (Early termination version)
+template<typename morton, typename coord>
+inline void m3D_d_for_ET(const morton m, coord& x, coord& y, coord& z) {
+	x = 0; y = 0; z = 0;
+	float defaultbits = floor((sizeof(morton) * 8.0f / 3.0f));
+	unsigned long firstbit_location = 0;
+	if(!findFirstSetBit<morton>(m, &firstbit_location)) return;
+	unsigned int checkbits = static_cast<unsigned int>(min(defaultbits, firstbit_location / 3.0f));
+	for (unsigned int i = 0; i <= checkbits; ++i) {
+		morton selector = 1;
+		unsigned int shift_selector = 3 * i;
+		unsigned int shiftback = 2 * i;
+		x |= (m & (selector << shift_selector)) >> (shiftback);
+		y |= (m & (selector << (shift_selector + 1))) >> (shiftback + 1);
+		z |= (m & (selector << (shift_selector + 2))) >> (shiftback + 2);
+	}
+}
diff --git a/extern/libmorton/libmorton/include/morton3D_LUTs.h b/extern/libmorton/libmorton/include/morton3D_LUTs.h
new file mode 100644
index 0000000000000000000000000000000000000000..2e0299b4efe1feca36ff67d2a0f00486af478707
--- /dev/null
+++ b/extern/libmorton/libmorton/include/morton3D_LUTs.h
@@ -0,0 +1,222 @@
+#pragma once
+
+#include <stdint.h>
+
+// Magicbits masks
+static uint_fast32_t magicbit3D_masks32[5] = { 0x000003ff, 0x30000ff, 0x0300f00f, 0x30c30c3, 0x9249249 };
+static uint_fast64_t magicbit3D_masks64[5] = { 0x1f000000001ffff, 0x1f0000ff0000ff, 0x100f00f00f00f00f, 0x10c30c30c30c30c3, 0x1249249249249249 };
+
+// Version with lookup table
+static const uint_fast32_t Morton3D_encode_x_256[256] =
+{
+	0x00000000, 
+	0x00000001, 0x00000008, 0x00000009, 0x00000040, 0x00000041, 0x00000048, 0x00000049, 0x00000200, 
+	0x00000201, 0x00000208, 0x00000209, 0x00000240, 0x00000241, 0x00000248, 0x00000249, 0x00001000,
+	0x00001001, 0x00001008, 0x00001009, 0x00001040, 0x00001041, 0x00001048, 0x00001049, 0x00001200,
+	0x00001201, 0x00001208, 0x00001209, 0x00001240, 0x00001241, 0x00001248, 0x00001249, 0x00008000,
+	0x00008001, 0x00008008, 0x00008009, 0x00008040, 0x00008041, 0x00008048, 0x00008049, 0x00008200,
+	0x00008201, 0x00008208, 0x00008209, 0x00008240, 0x00008241, 0x00008248, 0x00008249, 0x00009000,
+	0x00009001, 0x00009008, 0x00009009, 0x00009040, 0x00009041, 0x00009048, 0x00009049, 0x00009200,
+	0x00009201, 0x00009208, 0x00009209, 0x00009240, 0x00009241, 0x00009248, 0x00009249, 0x00040000,
+	0x00040001, 0x00040008, 0x00040009, 0x00040040, 0x00040041, 0x00040048, 0x00040049, 0x00040200,
+	0x00040201, 0x00040208, 0x00040209, 0x00040240, 0x00040241, 0x00040248, 0x00040249, 0x00041000,
+	0x00041001, 0x00041008, 0x00041009, 0x00041040, 0x00041041, 0x00041048, 0x00041049, 0x00041200,
+	0x00041201, 0x00041208, 0x00041209, 0x00041240, 0x00041241, 0x00041248, 0x00041249, 0x00048000,
+	0x00048001, 0x00048008, 0x00048009, 0x00048040, 0x00048041, 0x00048048, 0x00048049, 0x00048200,
+	0x00048201, 0x00048208, 0x00048209, 0x00048240, 0x00048241, 0x00048248, 0x00048249, 0x00049000,
+	0x00049001, 0x00049008, 0x00049009, 0x00049040, 0x00049041, 0x00049048, 0x00049049, 0x00049200,
+	0x00049201, 0x00049208, 0x00049209, 0x00049240, 0x00049241, 0x00049248, 0x00049249, 0x00200000,
+	0x00200001, 0x00200008, 0x00200009, 0x00200040, 0x00200041, 0x00200048, 0x00200049, 0x00200200,
+	0x00200201, 0x00200208, 0x00200209, 0x00200240, 0x00200241, 0x00200248, 0x00200249, 0x00201000,
+	0x00201001, 0x00201008, 0x00201009, 0x00201040, 0x00201041, 0x00201048, 0x00201049, 0x00201200,
+	0x00201201, 0x00201208, 0x00201209, 0x00201240, 0x00201241, 0x00201248, 0x00201249, 0x00208000,
+	0x00208001, 0x00208008, 0x00208009, 0x00208040, 0x00208041, 0x00208048, 0x00208049, 0x00208200,
+	0x00208201, 0x00208208, 0x00208209, 0x00208240, 0x00208241, 0x00208248, 0x00208249, 0x00209000,
+	0x00209001, 0x00209008, 0x00209009, 0x00209040, 0x00209041, 0x00209048, 0x00209049, 0x00209200,
+	0x00209201, 0x00209208, 0x00209209, 0x00209240, 0x00209241, 0x00209248, 0x00209249, 0x00240000,
+	0x00240001, 0x00240008, 0x00240009, 0x00240040, 0x00240041, 0x00240048, 0x00240049, 0x00240200,
+	0x00240201, 0x00240208, 0x00240209, 0x00240240, 0x00240241, 0x00240248, 0x00240249, 0x00241000,
+	0x00241001, 0x00241008, 0x00241009, 0x00241040, 0x00241041, 0x00241048, 0x00241049, 0x00241200,
+	0x00241201, 0x00241208, 0x00241209, 0x00241240, 0x00241241, 0x00241248, 0x00241249, 0x00248000,
+	0x00248001, 0x00248008, 0x00248009, 0x00248040, 0x00248041, 0x00248048, 0x00248049, 0x00248200,
+	0x00248201, 0x00248208, 0x00248209, 0x00248240, 0x00248241, 0x00248248, 0x00248249, 0x00249000,
+	0x00249001, 0x00249008, 0x00249009, 0x00249040, 0x00249041, 0x00249048, 0x00249049, 0x00249200,
+	0x00249201, 0x00249208, 0x00249209, 0x00249240, 0x00249241, 0x00249248, 0x00249249
+};
+
+static const uint_fast32_t Morton3D_encode_y_256[256] = {
+	0x00000000,
+	0x00000002, 0x00000010, 0x00000012, 0x00000080, 0x00000082, 0x00000090, 0x00000092, 0x00000400,
+	0x00000402, 0x00000410, 0x00000412, 0x00000480, 0x00000482, 0x00000490, 0x00000492, 0x00002000,
+	0x00002002, 0x00002010, 0x00002012, 0x00002080, 0x00002082, 0x00002090, 0x00002092, 0x00002400,
+	0x00002402, 0x00002410, 0x00002412, 0x00002480, 0x00002482, 0x00002490, 0x00002492, 0x00010000,
+	0x00010002, 0x00010010, 0x00010012, 0x00010080, 0x00010082, 0x00010090, 0x00010092, 0x00010400,
+	0x00010402, 0x00010410, 0x00010412, 0x00010480, 0x00010482, 0x00010490, 0x00010492, 0x00012000,
+	0x00012002, 0x00012010, 0x00012012, 0x00012080, 0x00012082, 0x00012090, 0x00012092, 0x00012400,
+	0x00012402, 0x00012410, 0x00012412, 0x00012480, 0x00012482, 0x00012490, 0x00012492, 0x00080000,
+	0x00080002, 0x00080010, 0x00080012, 0x00080080, 0x00080082, 0x00080090, 0x00080092, 0x00080400,
+	0x00080402, 0x00080410, 0x00080412, 0x00080480, 0x00080482, 0x00080490, 0x00080492, 0x00082000,
+	0x00082002, 0x00082010, 0x00082012, 0x00082080, 0x00082082, 0x00082090, 0x00082092, 0x00082400,
+	0x00082402, 0x00082410, 0x00082412, 0x00082480, 0x00082482, 0x00082490, 0x00082492, 0x00090000,
+	0x00090002, 0x00090010, 0x00090012, 0x00090080, 0x00090082, 0x00090090, 0x00090092, 0x00090400,
+	0x00090402, 0x00090410, 0x00090412, 0x00090480, 0x00090482, 0x00090490, 0x00090492, 0x00092000,
+	0x00092002, 0x00092010, 0x00092012, 0x00092080, 0x00092082, 0x00092090, 0x00092092, 0x00092400,
+	0x00092402, 0x00092410, 0x00092412, 0x00092480, 0x00092482, 0x00092490, 0x00092492, 0x00400000,
+	0x00400002, 0x00400010, 0x00400012, 0x00400080, 0x00400082, 0x00400090, 0x00400092, 0x00400400,
+	0x00400402, 0x00400410, 0x00400412, 0x00400480, 0x00400482, 0x00400490, 0x00400492, 0x00402000,
+	0x00402002, 0x00402010, 0x00402012, 0x00402080, 0x00402082, 0x00402090, 0x00402092, 0x00402400,
+	0x00402402, 0x00402410, 0x00402412, 0x00402480, 0x00402482, 0x00402490, 0x00402492, 0x00410000,
+	0x00410002, 0x00410010, 0x00410012, 0x00410080, 0x00410082, 0x00410090, 0x00410092, 0x00410400,
+	0x00410402, 0x00410410, 0x00410412, 0x00410480, 0x00410482, 0x00410490, 0x00410492, 0x00412000,
+	0x00412002, 0x00412010, 0x00412012, 0x00412080, 0x00412082, 0x00412090, 0x00412092, 0x00412400,
+	0x00412402, 0x00412410, 0x00412412, 0x00412480, 0x00412482, 0x00412490, 0x00412492, 0x00480000,
+	0x00480002, 0x00480010, 0x00480012, 0x00480080, 0x00480082, 0x00480090, 0x00480092, 0x00480400,
+	0x00480402, 0x00480410, 0x00480412, 0x00480480, 0x00480482, 0x00480490, 0x00480492, 0x00482000,
+	0x00482002, 0x00482010, 0x00482012, 0x00482080, 0x00482082, 0x00482090, 0x00482092, 0x00482400,
+	0x00482402, 0x00482410, 0x00482412, 0x00482480, 0x00482482, 0x00482490, 0x00482492, 0x00490000,
+	0x00490002, 0x00490010, 0x00490012, 0x00490080, 0x00490082, 0x00490090, 0x00490092, 0x00490400,
+	0x00490402, 0x00490410, 0x00490412, 0x00490480, 0x00490482, 0x00490490, 0x00490492, 0x00492000,
+	0x00492002, 0x00492010, 0x00492012, 0x00492080, 0x00492082, 0x00492090, 0x00492092, 0x00492400,
+	0x00492402, 0x00492410, 0x00492412, 0x00492480, 0x00492482, 0x00492490, 0x00492492
+};
+
+static const uint_fast32_t Morton3D_encode_z_256[256] = {
+	0x00000000,
+	0x00000004, 0x00000020, 0x00000024, 0x00000100, 0x00000104, 0x00000120, 0x00000124, 0x00000800,
+	0x00000804, 0x00000820, 0x00000824, 0x00000900, 0x00000904, 0x00000920, 0x00000924, 0x00004000,
+	0x00004004, 0x00004020, 0x00004024, 0x00004100, 0x00004104, 0x00004120, 0x00004124, 0x00004800,
+	0x00004804, 0x00004820, 0x00004824, 0x00004900, 0x00004904, 0x00004920, 0x00004924, 0x00020000,
+	0x00020004, 0x00020020, 0x00020024, 0x00020100, 0x00020104, 0x00020120, 0x00020124, 0x00020800,
+	0x00020804, 0x00020820, 0x00020824, 0x00020900, 0x00020904, 0x00020920, 0x00020924, 0x00024000,
+	0x00024004, 0x00024020, 0x00024024, 0x00024100, 0x00024104, 0x00024120, 0x00024124, 0x00024800,
+	0x00024804, 0x00024820, 0x00024824, 0x00024900, 0x00024904, 0x00024920, 0x00024924, 0x00100000,
+	0x00100004, 0x00100020, 0x00100024, 0x00100100, 0x00100104, 0x00100120, 0x00100124, 0x00100800,
+	0x00100804, 0x00100820, 0x00100824, 0x00100900, 0x00100904, 0x00100920, 0x00100924, 0x00104000,
+	0x00104004, 0x00104020, 0x00104024, 0x00104100, 0x00104104, 0x00104120, 0x00104124, 0x00104800,
+	0x00104804, 0x00104820, 0x00104824, 0x00104900, 0x00104904, 0x00104920, 0x00104924, 0x00120000,
+	0x00120004, 0x00120020, 0x00120024, 0x00120100, 0x00120104, 0x00120120, 0x00120124, 0x00120800,
+	0x00120804, 0x00120820, 0x00120824, 0x00120900, 0x00120904, 0x00120920, 0x00120924, 0x00124000,
+	0x00124004, 0x00124020, 0x00124024, 0x00124100, 0x00124104, 0x00124120, 0x00124124, 0x00124800,
+	0x00124804, 0x00124820, 0x00124824, 0x00124900, 0x00124904, 0x00124920, 0x00124924, 0x00800000,
+	0x00800004, 0x00800020, 0x00800024, 0x00800100, 0x00800104, 0x00800120, 0x00800124, 0x00800800,
+	0x00800804, 0x00800820, 0x00800824, 0x00800900, 0x00800904, 0x00800920, 0x00800924, 0x00804000,
+	0x00804004, 0x00804020, 0x00804024, 0x00804100, 0x00804104, 0x00804120, 0x00804124, 0x00804800,
+	0x00804804, 0x00804820, 0x00804824, 0x00804900, 0x00804904, 0x00804920, 0x00804924, 0x00820000,
+	0x00820004, 0x00820020, 0x00820024, 0x00820100, 0x00820104, 0x00820120, 0x00820124, 0x00820800,
+	0x00820804, 0x00820820, 0x00820824, 0x00820900, 0x00820904, 0x00820920, 0x00820924, 0x00824000,
+	0x00824004, 0x00824020, 0x00824024, 0x00824100, 0x00824104, 0x00824120, 0x00824124, 0x00824800,
+	0x00824804, 0x00824820, 0x00824824, 0x00824900, 0x00824904, 0x00824920, 0x00824924, 0x00900000,
+	0x00900004, 0x00900020, 0x00900024, 0x00900100, 0x00900104, 0x00900120, 0x00900124, 0x00900800,
+	0x00900804, 0x00900820, 0x00900824, 0x00900900, 0x00900904, 0x00900920, 0x00900924, 0x00904000,
+	0x00904004, 0x00904020, 0x00904024, 0x00904100, 0x00904104, 0x00904120, 0x00904124, 0x00904800,
+	0x00904804, 0x00904820, 0x00904824, 0x00904900, 0x00904904, 0x00904920, 0x00904924, 0x00920000,
+	0x00920004, 0x00920020, 0x00920024, 0x00920100, 0x00920104, 0x00920120, 0x00920124, 0x00920800,
+	0x00920804, 0x00920820, 0x00920824, 0x00920900, 0x00920904, 0x00920920, 0x00920924, 0x00924000,
+	0x00924004, 0x00924020, 0x00924024, 0x00924100, 0x00924104, 0x00924120, 0x00924124, 0x00924800,
+	0x00924804, 0x00924820, 0x00924824, 0x00924900, 0x00924904, 0x00924920, 0x00924924
+};
+
+static const uint_fast8_t Morton3D_decode_x_512[512] = {
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7,
+	4, 5, 4, 5, 4, 5, 4, 5, 6, 7, 6, 7, 6, 7, 6, 7
+};
+
+static const uint_fast8_t Morton3D_decode_y_512[512] = {
+	0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
+	2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
+	0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
+	2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
+	0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
+	2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
+	0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
+	2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
+	4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5,
+	6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7,
+	4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5,
+	6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7,
+	4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5,
+	6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7,
+	4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5,
+	6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7,
+	0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
+	2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
+	0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
+	2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
+	0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
+	2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
+	0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
+	2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3,
+	4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5,
+	6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7,
+	4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5,
+	6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7,
+	4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5,
+	6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7,
+	4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5, 4, 4, 5, 5,
+	6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7, 6, 6, 7, 7
+};
+
+static const uint_fast8_t Morton3D_decode_z_512[512] = {
+	0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
+	0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
+	2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
+	2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
+	0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
+	0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
+	2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
+	2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
+	0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
+	0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
+	2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
+	2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
+	0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
+	0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1,
+	2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
+	2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3,
+	4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5,
+	4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5,
+	6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7,
+	6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7,
+	4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5,
+	4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5,
+	6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7,
+	6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7,
+	4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5,
+	4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5,
+	6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7,
+	6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7,
+	4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5,
+	4, 4, 4, 4, 5, 5, 5, 5, 4, 4, 4, 4, 5, 5, 5, 5,
+	6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7,
+	6, 6, 6, 6, 7, 7, 7, 7, 6, 6, 6, 6, 7, 7, 7, 7
+};
\ No newline at end of file
diff --git a/extern/libmorton/libmorton/include/morton_BMI.h b/extern/libmorton/libmorton/include/morton_BMI.h
new file mode 100644
index 0000000000000000000000000000000000000000..081a5dd7aba4130d09c5ced1ef27ab2cafe1a9da
--- /dev/null
+++ b/extern/libmorton/libmorton/include/morton_BMI.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#if defined(__BMI2__)
+
+#endif
\ No newline at end of file
diff --git a/extern/libmorton/libmorton/include/morton_LUT_generators.h b/extern/libmorton/libmorton/include/morton_LUT_generators.h
new file mode 100644
index 0000000000000000000000000000000000000000..baaadd328b94506c11ec46ef402dee8fd685fd31
--- /dev/null
+++ b/extern/libmorton/libmorton/include/morton_LUT_generators.h
@@ -0,0 +1,98 @@
+#pragma once
+
+#include "morton2D.h"
+#include "morton3D.h"
+#include <iostream>
+
+template <typename element>
+void printTable(const element* table, size_t howmany, unsigned int splitat){
+	for (size_t i = 0; i < howmany; i++){
+		if (i % splitat == 0){ cout << endl; }
+		printf("%u ,", static_cast<unsigned int>(table[i]));
+	}
+	cout << endl;
+}
+
+void generate2D_EncodeLUT(size_t how_many_bits, uint_fast16_t*& x_table, uint_fast16_t*& y_table, bool print_tables){
+	size_t total = 1 << how_many_bits;
+	x_table = (uint_fast16_t*)malloc(total * sizeof(uint_fast16_t));
+	y_table = (uint_fast16_t*)malloc(total * sizeof(uint_fast16_t));
+
+	for (uint_fast32_t i = 0; i < total; i++){
+		x_table[i] = (uint_fast16_t) m2D_e_magicbits<uint_fast32_t, uint_fast16_t>(i, 0);
+		y_table[i] = (uint_fast16_t)m2D_e_magicbits<uint_fast32_t, uint_fast16_t>(0, i);
+	}
+
+	if (print_tables){
+		cout << "X Table " << endl;
+		printTable<uint_fast32_t>(x_table, total, 8);
+		cout << "Y Table " << endl;
+		printTable<uint_fast32_t>(y_table, total, 8);
+	}
+}
+
+void generate2D_DecodeLUT(size_t how_many_bits, uint_fast8_t*& x_table, uint_fast8_t*& y_table, bool print_tables){
+	size_t total = 1 << how_many_bits;
+	x_table = (uint_fast8_t*)malloc(total * sizeof(uint_fast8_t));
+	y_table = (uint_fast8_t*)malloc(total * sizeof(uint_fast8_t));
+
+	//generate tables
+	for (size_t i = 0; i < total; i++) {
+		m2D_d_for(i, x_table[i], y_table[i]);
+	}
+
+	if (print_tables) {
+		cout << "X Table " << endl;
+		printTable<uint_fast8_t>(x_table, total, 16);
+		cout << "Y Table " << endl;
+		printTable<uint_fast8_t>(y_table, total, 16);
+	}
+}
+
+void generate3D_EncodeLUT(size_t how_many_bits, uint_fast32_t*& x_table, uint_fast32_t*& y_table, uint_fast32_t*& z_table, bool print_tables){
+	// how many items
+	size_t total = 1 << how_many_bits;
+	x_table = (uint_fast32_t*)malloc(total * sizeof(uint_fast32_t));
+	y_table = (uint_fast32_t*)malloc(total * sizeof(uint_fast32_t));
+	z_table = (uint_fast32_t*)malloc(total * sizeof(uint_fast32_t));
+
+	for (uint_fast32_t i = 0; i < total; i++){
+		x_table[i] = (uint_fast32_t) m3D_e_magicbits<uint_fast32_t, uint_fast16_t>(i, 0, 0);
+		y_table[i] = (uint_fast32_t) m3D_e_magicbits<uint_fast32_t, uint_fast16_t>(0, i, 0);
+		z_table[i] = (uint_fast32_t) m3D_e_magicbits<uint_fast32_t, uint_fast16_t>(0, 0, i);
+	}
+
+	if (print_tables){
+		cout << "X Table " << endl;
+		printTable<uint_fast32_t>(x_table, total, 8);
+		cout << "Y Table " << endl;
+		printTable<uint_fast32_t>(y_table, total, 8);
+		cout << "Z Table " << endl;
+		printTable<uint_fast32_t>(z_table, total, 8);
+	}
+}
+
+// Generate a decode tables for 3D morton code
+// how_many_bits should be a multiple of three
+void generate3D_DecodeLUT(size_t how_many_bits, uint_fast8_t*& x_table, uint_fast8_t*& y_table, uint_fast8_t*& z_table, bool print_tables){
+	size_t total = 1 << how_many_bits;
+	x_table = (uint_fast8_t*) malloc(total * sizeof(uint_fast8_t));
+	y_table = (uint_fast8_t*) malloc(total * sizeof(uint_fast8_t));
+	z_table = (uint_fast8_t*) malloc(total * sizeof(uint_fast8_t));
+
+	//generate tables
+	for (size_t i = 0; i < total; i++){
+		x_table[i] = morton3D_GetThirdBits<uint_fast64_t, uint_fast32_t>(i);
+		y_table[i] = morton3D_GetThirdBits<uint_fast64_t, uint_fast32_t>(i >> 1);
+		z_table[i] = morton3D_GetThirdBits<uint_fast64_t, uint_fast32_t>(i >> 2);
+	}
+
+	if (print_tables){
+		cout << "X Table " << endl;
+		printTable<uint_fast8_t>(x_table, total, 16);
+		cout << "Y Table " << endl;
+		printTable<uint_fast8_t>(y_table, total, 16);
+		cout << "Z Table " << endl;
+		printTable<uint_fast8_t>(z_table, total, 16);	
+	}
+}
diff --git a/extern/libmorton/libmorton/include/morton_common.h b/extern/libmorton/libmorton/include/morton_common.h
new file mode 100644
index 0000000000000000000000000000000000000000..175b8a832a916246c372e0d019b68bf457f16004
--- /dev/null
+++ b/extern/libmorton/libmorton/include/morton_common.h
@@ -0,0 +1,38 @@
+#pragma once
+
+// Libmorton - Common helper methods needed in Morton encoding/decoding
+
+#include <stdint.h>
+#if _MSC_VER
+#include <intrin.h>
+#endif
+
+template<typename morton>
+inline bool findFirstSetBit(const morton x, unsigned long* firstbit_location) {
+#if _MSC_VER && !_WIN64
+	// 32 BIT on 32 BIT
+	if (sizeof(morton) <= 4) {
+		return _BitScanReverse(firstbit_location, x) != 0;
+	}
+	// 64 BIT on 32 BIT
+	else {
+		*firstbit_location = 0;
+		if (_BitScanReverse(firstbit_location, (x >> 32))) { // check first part
+			firstbit_location += 32;
+			return true;
+		}
+		return _BitScanReverse(firstbit_location, (x & 0xFFFFFFFF)) != 0;
+	}
+#elif  _MSC_VER && _WIN64
+	// 32 or 64 BIT on 64 BIT
+	return _BitScanReverse64(firstbit_location, x) != 0;
+#elif __GNUC__
+	if (x == 0) {
+		return false;
+	}
+	else {
+		*firstbit_location = static_cast<unsigned long>((sizeof(morton)*8) - __builtin_clzll(x));
+		return true;
+	}
+#endif
+}
\ No newline at end of file
diff --git a/include/CompactNSearch b/include/CompactNSearch
new file mode 100755
index 0000000000000000000000000000000000000000..b1ec2d751d18d19310939c155c48574403c2508d
--- /dev/null
+++ b/include/CompactNSearch
@@ -0,0 +1 @@
+#include "CompactNSearch.h"
diff --git a/include/CompactNSearch.h b/include/CompactNSearch.h
new file mode 100644
index 0000000000000000000000000000000000000000..b0c385fd01da84c5a35ac4cb1d9cdad6dc1fb2a9
--- /dev/null
+++ b/include/CompactNSearch.h
@@ -0,0 +1,151 @@
+
+#ifndef COMPACT_N_SEARCH__HPP
+#define COMPACT_N_SEARCH__HPP
+
+#include "Config.h"
+#include "DataStructures.h"
+#include "PointSet.h"
+
+#include <unordered_map>
+
+namespace CompactNSearch
+{
+
+/**
+* @class NeighborhoodSearch
+* Stores point data multiple set of points in which neighborhood information for a fixed
+* radius r should be generated.
+*/
+class NeighborhoodSearch
+{
+
+public:
+
+	/**
+	* Constructor.
+	* Creates a new instance of the neighborhood search class.
+	* @param r Search radius. If two points are closer to each other than a distance r they are considered neighbors.
+	* @param erase_empty_cells If true. Empty cells in spatial hashing grid are erased if the points move.
+	*/
+	NeighborhoodSearch(Real r, bool erase_empty_cells = false);
+
+	/**
+	* Destructor.
+	*/
+	virtual ~NeighborhoodSearch() = default;
+
+	/**
+	* Get method to access a point set.
+	* @param i Index of the point set to retrieve.
+	*/
+	PointSet const& point_set(unsigned int i) const { return m_point_sets[i];     }
+
+	/**
+	* Get method to access a point set.
+	* @param i Index of the point set to retrieve.
+	*/
+	PointSet      & point_set(unsigned int i)       { return m_point_sets[i];     }
+
+
+	/**
+	* Returns the number of point sets contained in the search.
+	*/
+	std::size_t  n_point_sets()               const { return m_point_sets.size(); }
+
+	/**
+	* Get method to access the list of point sets.
+	*/
+	std::vector<PointSet> const& point_sets() const { return m_point_sets;        }
+
+	/**
+	* Get method to access the list of point sets.
+	*/
+	std::vector<PointSet>      & point_sets()       { return m_point_sets;        }
+
+	/**
+	* Increases the size of a point set under the assumption that the existing points remain at
+	* the same position.
+	* @param i Index of point set that will be resized.
+	* @param x Pointer to the point position data. Must point to continguous data of 3 * n
+	* real values.
+	* @param n Number of points.
+	*/
+	void increase_point_set_size(unsigned int i, Real const* x, std::size_t n);
+
+	/**
+	* Creates and adds a new set of points.
+	* @param x Pointer to the point position data. Must point to continguous data of 3 * n
+	* real values.
+	* @param n Number of points.
+	* @param is_dynamic Specifies wether the point positions will change for future queries.
+	* @param search_neighbors If true, no neighbors of this point set will be determined during the
+	* actual search. However, other point set will still determine neighboring points belonging to
+	* this point set.
+	* @returns Returns unique identifier in form of an index assigned to the newly created point
+	* set.
+	*/
+	unsigned int add_point_set(Real const* x, std::size_t n, bool is_dynamic = true,
+		bool search_neighbors = true) 
+	{ 
+		m_point_sets.push_back({x, n, is_dynamic, search_neighbors});
+		return static_cast<unsigned int>(m_point_sets.size() - 1);
+	}
+
+	/**
+	* Performs the actual query. This method will assign a list of neighboring points to each point
+	* every added point set.
+	*/
+	void find_neighbors();
+
+	/*
+	* Generates a sort table according to a space-filling Z curve. Any array-based per point
+	* information can then be reordered using the function sort_field of the PointSet class.
+	* Please note that the position data will not be modified by this class, such that the user has
+	* to invoke the sort_field function on the position array. Moreover, be aware the the grid has
+	* be reinitialized after each sort. Therefore, the points should not be reordered too
+	* frequently.
+	*/
+	void z_sort();
+
+	/*
+	* @returns Returns the radius in which point neighbors are searched.
+	*/
+	double radius() const { return std::sqrt(m_r2); }
+
+	/**
+	* Sets the radius in which point point neighbors are searched.
+	* @param r Search radius.
+	*/
+	void set_radius(double r) 
+	{ 
+		m_r2 = r * r; 
+		m_inv_cell_size = 1.0 / r;
+		m_initialized = false;
+	}
+
+private:
+
+	void init();
+	void update_hash_table(std::vector<unsigned int>& to_delete);
+	void erase_empty_entries(std::vector<unsigned int> const& to_delete);
+	void query();
+
+	HashKey cell_index(Real const* x) const;
+
+private:
+
+
+	std::vector<PointSet> m_point_sets;
+
+	Real m_inv_cell_size;
+	Real m_r2;
+	std::unordered_map<HashKey, unsigned int, SpatialHasher> m_map;
+	std::vector<HashEntry> m_entries;
+
+	bool m_erase_empty_cells;
+	bool m_initialized;
+};
+
+}
+
+#endif //COMPACT_N_SEARCH__HPP
diff --git a/include/Config.h b/include/Config.h
new file mode 100644
index 0000000000000000000000000000000000000000..8d86375fd7a387bbfa7c900373099869a32d0264
--- /dev/null
+++ b/include/Config.h
@@ -0,0 +1,18 @@
+#ifndef __CONFIG_H__
+#define __CONFIG_H__
+
+namespace CompactNSearch
+{
+    using Real = double;
+}
+
+#define INITIAL_NUMBER_OF_INDICES 50
+
+#ifdef _MSC_VER
+	#include <ppl.h>
+#else
+	#include <parallel/algorithm>
+#endif
+
+
+#endif
diff --git a/include/DataStructures.h b/include/DataStructures.h
new file mode 100644
index 0000000000000000000000000000000000000000..bd706c6e8c00cdc99b6cea655eedc23a90d411b5
--- /dev/null
+++ b/include/DataStructures.h
@@ -0,0 +1,122 @@
+
+
+#ifndef __DATA_STRUCTURES_H__
+#define __DATA_STRUCTURES_H__
+
+#include "Config.h"
+
+#include <atomic>
+#include <vector>
+
+namespace CompactNSearch
+{
+struct PointID
+{
+	unsigned int point_set_id;
+	unsigned int point_id;
+
+	bool operator==(PointID const& other) const
+	{
+		return point_id == other.point_id && point_set_id == other.point_set_id;
+	}
+};
+
+struct HashKey
+{
+	HashKey() = default;
+	HashKey(int i, int j, int k)
+		: k{i, j, k}
+	{
+	}
+
+	HashKey& operator=(HashKey const& other)
+	{
+		k[0] = other.k[0];
+		k[1] = other.k[1];
+		k[2] = other.k[2];
+		return *this;
+	}
+
+	bool operator==(HashKey const& other) const
+	{
+		return 
+			k[0] == other.k[0] &&
+			k[1] == other.k[1] && 
+			k[2] == other.k[2];
+	}
+
+	bool operator!=(HashKey const& other) const
+	{
+		return !(*this == other);
+	}
+
+	int k[3];
+};
+
+struct HashEntry
+{
+	HashEntry() 
+	{
+		indices.reserve(INITIAL_NUMBER_OF_INDICES);
+	}
+
+	HashEntry(PointID const& id)
+	{
+		add(id);
+	}
+
+	void add(PointID const& id)
+	{
+		indices.push_back(id);
+	}
+
+	void erase(PointID const& id)
+	{
+		indices.erase(std::find(indices.begin(), indices.end(), id));
+	}
+
+	unsigned int n_indices() const
+	{
+		return static_cast<unsigned int>(indices.size());
+	}
+
+	std::vector<PointID> indices;
+};
+
+struct SpatialHasher
+{
+	std::size_t operator()(HashKey const& k) const
+	{
+		return 
+			73856093 * k.k[0] ^
+			19349663 * k.k[1] ^
+			83492791 * k.k[2];
+	}
+};
+
+class Spinlock
+{
+public:
+
+	void lock()
+	{
+		while (m_lock.test_and_set(std::memory_order_acquire));
+	}
+
+	void unlock()
+	{
+		m_lock.clear(std::memory_order_release);
+	}
+
+	Spinlock() = default;
+	Spinlock(Spinlock const& other) {};
+	Spinlock& operator=(Spinlock const& other) { return *this; }
+
+
+private:
+
+	std::atomic_flag m_lock = ATOMIC_FLAG_INIT;
+};
+}
+
+#endif // __DATA_STRUCTURES_H__
diff --git a/include/PointSet.h b/include/PointSet.h
new file mode 100755
index 0000000000000000000000000000000000000000..135787d201cf27347572d0222bbdde3f167d6e79
--- /dev/null
+++ b/include/PointSet.h
@@ -0,0 +1,168 @@
+
+#ifndef __POINT_SET_H__
+#define __POINT_SET_H__
+
+#include <Config.h>
+#include <iostream>
+
+namespace CompactNSearch
+{
+class NeighborhoodSearch;
+
+/**
+* @class PointSet.
+* Represents a set of points in three-dimensional space.
+*/
+class PointSet
+{
+
+public:
+
+	/**
+	* Copy constructor.
+	*/
+	PointSet(PointSet const& other)
+	{
+		*this = other;
+	}
+
+	/**
+	* Assignment operator.
+	*/
+	PointSet& operator=(PointSet const& other)
+	{
+		m_x = other.m_x;
+		m_n = other.m_n;
+		m_dynamic = other.m_dynamic;
+		m_search_neighbors = other.m_search_neighbors;
+
+		m_neighbors = other.m_neighbors;
+		m_keys = other.m_keys;
+		m_old_keys = other.m_old_keys;
+		m_locks.resize(other.m_locks.size());
+		m_sort_table = other.m_sort_table;
+
+		return *this;
+	}
+
+	/**
+	* Returns the number of neighbors of point i in the given point set.
+	* @param i Point index.
+	* @returns Number of points neighboring point i.
+	*/
+	std::size_t n_neighbors(unsigned int i) const 
+	{
+		return static_cast<unsigned int>(m_neighbors[i].size());
+	}
+
+	/**
+	* Fetches id pair of kth neighbor of point i in the given point set.
+	* @param i Point index for which the neighbor id should be returned.
+	* @param k Represents kth neighbor of point i.
+	* @returns Number of points neighboring point i.
+	*/
+	PointID const& neighbor(unsigned int i, unsigned int k) const 
+	{
+		return m_neighbors[i][k];
+	}
+
+	/**
+	* Returns the number of points contained in the point set.
+	*/
+	std::size_t n_points() const { return m_n; }
+
+	/*
+	* Returns true, if the point locations may be updated by the user.
+	**/
+	bool is_dynamic() const { return m_dynamic; }
+
+	/**
+	* If true is passed, the point positions may be altered by the user.
+	*/
+	void set_dynamic(bool v) { m_dynamic = v; }
+
+	/**
+	* Returns true, if neighborhood information will be generated for the given point set.
+	*/
+	bool is_neighborsearch_enabled() const { return m_search_neighbors; }
+
+	/**
+	* Enables or disables, if neighborhood information will be generated for the given point set.
+	*/
+	void enable_neighborsearch(bool v) { m_search_neighbors = v; }
+
+	/**
+	* Reorders an array according to a previously generated sort table by invocation of the method
+	* "z_sort" of class "NeighborhoodSearch". Please note that the method "z_sort" of class
+	* "Neighborhood search" has to be called beforehand.
+	*/
+	template <typename T>
+	void sort_field(T* lst) const;
+
+private:
+
+	friend NeighborhoodSearch;
+	PointSet(Real const* x, std::size_t n, bool dynamic, bool search_neighbors)
+		: m_x(x), m_n(n), m_dynamic(dynamic), m_neighbors(n)
+		, m_keys(n, {
+		std::numeric_limits<int>::lowest(),
+		std::numeric_limits<int>::lowest(),
+		std::numeric_limits<int>::lowest() })
+		, m_search_neighbors(search_neighbors)
+	{
+		m_old_keys = m_keys;
+	}
+
+	void resize(Real const* x, std::size_t n)
+	{ 
+		m_x = x;
+		m_n = n; 
+		m_keys.resize(n, {
+			std::numeric_limits<int>::lowest(),
+			std::numeric_limits<int>::lowest(),
+			std::numeric_limits<int>::lowest() }); 
+		m_old_keys.resize(n, {
+			std::numeric_limits<int>::lowest(),
+			std::numeric_limits<int>::lowest(),
+			std::numeric_limits<int>::lowest() });
+		m_neighbors.resize(n);
+	}
+
+	Real const* point(unsigned int i) const { return &m_x[3*i]; }
+
+private:
+
+	Real const* m_x;
+	std::size_t m_n;
+	bool m_dynamic;
+	bool m_search_neighbors;
+
+	std::vector<std::vector<PointID>> m_neighbors;
+	std::vector<HashKey> m_keys, m_old_keys;
+	std::vector<Spinlock> m_locks;
+	std::vector<unsigned int> m_sort_table;
+};
+
+template <typename T> void
+PointSet::sort_field(T* lst) const
+{
+	if (m_sort_table.empty())
+	{
+		std::cerr << "WARNING: No sort table was generated for the current point set. "
+			<< "First invoke the method 'z_sort' of the class 'NeighborhoodSearch.'" << std::endl;
+		return;
+	}
+
+	std::vector<T> tmp(lst, lst + m_sort_table.size());
+	std::transform(m_sort_table.begin(), m_sort_table.end(), 
+#ifdef _MSC_VER
+		stdext::unchecked_array_iterator<T*>(lst),
+#else
+		lst,
+#endif
+		[&](int i){ return tmp[i]; });
+}
+}
+
+#endif // __POINT_SET_H__
+
diff --git a/src/CompactNSearch.cpp b/src/CompactNSearch.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..814b497b5f84904770a4e955084b9772224626c8
--- /dev/null
+++ b/src/CompactNSearch.cpp
@@ -0,0 +1,455 @@
+
+#include <CompactNSearch>
+#include "../extern/libmorton/libmorton/include/morton.h"
+
+#include <iostream>
+#include <numeric>
+#include <array>
+#include <cstdint>
+#include <limits>
+#include <algorithm>
+
+
+namespace CompactNSearch
+{
+namespace
+{
+// Determines Morten value according to z-curve.
+inline uint_fast64_t
+z_value(HashKey const& key)
+{
+	return morton3D_64_encode(
+			static_cast<uint_fast32_t>(static_cast<int64_t>(key.k[0]) -
+									  (std::numeric_limits<int>::lowest() + 1)),
+			static_cast<uint_fast32_t>(static_cast<int64_t>(key.k[1]) -
+									  (std::numeric_limits<int>::lowest() + 1)),
+			static_cast<uint_fast32_t>(static_cast<int64_t>(key.k[2]) -
+									  (std::numeric_limits<int>::lowest() + 1))
+	);
+}
+}
+
+
+NeighborhoodSearch::NeighborhoodSearch(Real r, bool erase_empty_cells)
+	: m_r2(r * r), m_inv_cell_size(1.0 / r)
+	, m_erase_empty_cells(erase_empty_cells)
+	, m_initialized(false)
+{
+	if (r <=  0.0)
+	{
+		std::cerr << "WARNING: Neighborhood search may not be initialized with a zero or negative"
+			<< " search radius. This may lead to unexpected behavior." << std::endl;
+	}
+}
+
+// Computes triple index to a world space position x.
+HashKey
+NeighborhoodSearch::cell_index(Real const* x) const
+{
+	HashKey ret;
+	for (unsigned int i = 0; i < 3; ++i)
+	{
+		if (x[i] >= 0.0) ret.k[i] = static_cast<int>(m_inv_cell_size * x[i]);
+		else ret.k[i] = static_cast<int>(m_inv_cell_size * x[i]) - 1;
+	}
+	return ret;
+}
+
+// Determines permutation table for point array.
+void
+NeighborhoodSearch::z_sort()
+{
+	for (PointSet& d : m_point_sets)
+	{
+		d.m_sort_table.resize(d.n_points());
+		std::iota(d.m_sort_table.begin(), d.m_sort_table.end(), 0);
+
+		std::sort(d.m_sort_table.begin(), d.m_sort_table.end(),
+			[&](unsigned int a, unsigned int b)
+		{
+			return z_value(cell_index(d.point(a))) < z_value(cell_index(d.point(b)));
+		});
+	}
+	m_initialized = false;
+}
+
+
+// Build hash table and entry array from scratch.
+void
+NeighborhoodSearch::init()
+{
+	m_entries.clear();
+	m_map.clear();
+
+	// Determine existing entries.
+	std::vector<HashKey> temp_keys;
+	for (unsigned int j = 0; j < m_point_sets.size(); ++j)
+	{
+		PointSet& d = m_point_sets[j];
+		d.m_locks.resize(d.n_points());
+		for (unsigned int i = 0; i < d.n_points(); i++)
+		{
+			HashKey const& key = cell_index(d.point(i));
+			d.m_keys[i] = d.m_old_keys[i] = key;
+			auto it = m_map.find(key);
+			if (it == m_map.end())
+			{
+				m_entries.push_back({{ j, i }});
+				temp_keys.push_back(key);
+				m_map[key] = static_cast<unsigned int>(m_entries.size() - 1);
+			}
+			else
+			{
+				m_entries[it->second].add({j, i});
+			}
+		}
+	}
+
+	m_map.clear();
+	for (unsigned int i = 0; i < m_entries.size(); ++i)
+	{
+		m_map.emplace(temp_keys[i], i);
+	}
+
+	m_initialized = true;
+}
+
+
+void
+NeighborhoodSearch::increase_point_set_size(unsigned int index, Real const* x, std::size_t size)
+{
+	PointSet& point_set = m_point_sets[index];
+	std::size_t old_size = point_set.n_points();
+
+	// Insert new entries.
+	for (unsigned int i = static_cast<unsigned int>(old_size); i < point_set.n_points(); i++)
+	{
+		HashKey key = cell_index(point_set.point(i));
+		point_set.m_keys[i] = point_set.m_old_keys[i] = key;
+		auto it = m_map.find(key);
+		if (it == m_map.end())
+		{
+			m_entries.push_back({{ index, i }});
+			m_map[key] = static_cast<unsigned int>(m_entries.size() - 1);
+		}
+		else 
+		{
+			m_entries[it->second].add({ index, i });
+		}
+	}
+
+	point_set.resize(x, size);
+
+}
+
+void
+NeighborhoodSearch::find_neighbors()
+{
+	if (!m_initialized)
+	{
+		init();
+		m_initialized = true;
+	}
+
+	// Precompute cell indices.
+	for (PointSet& d : m_point_sets)
+	{
+		if (!d.is_dynamic()) continue;
+		d.m_keys.swap(d.m_old_keys);
+		for (unsigned int i = 0; i < d.n_points(); ++i)
+			d.m_keys[i] = cell_index(d.point(i));
+	}
+
+	std::vector<unsigned int> to_delete;
+	if (m_erase_empty_cells)
+	{
+		to_delete.reserve(m_entries.size());
+	}
+	update_hash_table(to_delete);
+	if (m_erase_empty_cells)
+	{
+		erase_empty_entries(to_delete);
+	}
+	query();
+}
+
+void
+NeighborhoodSearch::erase_empty_entries(std::vector<unsigned int> const& to_delete)
+{
+	if (to_delete.empty())
+		return;
+
+	// Indicated empty cells.
+	m_entries.erase(std::remove_if(m_entries.begin(), m_entries.end(), [](HashEntry const& entry)
+	{
+		return entry.indices.empty();
+	}), m_entries.end());
+
+	{
+		auto it = m_map.begin();
+		while (it != m_map.end())
+		{
+			auto& kvp = *it;
+
+			if (kvp.second <= to_delete.front() && kvp.second >= to_delete.back() &&
+				std::binary_search(to_delete.rbegin(), to_delete.rend(), kvp.second))
+			{
+				it = m_map.erase(it);
+			}
+			else
+			{
+				++it;
+			}
+		}
+	}
+
+	std::vector<std::pair<HashKey const, unsigned int>*> kvps(m_map.size());
+	std::transform(m_map.begin(), m_map.end(), kvps.begin(),
+	[](std::pair<HashKey const, unsigned int>& kvp)
+	{
+		return &kvp;
+	});
+
+	// Perform neighborhood search.
+#ifdef _MSC_VER
+	concurrency::parallel_for_each
+#else
+	__gnu_parallel::for_each
+#endif
+	(kvps.begin(), kvps.end(), [&](std::pair<HashKey const, unsigned int>* kvp_)
+	{
+		auto& kvp = *kvp_;
+
+		for (unsigned int i = 0; i < to_delete.size(); ++i)
+		{
+			if (kvp.second >= to_delete[i])
+			{
+				kvp.second -= static_cast<unsigned int>(to_delete.size() - i);
+				break;
+			}
+		}
+	});
+}
+
+void
+NeighborhoodSearch::update_hash_table(std::vector<unsigned int>& to_delete)
+{
+	// Indicate points changing inheriting cell.
+	for (unsigned int j = 0; j < m_point_sets.size(); ++j)
+	{
+		PointSet& d = m_point_sets[j];
+		for (unsigned int i = 0; i < d.n_points(); ++i)
+		{
+			if (d.m_keys[i] == d.m_old_keys[i]) continue;
+
+			HashKey const& key = d.m_keys[i];
+			auto it = m_map.find(key);
+			if (it == m_map.end())
+			{
+				m_entries.push_back({{j, i}});
+				m_map.insert({ key, static_cast<unsigned int>(m_entries.size() - 1) });
+			}
+			else
+			{
+				HashEntry& entry = m_entries[it->second];
+				entry.add({j, i});
+			}
+
+			unsigned int entry_index = m_map[d.m_old_keys[i]];
+			m_entries[entry_index].erase({j, i});
+			if (m_erase_empty_cells)
+			{
+				if (m_entries[entry_index].n_indices() == 0)
+				{
+					to_delete.push_back(entry_index);
+				}
+			}
+		}
+	}
+
+	to_delete.erase(std::remove_if(to_delete.begin(), to_delete.end(),
+	[&](unsigned int index)
+	{
+		return m_entries[index].n_indices() != 0;
+	}), to_delete.end());
+	std::sort(to_delete.begin(), to_delete.end(), std::greater<unsigned int>());
+}
+
+void
+NeighborhoodSearch::query()
+{
+	for (PointSet& d : m_point_sets)
+	{
+		if (d.is_neighborsearch_enabled())
+		{
+			for (auto& n : d.m_neighbors)
+			{
+				n.clear();
+			}
+		}
+	}
+
+	std::vector<std::pair<HashKey const, unsigned int> const*> kvps(m_map.size());
+	std::transform(m_map.begin(), m_map.end(), kvps.begin(),
+	[](std::pair<HashKey const, unsigned int> const& kvp)
+	{
+		return &kvp;
+	});
+
+	// Perform neighborhood search.
+#ifdef _MSC_VER
+	concurrency::parallel_for_each
+#else
+	__gnu_parallel::for_each
+#endif
+	(kvps.begin(), kvps.end(), [&](std::pair<HashKey const, unsigned int> const* kvp_)
+	{
+		auto const& kvp = *kvp_;
+		HashEntry const& entry = m_entries[kvp.second];
+		HashKey const& key = kvp.first;
+
+		for (unsigned int a = 0; a < entry.n_indices(); ++a)
+		{
+			PointID const& va = entry.indices[a];
+			PointSet& da = m_point_sets[va.point_set_id];
+			for (unsigned int b = a + 1; b < entry.n_indices(); ++b)
+			{
+				PointID const& vb = entry.indices[b];
+				PointSet& db = m_point_sets[vb.point_set_id];
+
+				if (!da.is_neighborsearch_enabled() && 
+					!db.is_neighborsearch_enabled())
+				{
+					continue;
+				}
+
+				Real const* xa = da.point(va.point_id);
+				Real const* xb = db.point(vb.point_id);
+				Real tmp = xa[0] - xb[0];
+				Real l2 = tmp * tmp;
+				tmp = xa[1] - xb[1];
+				l2 += tmp * tmp;
+				tmp = xa[2] - xb[2];
+				l2 += tmp * tmp;
+
+				if (l2 < m_r2)
+				{
+					if (da.is_neighborsearch_enabled())
+					{
+						da.m_neighbors[va.point_id].push_back(vb);
+					}
+					if (db.is_neighborsearch_enabled())
+					{
+						db.m_neighbors[vb.point_id].push_back(va);
+					}
+				}
+			}
+		}
+	});
+
+
+	std::vector<std::array<bool, 27>> visited(m_entries.size(), {false});
+	std::vector<Spinlock> entry_locks(m_entries.size());
+
+#ifdef _MSC_VER
+	concurrency::parallel_for_each
+#else
+	__gnu_parallel::for_each
+#endif
+	(kvps.begin(), kvps.end(), [&](std::pair<HashKey const, unsigned int> const* kvp_)
+	{
+		auto const& kvp = *kvp_;
+		HashEntry const& entry = m_entries[kvp.second];
+		HashKey const& key = kvp.first;
+
+		for (int dj = -1; dj <= 1; dj++)
+		for (int dk = -1; dk <= 1; dk++)
+		for (int dl = -1; dl <= 1; dl++)
+		{
+			int l_ind = 9 * (dj + 1) + 3 * (dk + 1) + (dl + 1);
+			if (l_ind == 13)
+			{
+				continue;
+			}
+			entry_locks[kvp.second].lock();
+			if (visited[kvp.second][l_ind])
+			{
+				entry_locks[kvp.second].unlock();
+				continue;
+			}
+			entry_locks[kvp.second].unlock();
+
+
+
+			auto it = m_map.find({ key.k[0] + dj, key.k[1] + dk, key.k[2] + dl });
+			if (it == m_map.end())
+				continue;
+
+			std::array<unsigned int, 2> entry_ids{{kvp.second, it->second}};
+			if (entry_ids[0] > entry_ids[1])
+				std::swap(entry_ids[0], entry_ids[1]);
+			entry_locks[entry_ids[0]].lock();
+			entry_locks[entry_ids[1]].lock();
+
+			if (visited[kvp.second][l_ind])
+			{
+				entry_locks[entry_ids[1]].unlock();
+				entry_locks[entry_ids[0]].unlock();
+				continue;
+			}
+
+			visited[kvp.second][l_ind] = true;
+			visited[it->second][26 - l_ind] = true;
+
+			entry_locks[entry_ids[1]].unlock();
+			entry_locks[entry_ids[0]].unlock();
+
+			for (unsigned int i = 0; i < entry.n_indices(); ++i)
+			{
+				PointID const& va = entry.indices[i];
+				HashEntry const& entry_ = m_entries[it->second];
+				unsigned int n_ind = entry_.n_indices();
+				for (unsigned int j = 0; j < n_ind; ++j)
+				{
+					PointID const& vb = entry_.indices[j];
+					PointSet& db = m_point_sets[vb.point_set_id];
+
+					PointSet& da = m_point_sets[va.point_set_id];
+					if (!da.is_neighborsearch_enabled() && 
+						!db.is_neighborsearch_enabled())
+					{
+						continue;
+					}
+
+					Real const* xa = da.point(va.point_id);
+					Real const* xb = db.point(vb.point_id);
+					Real tmp = xa[0] - xb[0];
+					Real l2 = tmp * tmp;
+					tmp = xa[1] - xb[1];
+					l2 += tmp * tmp;
+					tmp = xa[2] - xb[2];
+					l2 += tmp * tmp;
+					if (l2 < m_r2)
+					{
+						if (da.is_neighborsearch_enabled())
+						{
+							da.m_locks[va.point_id].lock();
+							da.m_neighbors[va.point_id].push_back(vb);
+							da.m_locks[va.point_id].unlock();
+						}
+						if (db.is_neighborsearch_enabled())
+						{
+							db.m_locks[vb.point_id].lock();
+							db.m_neighbors[vb.point_id].push_back(va);
+							db.m_locks[vb.point_id].unlock();
+						}
+					}
+				}
+			}
+		
+		}
+	});
+}
+
+}
+