diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..10651aed4b7cc8797f78c4490fcf109e90bd851a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+figures
+.snakemake
+**/__pycache__
+resources
+results
\ No newline at end of file
diff --git a/README.md b/README.md
index 5e067848b12e3446bee50dbb6abbbf522c397b62..ca9fcfd3201d03c34f383ad8ae661e629326cc6b 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,58 @@
-# edge-flow-cell-complexes
\ No newline at end of file
+
+
+# Representing Edge Flows on Graphs via Sparse Cell Complexes
+
+<img style="float:right;width:200px;margin-top:-5px" src="readme_src/LOGO_ERC-FLAG_FP.png">
+
+[![arXiv:2309.01632](https://img.shields.io/badge/arXiv-2309.01632-b31b1b.svg?logo=arxiv)](https://arxiv.org/abs/2309.01632)
+[![MIT license](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/josefhoppe/edge-flow-cell-complexes/blob/main/LICENSE)
+[![Snakemake 7.3 or higher](https://img.shields.io/badge/snakemake-≥7.3.0-brightgreen.svg?style=flat)](https://snakemake.readthedocs.io)
+
+## About
+
+This repository accompanies our paper linked above.
+The code is split across two repositories:
+
+- This repository contains the evaluation code and the Snakemake workflow.
+- [Cell FLOWer](https://github.com/josefhoppe/cell-flower) is a separate package that contains only the code required for inference and is published on PyPI.
+
+If you use this code, please cite our paper:
+
+```
+@misc{hoppe2023representing,
+      title={Representing Edge Flows on Graphs via Sparse Cell Complexes}, 
+      author={Josef Hoppe and Michael T. Schaub},
+      year={2023},
+      eprint={2309.01632},
+      archivePrefix={arXiv},
+      primaryClass={cs.SI}
+}
+```
+
+## Running numerical experiments
+
+This project uses [Snakemake](https://snakemake.readthedocs.io).
+To run the experiments:
+
+1. Install [Miniconda](https://docs.conda.io/en/latest/miniconda.html) (or mamba)
+2. Create a new conda environment and install dependencies: `conda env update --file workflow/envs/environment.yaml`
+3. Run snakemake: `snakemake -c8 all`
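+
+For example, to build only a single output, pass the target file instead of `all` (any file listed under `rule all` in `workflow/Snakefile` should work as a target):
+
+```
+snakemake -c8 figures/time_exp.pdf
+```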
+
+## Workflow structure
+
+The rulegraph below gives a rough overview of the workflow structure:
+
+![Snakemake Rulegraph](rules.png)
+
+Generally, we aimed to give rules short, easily understood names.
+For the different kinds of experiments, we used the following naming scheme:
+
+- **approx_exp** - approximation experiment on synthetic data, i.e., how large is the approximation error?
+- **inference_exp** - inference experiment on synthetic data, i.e., how many ground-truth cells have been recovered?
+- **realworld_exp** - approximation experiment on real-world data.
+- **time_exp** - experiment to measure runtime performance.
+- **heuristic_exp** - experiment to see how many ground-truth cells are detected by the heuristic (first iteration).
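+
+Each experiment rule writes its raw results to `results/`, and the corresponding plotting rules render figures to `figures/`. As an illustration, the approximation experiment rule in `workflow/Snakefile` produces one CSV per parameter combination:
+
+```
+results/approximation/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x{clusters}.csv
+```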
+
+
+
+## Acknowledgements
+
+Funded by the European Union (ERC, HIGH-HOPeS, 101039827). Views and opinions expressed are however those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Council Executive Agency. Neither the European Union nor the granting authority can be held responsible for them.
\ No newline at end of file
diff --git a/cell_complexes/__init__.py b/cell_complexes/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e21d3cd29f2a6a64b5f026f81685bc023fd16f3d
--- /dev/null
+++ b/cell_complexes/__init__.py
@@ -0,0 +1,8 @@
+"""
+Cell detection module
+"""
+
+import cell_complexes.generator
+import cell_complexes.flow_generator
+
+from cell_complexes.experiment import CellComplexDetectionExperiment
diff --git a/cell_complexes/experiment.py b/cell_complexes/experiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..6624af99bf8b8ea9d454ed76467306b7247479b7
--- /dev/null
+++ b/cell_complexes/experiment.py
@@ -0,0 +1,519 @@
+"""
+Module to configure and evaluate cell complex detection
+"""
+
+import itertools, time
+from typing import Iterable, List, Set, Tuple, Dict, Literal
+from collections import defaultdict
+
+import numpy as np
+import pandas as pd
+
+from scipy.sparse import csc_array
+
+from cell_flower.cell_complex import CellComplex, calc_edges, normalize_cell
+from .flow_generator import generate_flows_edge_noise, FlowGeneratorCell
+from cell_flower.detection import CellSearchFlowNormalization, FlowPotentialSpanningTree, cell_candidate_search_st, CellSearchMethod, project_flow, score_cells_multiple
+from .generator import CellComplexGenerator, TriangulationComplexGenerator
+
+def one_missing_cells(cell: tuple) -> Set[tuple]:
+    """
+    Gets all cells that can be retrieved by removing one node from `cell`
+    """
+    return {normalize_cell(cell[:i] + cell[i+1:]) for i in range(len(cell))}
+
+# default seeds
+default_flow_seeds = (77367626, 170391664, 185278135, 92926967, 59345847, 40832980, 69907117,
+                      258438430, 95881092, 69854436, 152878489, 261025528, 179519147, 5980565,
+                      76801101, 187317209, 250694206, 212138740, 100587057, 18805444)
+
+default_alg_seeds = (19356077, 165085303, 6015432, 75970932, 91207037, 108074582, 98227442,
+                     189625962, 202713794, 172660770, 234964091, 243157323, 230948579, 166182846,
+                     5063053, 212910573, 246681574, 162390301, 34053785, 98954684)
+
+# literal to indicate usage of ground truth cells instead of CellSearchMethod
+GROUND_TRUTH = "ground_truth"
+
+
+def find_combinations(a: tuple, b: tuple) -> List[tuple]:
+    """
+    Finds all combinations of the given 2-cells.
+
+    There can be multiple combinations if the cells share multiple, disjoint runs of edges, e.g.:
+    Cell a: `(1,2,3,4,5,6)`
+    Cell b: `(1,2,4,5)`
+    -> the edges (1,2) and (4,5) are shared, so the combinations are (2,3,4) and (1,5,6)
+    """
+    a_edges = set(calc_edges(a))
+    b_edges = set(calc_edges(b))
+    shared_edges = a_edges.intersection(b_edges)
+    if len(shared_edges) > 0:
+        combined_edges = a_edges.union(b_edges).difference(shared_edges)
+        return find_cycles(combined_edges)
+    else:
+        return []
+
+
+def find_cycles(edges: Set[Tuple[int,int]]) -> List[tuple]:
+    """
+    Finds the disjoint cycles formed by `edges`.
+    `edges` must not contain any edge that does not belong to a cycle.
+    `edges` is consumed (modified in place).
+    """
+    result = []
+    while len(edges) > 0:
+        cycle = list(edges.pop())
+        while cycle[0] != cycle[-1]:
+            possible_edges = [edge for edge in edges if cycle[-1] in edge]
+            if len(possible_edges) != 1:
+                # either cycles sharing a node or an open path -> nothing to detect here
+                return []
+            next_edge = possible_edges[0]
+            edges.remove(next_edge)
+            next_node = next_edge[0] if next_edge[0] != cycle[-1] else next_edge[1]
+            cycle.append(next_node)
+        result.append(normalize_cell(tuple(cycle[:-1])))
+    return result
+
+
+class CellComplexDetectionExperiment:
+    """
+    Defines, executes, and evaluates a single configuration of cell complex detection
+    """
+    __cell_compl: CellComplex
+    __alg_seeds: Iterable[int]
+    __flow_cache: List[np.ndarray]
+    __runs: int
+    __flow_counts: Iterable[int]
+    __search_method: CellSearchMethod | Literal['ground_truth']
+    __n_clusters: int
+    __search_flow_norm: CellSearchFlowNormalization
+    __num_curls: int
+    __approx_epsilon: float
+    __cells_per_step: int
+    __agg_results: bool
+    __results: pd.DataFrame | None = None
+    __summarized_results: pd.DataFrame | None = None
+    __time_recorded: Dict[int, list] | None = None # num flows -> [ runtime with seed 1, ... ]
+    __correct_cells: Dict[int, list] | None = None # num flows -> [ number of correct cells with seed 1, ... ]
+    __approx_error: Dict[int, list] | None = None # num flows -> [ error after n with seed 1, ... ]
+    __approx_cell_count: Dict[int, list] | None = None # num flows -> [ n to error < epsilon with seed 1, ... ]
+    __cell_len_sum: Dict[int, list] | None = None # num flows -> [ sum of cell lengths (~ 1 / sparsity) with seed 1, ... ]
+    __inferred_cells: Dict[int, List[list]] | None = None # num flows -> [ list of cells detected with seed 1, ... ]
+    __detailed_result_df: pd.DataFrame | None = None # DataFrame with one row per simulated run and cols method, flows, iterations, total_cell_len, cell_candidates, run, approx_error
+    __always_opt_cell: bool
+
+    @property
+    def cell_compl(self) -> CellComplex:
+        """
+        The ground-truth cell complex used in this experiment
+        """
+        return self.__cell_compl
+
+    @property
+    def search_method(self) -> CellSearchMethod:
+        """
+        The `CellSearchMethod` used in this experiment
+        """
+        return self.__search_method
+
+    @property
+    def flows(self) -> List[np.ndarray]:
+        """
+        Returns the list of flow matrices, one for each run of the algorithm
+        """
+        return self.__flow_cache
+
+    @property
+    def results(self) -> pd.DataFrame:
+        """
+        If necessary, uses self.run() to obtain results
+
+        Result: DataFrame with index = flow_counts, columns = detected cells (only populated if `agg_results` is set)
+        """
+        if self.__results is None:
+            self.run()
+        return self.__results.copy()
+
+    @property
+    def summarized_results(self) -> pd.DataFrame:
+        """
+        If necessary, uses self.run() to obtain results
+
+        Summarizes results by counting correct, one off, and other cells
+        """
+        if self.__summarized_results is None:
+            correct = [cell for cell in self.correct_cells() if cell in self.results.columns]
+            one_down_cols = [tpl for tpl in set().union(
+                *[one_missing_cells(cell) for cell in correct]) if tpl in self.results.columns]
+            one_up_cols = [cell for cell in self.results.columns if len(
+                one_missing_cells(cell).intersection(correct)) > 0]
+
+            to_delete = one_down_cols + list(correct) + one_up_cols
+            self.__summarized_results = self.results.drop(
+                to_delete, axis='columns')
+
+            results: pd.DataFrame = self.results
+            self.__summarized_results['correct'] = results[list(correct)].sum(axis=1)
+            self.__summarized_results['one_down'] = results[one_down_cols].sum(axis=1)
+            self.__summarized_results['one_up'] = results[one_up_cols].sum(axis=1)
+            self.__summarized_results['at_most_one_away'] = self.__summarized_results['correct'] + \
+                self.__summarized_results['one_down'] + \
+                self.__summarized_results['one_up']
+
+            self.__summarized_results.sort_values(max(self.__flow_counts), axis='columns', ascending=False, inplace=True)
+        return self.__summarized_results.copy()
+    
+    @property
+    def execution_time(self) -> Dict[int, list]:
+        """
+        If necessary, uses self.run() to obtain results
+
+        Returns: a dictionary of `number of flows -> [ runtime with seed 1, ... ]`
+        """
+        if self.__time_recorded is None:
+            self.run()
+        return { key: list(val) for key, val in self.__time_recorded.items() }
+    
+    @property
+    def correct_cells_found(self) -> Dict[int, list]:
+        """
+        If necessary, uses self.run() to obtain results
+
+        Returns: a dictionary of `number of flows -> [ number of correct cells with seed 1, ... ]`
+        """
+        if self.__correct_cells is None:
+            self.run()
+        return { key: list(val) for key, val in self.__correct_cells.items() }
+
+    @property
+    def approx_error(self) -> Dict[int, list]:
+        """
+        If necessary, uses self.run() to obtain results
+
+        Returns: a dictionary of `number of flows -> [ approx error after n cells with seed 1, ... ]`
+        """
+        if self.__approx_error is None:
+            self.run()
+        return { key: list(val) for key, val in self.__approx_error.items() }
+
+    @property
+    def approx_cell_count(self) -> Dict[int, list]:
+        """
+        If necessary, uses self.run() to obtain results
+
+        Returns: a dictionary of `number of flows -> [ number cells for error < epsilon with seed 1, ... ]`
+        """
+        if self.__approx_cell_count is None:
+            self.run()
+        return { key: list(val) for key, val in self.__approx_cell_count.items() }
+
+    @property
+    def cell_len_sum(self) -> Dict[int, list]:
+        """
+        If necessary, uses self.run() to obtain results
+
+        Returns: a dictionary of `number of flows -> [ sum of cell lengths (~ 1 / sparsity) with seed 1, ... ]`
+        """
+        if self.__cell_len_sum is None:
+            self.run()
+        return { key: list(val) for key, val in self.__cell_len_sum.items() }
+
+    @property
+    def inferred_cells(self) -> Dict[int, List[list]]:
+        """
+        If necessary, uses self.run() to obtain results
+
+        Returns: a dictionary of `number of flows -> [ list of cells detected with seed 1, ... ]`
+        """
+        if self.__inferred_cells is None:
+            self.run()
+        return { key: [list(cells) for cells in val] for key, val in self.__inferred_cells.items() }
+    
+    @property
+    def detailed_results(self) -> pd.DataFrame:
+        """Get DataFrame with one row per simulated run.
+        
+        columns: 
+        - method
+        - flows
+        - iterations
+        - total_cell_len
+        - cell_candidates
+        - run
+        - approx_error."""
+        if self.__detailed_result_df is None:
+            self.run()
+        return self.__detailed_result_df.copy()
+    
+    def modified(self, **kwargs) -> 'CellComplexDetectionExperiment':
+        """
+        Returns a copy with only the given kwargs changed.
+        Any keyword from the __init__ function is permitted.
+
+        Also note:
+        - flows are only changed if passed explicitly as `flows`. All arguments related to flow generation are ignored.
+        """
+        kwargs.setdefault('cell_compl', self.__cell_compl)
+        kwargs.setdefault('alg_seeds', self.__alg_seeds)
+        kwargs.setdefault('flows', self.__flow_cache)
+        kwargs.setdefault('flow_counts', self.__flow_counts)
+        kwargs.setdefault('runs', self.__runs)
+        kwargs.setdefault('search_method', self.__search_method)
+        kwargs.setdefault('n_clusters', self.__n_clusters)
+        kwargs.setdefault('search_flow_norm', self.__search_flow_norm)
+        kwargs.setdefault('num_curls', self.__num_curls)
+        kwargs.setdefault('approx_epsilon', self.__approx_epsilon)
+        kwargs.setdefault('cells_per_step', self.__cells_per_step)
+        kwargs.setdefault('agg_results', self.__agg_results)
+        kwargs.setdefault('always_opt_cell', self.__always_opt_cell)
+
+        return CellComplexDetectionExperiment(**kwargs)
+
+    def __init__(self, cell_compl: CellComplex | CellComplexGenerator = TriangulationComplexGenerator(),
+                 alg_seeds: Iterable[int] = default_alg_seeds,
+                 flow_seeds: Iterable[int] = default_flow_seeds,
+                 runs: int = 20, flow_counts: Iterable[int] = tuple(range(1, 21)),
+                 curl_flow_sigma: np.float64 = 1, noise_sigma: np.float64 = .5,
+                 flows: List[np.ndarray] | None = None,
+                 search_method: CellSearchMethod | Literal['ground_truth'] = CellSearchMethod.MAX,
+                 n_clusters: int = 11,
+                 search_flow_norm: CellSearchFlowNormalization = CellSearchFlowNormalization.LEN,
+                 num_curls: int | None = None, approx_epsilon: float = np.inf, cells_per_step: int = 5,
+                 agg_results:bool=False, always_opt_cell:bool=False):
+        self.__alg_seeds = alg_seeds
+        self.__runs = runs
+        self.__cell_compl = cell_compl if isinstance(cell_compl, CellComplex) else cell_compl.generate()
+        self.__search_method = search_method
+        self.__n_clusters = n_clusters
+        self.__cells_per_step = cells_per_step
+        self.__num_curls = num_curls if num_curls is not None else len(self.__cell_compl.get_cells(2))
+        self.__flow_counts = flow_counts
+        self.__search_flow_norm = search_flow_norm
+        self.__approx_epsilon = approx_epsilon
+        self.__agg_results = agg_results
+        self.__always_opt_cell = always_opt_cell
+        if flows is not None:
+            self.__flow_cache = flows
+        else:
+            self.__flow_cache = []
+            flow_cells = [FlowGeneratorCell(
+                cell, -curl_flow_sigma, curl_flow_sigma) for cell in self.__cell_compl.get_cells(2)]
+            for seed in flow_seeds:
+                rnd = np.random.default_rng(seed)
+                # Generate flows
+                flows = generate_flows_edge_noise(
+                    rnd, self.__cell_compl, flow_cells, noise_sigma, np.max(flow_counts))
+                flows_matrix = np.stack([f.to_numpy() for f in flows])
+                self.__flow_cache.append(flows_matrix)
+    
+    def correct_cells(self, only_detected: bool = True) -> Set[tuple]:
+        """
+        Get all correct cells (ground truth or linear combinations)
+        """
+        correct = set(self.cell_compl.get_cells(2))
+        # linear combinations of correct cells are also correct
+        for a, b in itertools.combinations(correct, 2):
+            correct.update(find_combinations(a, b))
+        if only_detected:
+            correct = {cell for cell in correct if cell in self.results.columns}
+        return correct
+
+    def get_cell_candidates(self, rnd: np.random.Generator | int, current_compl: CellComplex, harmonic_flows: np.ndarray) -> List[Tuple[tuple, csc_array] | Tuple[tuple, csc_array, FlowPotentialSpanningTree]]:
+        """
+        Finds candidates to add to `current_compl`, guided by `harmonic_flows` and the configured cell search method
+
+        - rnd: either a random generator or an index into the algorithm seeds (`alg_seeds` constructor param)
+        """
+        if isinstance(rnd, int):
+            rnd = np.random.default_rng(seed=self.__alg_seeds[rnd])
+        if self.__search_method == CellSearchMethod.TRIANGLES:
+            cand_list = [(np.sum(np.abs(boundary.T @ harmonic_flows.T)), cell, boundary) for cell, boundary in current_compl.triangles]
+            cand_list.sort(key=lambda x: x[0], reverse=True)
+            return [(cell, boundary) for _, cell, boundary in cand_list[:self.__cells_per_step]]
+        elif self.__search_method == GROUND_TRUTH:
+            b2 = self.__cell_compl.boundary_map(2)
+            cell_list = [(cell, b2[:, [i]]) for i, cell in enumerate(self.__cell_compl.get_cells(2)) if cell not in current_compl.get_cells(2)]
+            cand_list = [(np.sum(np.abs(boundary.T @ harmonic_flows.T)), cell, boundary) for cell, boundary in cell_list]
+            cand_list.sort(key=lambda x: x[0], reverse=True)
+            return [(cell, boundary) for _, cell, boundary in cand_list[:self.__cells_per_step]]
+        else:
+            seed = rnd.integers(4294967295)
+            return cell_candidate_search_st(rnd, current_compl, flows=harmonic_flows, result_count=self.__cells_per_step, method=self.__search_method, flow_norm=self.__search_flow_norm, random_seed=seed, n_clusters=self.__n_clusters)
+        
+    def get_harmonic_flows(self, current_compl: CellComplex, seed_num: int, flow_count: int) -> np.ndarray:
+        """
+        Helper function for further analysis
+        """
+        flows = self.__flow_cache[seed_num][:flow_count]
+        no_gradient_flows = np.copy(flows)
+        for i in range(flows.shape[0]):
+            no_gradient_flows[i] -= project_flow(
+                current_compl.boundary_map(1).T, flows[[i]])
+        harmonic_flows = np.copy(no_gradient_flows)
+        for j in range(harmonic_flows.shape[0]):
+            harmonic_flows[j] -= project_flow(
+                current_compl.boundary_map(2), no_gradient_flows[[j]])
+        return harmonic_flows
+
+    def simulate_run(self, seed_num: int, flow_count: int) -> tuple[list[tuple], float, int, list[dict]]:
+        """
+        simulates a single run of the algorithm with the `seed_num`th seed and `flow_count` many flows
+        """
+        rnd = np.random.default_rng(self.__alg_seeds[seed_num])
+        current_compl = self.__cell_compl.skeleton()
+        flows = self.__flow_cache[seed_num][:flow_count]
+        no_gradient_flows = np.copy(flows)
+        for i in range(flows.shape[0]):
+            no_gradient_flows[i] -= project_flow(
+                current_compl.boundary_map(1).T, flows[[i]])
+        harmonic_flows = np.copy(no_gradient_flows)
+
+        added_cells: List[tuple] = []
+
+        n = 0
+        approx_error = np.sum(np.square(harmonic_flows))
+        n_to_epsilon = 0
+        approx_error_at_n = approx_error if self.__num_curls == 0 else -1
+        found_n = approx_error <= self.__approx_epsilon
+        partial_res_lst = []
+        partial_res_lst.append({
+                    'method': self.__search_method,
+                    'n_clusters': self.__n_clusters,
+                    'flows': flow_count,
+                    'iterations': 0,
+                    'total_cell_len': 0,
+                    'cell_candidates': self.__cells_per_step,
+                    'run': seed_num,
+                    'approx_error': approx_error
+                })
+        correct_cells = self.correct_cells(only_detected=False)
+        while n < self.__num_curls or (approx_error > self.__approx_epsilon and self.__approx_epsilon >= 0):
+
+            candidate_cells = self.get_cell_candidates(rnd,
+                                                       current_compl, harmonic_flows)
+            correct_candidates = []
+            if self.__always_opt_cell:
+                # check if one candidate is in true cells
+                correct_candidates = [cell for cell in candidate_cells if cell[0] in correct_cells]
+            if len(correct_candidates) == 0:
+                score_vals, score_cells = score_cells_multiple(
+                    current_compl, no_gradient_flows, [cell[:2] for cell in candidate_cells])
+                scores = pd.DataFrame(score_vals, index=pd.Index(score_cells, tupleize_cols=False))
+                next_cell = scores.mean(axis=1).idxmin()
+            else:
+                next_cell = correct_candidates[0][0]
+            cell_boundaries = { cell[0]: cell[1] for cell in candidate_cells}
+
+            if next_cell == ():
+                print(
+                    f'[WARN] detected empty cell: {seed_num}, {flow_count}, ({n})')
+                return added_cells, approx_error_at_n, n_to_epsilon, partial_res_lst
+
+            added_cells.append(next_cell)
+            current_compl = current_compl.add_cell_fast(next_cell, cell_boundaries[next_cell])
+
+            harmonic_flows = np.copy(no_gradient_flows)
+            for j in range(harmonic_flows.shape[0]):
+                harmonic_flows[j] -= project_flow(
+                    current_compl.boundary_map(2), no_gradient_flows[[j]])
+            
+            approx_error = np.sum(np.square(harmonic_flows))
+            n += 1
+
+
+            partial_res_lst.append({
+                    'method': self.__search_method,
+                    'flows': flow_count,
+                    'iterations': n,
+                    'total_cell_len': sum(map(len, added_cells)),
+                    'cell_candidates': self.__cells_per_step,
+                    'run': seed_num,
+                    'approx_error': approx_error
+                })
+
+            if approx_error <= self.__approx_epsilon and not found_n:
+                found_n = True
+                n_to_epsilon = n
+            if n == self.__num_curls:
+                approx_error_at_n = approx_error
+
+        return added_cells, approx_error_at_n, n_to_epsilon, partial_res_lst
+    
+    def simulate_heuristic(self, seed_num: int, flow_count: int) -> list[tuple]:
+        """
+        simulates a single heuristic run.
+
+        returns the list of cell candidates
+        """
+        rnd = np.random.default_rng(self.__alg_seeds[seed_num])
+        current_compl = self.__cell_compl.skeleton()
+        flows = self.__flow_cache[seed_num][:flow_count]
+        no_gradient_flows = np.copy(flows)
+        for i in range(flows.shape[0]):
+            no_gradient_flows[i] -= project_flow(
+                current_compl.boundary_map(1).T, flows[[i]])
+        harmonic_flows = np.copy(no_gradient_flows)
+        return [ tpl[0] for tpl in self.get_cell_candidates(rnd, current_compl, harmonic_flows) ]
+    
+
+    def eval_heuristic_step(self) -> list[tuple[int, int, int]]:
+        """
+        Performs the heuristic for the first iteration and checks how many correct cells (or linear combinations thereof) were found
+
+        returns [(# of flows, run, number of correct cells)]
+        """
+        result = []
+        correct_cells = self.correct_cells(only_detected=False)
+        for num_flows in self.__flow_counts:
+            for seed_num in range(self.__runs):
+                candidates = self.simulate_heuristic(seed_num, num_flows)
+                result.append((num_flows, seed_num, len(correct_cells.intersection(candidates))))
+        return result
+
+
+    def run(self):
+        """
+        Run the cell detection experiment for each seed and flow number
+        """
+        idx = pd.Index([], tupleize_cols=False, dtype='object')
+        results = pd.DataFrame(
+            dtype=np.int32, index=self.__flow_counts, columns=idx).fillna(0)
+        time_recorded = defaultdict(list)
+        correct_cells_found = defaultdict(list)
+        approx_error = defaultdict(list)
+        approx_cell_count = defaultdict(list)
+        total_cell_len = defaultdict(list)
+        inferred_cells = defaultdict(list)
+        detailed_res_lst = []
+        correct_cells = self.correct_cells(only_detected=False)
+        for num_flows in self.__flow_counts:
+            print(f'Starting Experiments: {num_flows} flows')
+            flows_loc = results.index.get_loc(num_flows)
+            for seed_num in range(self.__runs):
+                before = time.time()
+                res, approx_error_at_n, n_to_epsilon, partial_res_lst = self.simulate_run(seed_num, num_flows)
+                inferred_cells[num_flows].append(res)
+                time_recorded[num_flows].append(time.time() - before)
+                correct_cells_found[num_flows].append(len(correct_cells.intersection(res)))
+                approx_error[num_flows].append(approx_error_at_n)
+                approx_cell_count[num_flows].append(n_to_epsilon)
+                total_cell_len[num_flows].append(sum(map(len, res)))
+                detailed_res_lst.extend(partial_res_lst)
+                for cell in res:
+                    if self.__agg_results:
+                        if cell not in results.columns:
+                            # pylint:disable=unsupported-assignment-operation
+                            # pylint (incorrectly) thinks the dataframe does not allow assigning a column
+                            results[cell] = 0
+                        cell_loc = results.columns.get_loc(cell)
+                        results.iloc[flows_loc, cell_loc] += 1
+        self.__results = results
+        self.__time_recorded = time_recorded
+        self.__correct_cells = correct_cells_found
+        self.__approx_cell_count = approx_cell_count
+        self.__approx_error = approx_error
+        self.__cell_len_sum = total_cell_len
+        self.__inferred_cells = inferred_cells
+        self.__detailed_result_df = pd.DataFrame(detailed_res_lst)
\ No newline at end of file
diff --git a/cell_complexes/flow_generator.py b/cell_complexes/flow_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a9d789a1e56dbe998086abb0d694fb6f0c6c4ad
--- /dev/null
+++ b/cell_complexes/flow_generator.py
@@ -0,0 +1,32 @@
+
+
+from typing import List, NamedTuple
+
+import pandas as pd
+from numpy.random import Generator
+
+from cell_flower.cell_complex import CellComplex
+
+
+class FlowGeneratorCell(NamedTuple):
+    cell: tuple
+    min: float
+    max: float
+
+
+def generate_flows_edge_noise(rnd: Generator, cell_compl: CellComplex, generator_cells: List[FlowGeneratorCell], max_noise: float, count: int = 10) -> List[pd.Series]:
+    """
+    Generates flows from the given cells (one Gaussian curl flow per cell), with added edge-level Gaussian noise.
+    """
+    result = []
+    
+
+    for _ in range(count):
+        curl_flow: pd.Series = pd.Series({g.cell: rnd.normal(
+            (g.max+g.min) / 2, (g.max-g.min)/2) for g in generator_cells})
+        indices = [cell_compl.get_cells(2).index(g.cell) for g in generator_cells]
+        flow = cell_compl.boundary_map(2)[:, indices] @ curl_flow
+        flow += rnd.normal(scale=max_noise, size=len(flow))
+        result.append(pd.Series(flow, index=cell_compl.get_cells(1)))
+
+    return result
diff --git a/cell_complexes/generator.py b/cell_complexes/generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd6d9eab320582ccaeaf2b6cc4869cef127dfad9
--- /dev/null
+++ b/cell_complexes/generator.py
@@ -0,0 +1,223 @@
+
+from abc import ABC, abstractmethod
+from typing import Iterable
+
+import numpy as np
+from numpy.random import Generator
+from scipy.spatial import Delaunay
+from sklearn.utils import check_random_state
+from sklearn.utils.random import sample_without_replacement
+import itertools
+import networkx as nx
+
+from cell_flower.cell_complex import CellComplex, calc_edges, normalize_cell
+
+
+class CellComplexGenerator(ABC):
+    """
+    Base class for generating Cell complexes.
+    """
+    _seed: int
+    _cell_lengths: Iterable[int]
+    _nodes: int
+
+    def __init__(self, seed: int = 34905852, nodes: int = 20, cell_lengths: Iterable[int] = (5,5,10,10)):
+        self._seed = seed
+        self._nodes = nodes
+        self._cell_lengths = cell_lengths
+
+    @abstractmethod
+    def generate(self) -> CellComplex:
+        """
+        Generates a CellComplex according to the settings.
+        """
+        
+class SplitCircleComplexGenerator(CellComplexGenerator):
+    """
+    Generates a cell complex that consists of:
+
+    - a main path of length `split_circle_common_len`
+    - one 2-cell per entry of `cell_lengths`, each containing the main path plus that many additional nodes
+    - `edges` randomly added edges
+    """
+    __edges: int
+    __split_circle_common_len: int
+
+    def __init__(self, seed: int = 34905852, nodes: int = 20, cell_lengths: Iterable[int] = (5, 5, 10, 10),
+                 split_circle_common_len: int = 3, edges: int = 80):
+        super().__init__(seed, nodes, cell_lengths)
+        self.__edges = edges
+        self.__split_circle_common_len = split_circle_common_len
+
+    def generate(self) -> CellComplex:
+        common_circle_part = list(range(1, self.__split_circle_common_len + 1))
+        next_node = self.__split_circle_common_len + 1
+        cells = []
+        for split_len in self._cell_lengths:
+            cells.append(tuple(common_circle_part + list(range(next_node, next_node + split_len))))
+            next_node += split_len
+        
+        rnd = check_random_state(self._seed)
+        for _ in range(self.__edges):
+            cells.append(normalize_cell(tuple(i + 1 for i in sample_without_replacement(next_node - 1, 2, random_state=rnd))))
+
+        return CellComplex(cells)
+
+class TriangulationComplexGenerator(CellComplexGenerator):
+    """
+    CellComplexGenerator that generates a cell complex based on a Delaunay triangulation
+    """
+    __ndim: int
+    __delete_nodes: int
+    __delete_edges: int
+
+    def __init__(self, seed: int = 34905852, nodes: int = 50, cell_lengths: Iterable[int] = (5, 5, 10, 10),
+                 ndim: int = 2, delete_nodes: int = 5, delete_edges: int = 10):
+        super().__init__(seed, nodes, cell_lengths)
+        self.__ndim = ndim
+        self.__delete_edges = delete_edges
+        self.__delete_nodes = delete_nodes
+
+    def generate(self) -> CellComplex:
+        rnd = np.random.default_rng(self._seed)
+        points = rnd.uniform(size=(self._nodes,self.__ndim))
+
+        tri = Delaunay(points)
+        cells = [ self.generate_cell(rnd, tri, length) for length in self._cell_lengths]
+        
+        can_delete_node = np.ones(self._nodes)
+        for a,b in tri.convex_hull:
+            can_delete_node[a] = 0
+            can_delete_node[b] = 0
+        for cell in cells:
+            for node in cell:
+                can_delete_node[node] = 0
+        deleted_nodes = set()
+        if np.sum(can_delete_node) > 0:
+            deleted_nodes = set(rnd.choice(self._nodes, size=self.__delete_nodes, replace=False, p=can_delete_node / np.sum(can_delete_node)))
+        else:
+            print('WARN: cannot delete any nodes')
+        idx_diff = np.zeros(self._nodes)
+        for node in deleted_nodes:
+            idx_diff[node:] += 1
+
+        def new_lbl(node_lbl: int) -> int:
+            return node_lbl - int(idx_diff[node_lbl])
+        
+        cells = [ tuple(new_lbl(n) for n in cell) for cell in cells ]
+        edges = list(set().union(*[calc_edges(tuple(simpl)) for simpl in tri.simplices]))
+        edges = [ (new_lbl(a), new_lbl(b)) for a,b in edges if len(deleted_nodes.intersection({a,b})) == 0 ]
+        edges.sort()
+
+        can_delete_edge = np.ones(len(edges))
+        for (a,b) in tri.convex_hull:
+            norm_edge = normalize_cell((new_lbl(a), new_lbl(b)))
+            can_delete_edge[edges.index(norm_edge)] = 0
+        for cell in cells:
+            for edge in calc_edges(cell):
+                can_delete_edge[edges.index(edge)] = 0
+
+        delete_edges = rnd.choice(edges, self.__delete_edges, replace=False, p=can_delete_edge / np.sum(can_delete_edge))
+
+        nodes_should_exist = { new_lbl(n) for n in set(range(self._nodes)).difference(deleted_nodes) }
+        for edge in delete_edges:
+            edges.remove(tuple(edge))
+
+        # make sure no nodes are disconnected, adding edges if necessary
+        nodes_after_edge_deletion = {e[0] for e in edges}.union({e[1] for e in edges})
+        missing_nodes = nodes_should_exist.difference(nodes_after_edge_deletion)
+        choice_list = list(nodes_after_edge_deletion)
+        dont_delete_edges = [ (n, rnd.choice(choice_list)) for n in missing_nodes ]
+
+        result = CellComplex(edges + dont_delete_edges + cells)
+        result.embedding = points[[idx for idx in range(self._nodes) if idx not in deleted_nodes]]
+        return result
+
+
+    def generate_cell(self, rnd: Generator, tri: Delaunay, length: int) -> tuple:
+        """
+        samples a single cell of length `length` from tri by adding simplices until the total boundary is sufficiently long.
+        """
+        assert length >= 3
+
+        added_simplices = np.full(tri.nsimplex, False)
+        shared_boundary = np.zeros(tri.nsimplex, np.int8)
+        is_first = True
+
+        border_nodes = []
+
+        while len(border_nodes) < length:
+            next_add_simpl = None
+            if is_first:
+                next_add_simpl = rnd.choice(tri.nsimplex)
+                border_nodes = list(tri.simplices[next_add_simpl])
+            else:
+                next_add_simpl = rnd.choice(tri.nsimplex, p=shared_boundary / np.sum(shared_boundary))
+
+            is_loop = False
+            # ensure no loop is closed
+            for idx, point in enumerate(tri.simplices[next_add_simpl]):
+                if point in border_nodes and not is_first:
+                    # if the point is already on the border but neither adjacent neighbor simplex has been added, this simplex would close a loop
+                    if (not added_simplices[tri.neighbors[next_add_simpl][(idx + 1) % 3]]) and (not added_simplices[tri.neighbors[next_add_simpl][(idx + 2) % 3]]):
+                        is_loop = True
+
+            if not is_loop:
+                shared_boundary[next_add_simpl] = 0
+                added_simplices[next_add_simpl] = True
+                closed_wedge = False
+                for idx, point in enumerate(tri.simplices[next_add_simpl]):
+                    # check if point is now an inner point of the cell
+                    if (not is_first) and added_simplices[tri.neighbors[next_add_simpl][(idx + 1) % 3]] and added_simplices[tri.neighbors[next_add_simpl][(idx + 2) % 3]]:
+                        border_nodes.remove(point)
+                        closed_wedge = True
+                if (not closed_wedge) and (not is_first):
+                    new_point = [ p for p in tri.simplices[next_add_simpl] if p not in border_nodes][0]
+                    existing_point_indices = [ border_nodes.index(p) for p in tri.simplices[next_add_simpl] if p != new_point ]
+                    if 0 in existing_point_indices and 1 not in existing_point_indices:
+                        # between first and last element -> append at end
+                        border_nodes.append(new_point)
+                    else:
+                        # insert in the middle
+                        border_nodes.insert(max(existing_point_indices), new_point)
+                for neighbor in tri.neighbors[next_add_simpl]:
+                    if neighbor != -1 and not added_simplices[neighbor]:
+                        shared_boundary[neighbor] += 1
+                is_first = False
+        return normalize_cell(tuple(border_nodes))
+
+
+
+class SmallWorldComplexGenerator(CellComplexGenerator):
+    """
+    CellComplexGenerator that generates a cell complex based on a Newman-Watts-Strogatz small-world model (a ring lattice built with NetworkX plus randomly added shortcut edges)
+    """
+    __p: float
+    __k: int
+
+    def __init__(self, seed: int = 34905852, nodes: int = 50, cell_lengths: Iterable[int] = (5, 5, 10, 10),
+                 p: float = 0.01, k: int = 4):
+        super().__init__(seed, nodes, cell_lengths)
+        assert k > 3
+        self.__p = p
+        self.__k = k
+
+    def generate(self) -> CellComplex:
+        G = nx.watts_strogatz_graph(self._nodes, self.__k, 0, seed=self._seed)
+        rnd = np.random.default_rng(self._seed + 1)
+        cells = [self.gen_cell(rnd, length) for length in self._cell_lengths]
+
+        for a,b in itertools.combinations(G.nodes, 2):
+            if a < b:
+                if rnd.choice([True, False], p=[self.__p, 1-self.__p]):
+                    cells.append((a,b))
+        
+        return CellComplex(list(map(lambda e: tuple(sorted(e)), G.edges)) + cells)
+
+    def gen_cell(self, rnd: Generator, length: int) -> tuple:
+        """
+        Draws a start node and generates a cycle over the next `length` nodes by alternating between 'path forward' and 'path backward' nodes.
+        """
+        start_node = rnd.choice(self._nodes)
+        cell = (start_node,) + tuple(range(start_node+1,start_node+length, 2)) + tuple(reversed(range(start_node+2, start_node+length, 2)))
+        return tuple(map(lambda x: x % self._nodes, cell))
diff --git a/readme_src/LOGO_ERC-FLAG_FP.png b/readme_src/LOGO_ERC-FLAG_FP.png
new file mode 100644
index 0000000000000000000000000000000000000000..6c8c555f9aec0fa9af3c51887d875e8754c85d3a
Binary files /dev/null and b/readme_src/LOGO_ERC-FLAG_FP.png differ
diff --git a/rules.png b/rules.png
new file mode 100644
index 0000000000000000000000000000000000000000..c92f9988c2ea2ae90a132696ce542b1722e045d4
Binary files /dev/null and b/rules.png differ
diff --git a/workflow/Snakefile b/workflow/Snakefile
new file mode 100644
index 0000000000000000000000000000000000000000..b9920b289531ce064f1732b4efe31c9b748ff38c
--- /dev/null
+++ b/workflow/Snakefile
@@ -0,0 +1,265 @@
+
+from helper import coinsizes
+
+rule all:
+    input:
+        "figures/time_exp.pdf",
+        "figures/inference_exp.pdf",
+        "figures/inference_exp_full.pdf",
+        "figures/approx_exp_error.pdf",
+        "figures/approx_exp_error_iter.pdf",
+        "figures/approx_exp_noise.pdf",
+        "figures/approx_exp_noise_iter.pdf",
+        "figures/realworld_exp_error.pdf",
+        "figures/realworld_exp_error_iter.pdf",
+        "figures/heuristic_exp.pdf",
+        expand("figures/realworld_tntp-{city}_exp_error{type}.pdf", city=["Anaheim", "Barcelona", "Berlin-Center", "Berlin-Mitte-Prenzlauerberg-Friedrichshain-Center", "Winnipeg"], type=['', '_iter'])
+    shell:
+        "echo 'ok'"
+
+rule time_exp:
+    output:
+        "results/runtime/{method}/{model}/{size}.csv"
+    script:
+        "scripts/time_complexity.py"
+
+rule time_exp_plot:
+    input:
+        expand("results/runtime/max/triangulation/{size}.csv", size=coinsizes(50,100000), method=['max', 'similarity']),
+        expand("results/runtime/max/smallworld/{size}.csv", size=coinsizes(50,10000), method=['max', 'similarity']),
+        expand("results/runtime/similarity/triangulation/{size}.csv", size=coinsizes(50,50000), method=['max', 'similarity']),
+        expand("results/runtime/similarity/smallworld/{size}.csv", size=coinsizes(50,5000), method=['max', 'similarity'])
+    output:
+        "figures/time_exp.pdf"
+    script:
+        "scripts/plot_time_compl.py"
+
+rule inference_exp_plot_full:
+    input:
+        expand("results/{exp_name}/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x11.csv", model=['triangulation'], size=[50], flows=[1,5,10,15,20], noise=[0,0.2,0.4,0.6,0.8,1.0,1.2,1.4,1.6,1.8,2.0], cell_len=[3,5,10],cells=[5], iterations=[5], cell_candidates=[10], exp_name=['opt-inference', 'inference'], method=['max', 'similarity']),
+        expand("results/{exp_name}/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x11.csv", model=['triangulation'], size=[50], flows=[1,5,10,15,20], noise=[0,0.2,0.4,0.6,0.8,1.0,1.2,1.4,1.6,1.8,2.0], cell_len=[3,5,10],cells=[5], iterations=[5], cell_candidates=[1], exp_name=['heuristic-only'], method=['max', 'similarity'])
+    params:
+        col='exp_name',
+        row='flows'
+    output:
+        "figures/inference_exp_full.pdf"
+    script:
+        "scripts/plot_inference.py"
+
+rule inference_exp_plot:
+    input:
+        expand("results/inference/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x11.csv", model=['triangulation'], size=[50], flows=[20], noise=[0,0.2,0.4,0.6,0.8,1.0,1.2,1.4,1.6,1.8,2.0], cell_len=[3,5,10],cells=[5], iterations=[5], cell_candidates=[10], method=['max', 'similarity']),
+    output:
+        "figures/inference_exp.pdf"
+    script:
+        "scripts/plot_inference.py"
+
+rule heuristic_exp_plot:
+    input:
+        expand("results/heuristic/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{cell_candidates}x{clusters}.csv", model=['triangulation'], size=[100], flows=[20], noise=[0,0.2,0.4,0.6,0.8,1.0,1.2,1.4,1.6,1.8,2.0], cell_len=[3,5,10],cells=[5], cell_candidates=[5], method=['max', 'similarity'], clusters=[11])
+    output:
+        "figures/heuristic_exp.pdf"
+    script:
+        "scripts/plot_heuristic.py"
+
+rule inference_experiment:
+    output:
+        "results/inference/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x{clusters}.csv"
+    params:
+        exp_name='inference'
+    script:
+        "scripts/inference_experiment.py"
+
+rule heuristic_only_experiment:
+    output:
+        "results/heuristic-only/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x{clusters}.csv"
+    params:
+        exp_name='heuristic_only'
+    script:
+        "scripts/inference_experiment.py"
+
+rule opt_inference_experiment:
+    output:
+        "results/opt-inference/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x{clusters}.csv"
+    params:
+        opt=True,
+        exp_name='opt_inference'
+    script:
+        "scripts/inference_experiment.py"
+
+rule approx_experiment:
+    output:
+        "results/approximation/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x{clusters}.csv"
+    script:
+        "scripts/approx_experiment.py"
+
+rule heuristic_experiment:
+    output:
+        "results/heuristic/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{cell_candidates}x{clusters}.csv"
+    script:
+        "scripts/heuristic_experiment.py"
+
+approx_exp_params_no_noise = {
+    'size': [50], 'flows': [20], 'cell_len': [10], 'cells': [10], 'iterations': [200], 'cell_candidates': [10], 'clusters': [21]
+}
+approx_exp_params = {
+    'noise': [.75], **approx_exp_params_no_noise
+}
+
+rule plot_approx_exp_error_noise_iter:
+    input:
+        expand("results/approximation/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x{clusters}.csv", method=['triangles', 'max', 'similarity', 'ground_truth'], model=['triangulation'], noise=[0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2], **approx_exp_params_no_noise)
+    params:
+        x_axis='noise',
+        x_label=r'edge noise ($\sigma_n$)',
+        sparsity=20,
+        sparsity_measure='iterations'
+        #max_entries=120
+    output:
+        "figures/approx_exp_noise_iter.pdf"
+    script:
+        "scripts/plot_approx_exp_error.py"
+
+rule plot_approx_exp_error_noise:
+    input:
+        expand("results/approximation/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x{clusters}.csv", method=['triangles', 'max', 'similarity'], model=['triangulation'], noise=[0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2], **approx_exp_params_no_noise),
+        expand("results/approximation/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x{clusters}.csv", method=['ground_truth'], model=['triangulation'], noise=[0,.1,.2,.3,.4,.5,.6,.7,.8,.9,1], **approx_exp_params_no_noise)
+    params:
+        x_axis='noise',
+        x_label=r'edge noise ($\sigma_n$)',
+        sparsity=200
+        #max_entries=120
+    output:
+        "figures/approx_exp_noise.pdf"
+    script:
+        "scripts/plot_approx_exp_error.py"
+
+rule plot_approx_exp_error:
+    input:
+        expand("results/approximation/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x{clusters}.csv", method=['triangles', 'max', 'ground_truth', 'similarity'], model=['triangulation'], **approx_exp_params)
+    params:
+        x_axis='total_cell_len',
+        x_label='$||B_2||_0$'
+        #max_entries=120
+    output:
+        "figures/approx_exp_error.pdf"
+    script:
+        "scripts/plot_approx_exp_error.py"
+
+rule plot_approx_exp_error_iter:
+    input:
+        expand("results/approximation/{method}/{model}x{size}/{flows}x{noise}/{cell_len}x{cells}/{iterations}x{cell_candidates}x{clusters}.csv", method=['triangles', 'max', 'ground_truth', 'similarity'], model=['triangulation'], **approx_exp_params)
+    params:
+        x_axis='iterations',
+        x_label='scr-C_2'
+        #max_entries=120
+    output:
+        "figures/approx_exp_error_iter.pdf"
+    script:
+        "scripts/plot_approx_exp_error.py"
+
+rule get_taxi_dataset:
+    output:
+        "resources/taxi/Manhattan-taxi-trajectories.tar.gz"
+    shell:
+        "gdown 1o6bBC7m9IMYQ1OdCBWjLfMw6MXQPmv9J -O {output}"
+
+rule extract_taxi_dataset:
+    input: 
+        "resources/taxi/Manhattan-taxi-trajectories.tar.gz"
+    output:
+        expand("resources/taxi/{file}", file=['README.txt', 'neighborhoods.txt', 'medallions.txt', 'Manhattan-taxi-trajectories.txt'])
+    shell:
+        "tar -xzf {input} -C resources/taxi --strip-components 1"
+
+rule download_tntp_birmingham:
+    output:
+        "resources/tntp-Birmingham/trips.tntp"
+    shell:
+        "wget https://raw.githubusercontent.com/bstabler/TransportationNetworks/master/Birmingham-England/Birmingham_Trips.tntp -O {output}"
+
+rule download_tntp_dataset:
+    output:
+        "resources/tntp-{city}/{type}.tntp"
+    run:
+        try:
+            shell("wget https://raw.githubusercontent.com/bstabler/TransportationNetworks/master/{wildcards.city}/{wildcards.city}_{wildcards.type}.tntp -O {output}")
+        except Exception:
+            shell("wget https://raw.githubusercontent.com/bstabler/TransportationNetworks/master/{wildcards.city}/" + wildcards.city.lower() + "_{wildcards.type}.tntp -O {output}")
+
+rule process_tntp_data:
+    input:
+        "resources/tntp-{city}/trips.tntp"
+    output:
+        expand("resources/tntp-{{city}}/{file}", file=['graph.txt', 'flows.csv'])
+    script:
+        "scripts/process_tntp.py"
+
+rule process_taxi_data:
+    input:
+        "resources/taxi/Manhattan-taxi-trajectories.txt"
+    output:
+        expand("resources/taxi/{file}", file=['graph.txt', 'flows.csv'])
+    script:
+        "scripts/process_taxi.py"
+
+rule simulate_realworld:
+    output:
+        "results/realworld/{dataset}/{method}/{flows}/{iterations}x{cell_candidates}x{clusters}.csv",
+        "results/realworld/{dataset}/{method}/{flows}/{iterations}x{cell_candidates}x{clusters}_result.txt"
+    input:
+        expand("resources/{{dataset}}/{file}", file=['graph.txt', 'flows.csv'])
+    script:
+        "scripts/realworld_experiment.py"
+
+rule plot_realworld_exp_error:
+    input:
+        expand("results/realworld/{{dataset}}/{method}/{flows}/{iterations}x{cell_candidates}x{clusters}.csv", method=['triangles', 'max', 'similarity'], flows=[1], iterations=[1000], cell_candidates=[1,5,20], clusters=[11])
+    params:
+        #x_axis='iterations',
+        x_axis='total_cell_len',
+        x_label='$||B_2||_0$',
+        alt_max_x=3000
+    output:
+        "figures/realworld_{dataset}_exp_error.pdf"
+    script:
+        "scripts/plot_realworld_exp_error.py"
+
+rule plot_realworld_exp_error_iter:
+    input:
+        expand("results/realworld/{{dataset}}/{method}/{flows}/{iterations}x{cell_candidates}x{clusters}.csv", method=['triangles', 'max', 'similarity'], flows=[1], iterations=[1000], cell_candidates=[1,5,20], clusters=[11])
+    params:
+        x_axis='iterations',
+        #x_axis='total_cell_len',
+        #x_label='$||B_2||_0$',
+        #max_x=500
+    output:
+        "figures/realworld_{dataset}_exp_error_iter.pdf"
+    script:
+        "scripts/plot_realworld_exp_error.py"
+
+rule plot_realworld_taxi_exp_error:
+    input:
+        expand("results/realworld/taxi/{method}/{flows}/{iterations}x{cell_candidates}x{clusters}.csv", method=['triangles', 'max', 'similarity'], flows=[10], iterations=[1000], cell_candidates=[1,5,20], clusters=[11])
+    params:
+        #x_axis='iterations',
+        x_axis='total_cell_len',
+        x_label='$||B_2||_0$',
+        max_x=300
+    output:
+        "figures/realworld_exp_error.pdf"
+    script:
+        "scripts/plot_realworld_exp_error.py"
+
+rule plot_realworld_taxi_exp_error_iter:
+    input:
+        expand("results/realworld/taxi/{method}/{flows}/{iterations}x{cell_candidates}x{clusters}.csv", method=['triangles', 'max', 'similarity'], flows=[10], iterations=[1000], cell_candidates=[1,5,20], clusters=[11])
+    params:
+        x_axis='iterations',
+        #x_axis='total_cell_len',
+        #x_label='$||B_2||_0$',
+        max_x=200
+    output:
+        "figures/realworld_exp_error_iter.pdf"
+    script:
+        "scripts/plot_realworld_exp_error.py"
\ No newline at end of file
diff --git a/workflow/config/config.yaml b/workflow/config/config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/workflow/envs/environment.yaml b/workflow/envs/environment.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..165f5dbf6e03fc3403f1d329601a58bd8a4b485a
--- /dev/null
+++ b/workflow/envs/environment.yaml
@@ -0,0 +1,15 @@
+channels:
+  - bioconda
+  - conda-forge
+dependencies:
+  - snakemake-minimal >=7.3
+  - matplotlib
+  - pandas
+  - networkx
+  - numpy
+  - scikit-learn
+  - graphviz
+  - seaborn
+  - gdown
+  - openmatrix
+  - numba
\ No newline at end of file
diff --git a/workflow/helper.py b/workflow/helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..5871ec9124a6b4ce6b0f8b6d9f8d5c185711b5aa
--- /dev/null
+++ b/workflow/helper.py
@@ -0,0 +1,16 @@
+
+import numpy as np
+
+def coinsizes(lower, upper):
+    """
+    returns an array of numbers corresponding to approximately exponential 'coin-size' numbers:
+    1,2,5,10,20,50,100,200,500,...
+    lower, upper are inclusive.
+    """
+    lower_log = int(np.floor(np.log10(lower)))
+    upper_log = int(np.floor(np.log10(upper)))
+    result = []
+    for i in range(lower_log, upper_log + 1):
+        base = int(10 ** i)
+        result += [factor * base for factor in [1,2,5] if factor * base >= lower and factor * base <= upper]
+    return result
\ No newline at end of file
diff --git a/workflow/scripts/approx_experiment.py b/workflow/scripts/approx_experiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..d27477288796a75344d8ed43d34131850dc90063
--- /dev/null
+++ b/workflow/scripts/approx_experiment.py
@@ -0,0 +1,71 @@
+"""
+Evaluates the approximation error with the given parameters.
+"""
+
+from script_utils import method_map
+
+import time
+import pandas as pd
+from cell_complexes import CellComplexDetectionExperiment
+from cell_complexes.generator import TriangulationComplexGenerator, SmallWorldComplexGenerator
+from cell_flower.detection import CellSearchMethod
+from snakemake.script import Snakemake
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+
+
+method = snakemake.wildcards['method']
+model = snakemake.wildcards['model']
+size = int(snakemake.wildcards['size'])
+flows = int(snakemake.wildcards['flows'])
+noise = float(snakemake.wildcards['noise'])
+cell_len = int(snakemake.wildcards['cell_len'])
+cells = int(snakemake.wildcards['cells'])
+iterations = int(snakemake.wildcards['iterations'])
+cell_candidates = int(snakemake.wildcards['cell_candidates'])
+n_clusters = int(snakemake.wildcards['clusters'])
+
+generator = TriangulationComplexGenerator(
+            nodes = size + size // 10,
+            delete_nodes = size // 10,
+            delete_edges = size // 10,
+            cell_lengths = [cell_len] * cells
+        )
+
+if model == 'smallworld':
+    generator = SmallWorldComplexGenerator(
+            nodes=size, 
+            cell_lengths = [cell_len] * cells
+        )
+    
+experiment = CellComplexDetectionExperiment(
+    cell_compl=generator,
+    flow_counts=[flows],
+    noise_sigma=noise,
+    runs=10,
+    cells_per_step=cell_candidates,
+    num_curls=iterations,
+    approx_epsilon=-1,
+    search_method=method_map[method],
+    n_clusters=n_clusters
+)
+
+experiment.run()
+
+df_approx = experiment.detailed_results
+
+df_approx['model'] = model
+df_approx['size'] = size
+df_approx['noise'] = noise
+df_approx['cell_len'] = cell_len
+df_approx['cells'] = cells
+df_approx['n_clusters'] = n_clusters
+
+df_approx.to_csv(snakemake.output[0])
diff --git a/workflow/scripts/generate_figures.py b/workflow/scripts/generate_figures.py
new file mode 100644
index 0000000000000000000000000000000000000000..b65585bd4a677aa7ba5c31cbf760bf694e6085b6
--- /dev/null
+++ b/workflow/scripts/generate_figures.py
@@ -0,0 +1,24 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+
+def runtime_exp():
+    """
+    runtime in relation to graph size experiment
+    """
+    df_runtimes = pd.read_csv('results/runtime_exp.csv', index_col=0)
+    df_runtimes.columns = df_runtimes.columns.map(int)
+    print(df_runtimes.mean())
+    fig, ax = plt.subplots()
+    ax.fill_between(df_runtimes.columns, df_runtimes.mean() - df_runtimes.std(), df_runtimes.mean() + df_runtimes.std(), color='lightblue')
+    ax.plot(df_runtimes.mean(), linestyle='-', marker='o')
+    ax.set_xlabel('number of nodes in graph [-]')
+    ax.set_ylabel('time [s]')
+    ax.loglog()
+    return fig
+
+figures = {
+    'time_exp': runtime_exp()
+}
+
+for name, fig in figures.items():
+    fig.savefig(f'paper/figures/{name}.pdf')
\ No newline at end of file
diff --git a/workflow/scripts/heuristic_experiment.py b/workflow/scripts/heuristic_experiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..063b75210952f94e867532657ff245c455903f03
--- /dev/null
+++ b/workflow/scripts/heuristic_experiment.py
@@ -0,0 +1,62 @@
+"""
+Evaluates how many ground-truth cells are recovered by the heuristic (first iteration) with the given parameters.
+"""
+
+from script_utils import method_map
+
+import pandas as pd
+from cell_complexes import CellComplexDetectionExperiment
+from cell_complexes.generator import TriangulationComplexGenerator, SmallWorldComplexGenerator
+from snakemake.script import Snakemake
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+
+method = snakemake.wildcards['method']
+model = snakemake.wildcards['model']
+size = int(snakemake.wildcards['size'])
+flows = int(snakemake.wildcards['flows'])
+noise = float(snakemake.wildcards['noise'])
+cell_len = int(snakemake.wildcards['cell_len'])
+cells = int(snakemake.wildcards['cells'])
+cell_candidates = int(snakemake.wildcards['cell_candidates'])
+n_clusters = int(snakemake.wildcards['clusters'])
+
+generator = TriangulationComplexGenerator(
+            nodes = size + size // 10,
+            delete_nodes = size // 10,
+            delete_edges = size // 10,
+            cell_lengths = [cell_len] * cells
+        )
+
+if model == 'smallworld':
+    generator = SmallWorldComplexGenerator(
+            nodes=size, 
+            cell_lengths = [cell_len] * cells
+        )
+    
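+# num_curls=1: only the first (heuristic) iteration is evaluated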
+experiment = CellComplexDetectionExperiment(
+    cell_compl=generator,
+    flow_counts=[flows],
+    noise_sigma=noise,
+    runs=20,
+    cells_per_step=cell_candidates,
+    num_curls=1,
+    search_method=method_map[method],
+    n_clusters=n_clusters
+)
+
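+# one row per run: absolute and relative number of ground-truth cells found by the heuristic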
+df_inference = pd.DataFrame([
+        {'model': model, 'size': size, 'flows': flows, 'noise': noise, 'cell_len': cell_len,
+         'cells': cells, 'cell_candidates': cell_candidates, 'n_clusters': n_clusters,
+         'run': run, 'correct_abs': correct, 'correct_percent': correct / cells, 'method': method} 
+        for _, run, correct in experiment.eval_heuristic_step()
+    ])
+
+df_inference.to_csv(snakemake.output[0])
\ No newline at end of file
diff --git a/workflow/scripts/inference_experiment.py b/workflow/scripts/inference_experiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bd1b2fb714b2841bcda7e51806755b27c07694b
--- /dev/null
+++ b/workflow/scripts/inference_experiment.py
@@ -0,0 +1,72 @@
+"""
+Evaluates the inference accuracy with the given parameters.
+"""
+
+from script_utils import method_map
+
+import pandas as pd
+from cell_complexes import CellComplexDetectionExperiment
+from cell_complexes.generator import TriangulationComplexGenerator, SmallWorldComplexGenerator
+from snakemake.script import Snakemake
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+
+method = snakemake.wildcards['method']
+model = snakemake.wildcards['model']
+size = int(snakemake.wildcards['size'])
+flows = int(snakemake.wildcards['flows'])
+noise = float(snakemake.wildcards['noise'])
+cell_len = int(snakemake.wildcards['cell_len'])
+cells = int(snakemake.wildcards['cells'])
+iterations = int(snakemake.wildcards['iterations'])
+cell_candidates = int(snakemake.wildcards['cell_candidates'])
+n_clusters = int(snakemake.wildcards['clusters'])
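+# optional rule params (not wildcards): 'opt' is passed as always_opt_cell, 'exp_name' only tags the output rows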
+opt = snakemake.params.get('opt', False)
+exp_name = snakemake.params.get('exp_name', '-')
+
+
+generator = TriangulationComplexGenerator(
+            nodes = size + size // 10,
+            delete_nodes = size // 10,
+            delete_edges = size // 10,
+            cell_lengths = [cell_len] * cells
+        )
+
+if model == 'smallworld':
+    generator = SmallWorldComplexGenerator(
+            nodes=size, 
+            cell_lengths = [cell_len] * cells
+        )
+    
+experiment = CellComplexDetectionExperiment(
+    cell_compl=generator,
+    flow_counts=[flows],
+    noise_sigma=noise,
+    runs=10,
+    cells_per_step=cell_candidates,
+    num_curls=iterations,
+    always_opt_cell=opt,
+    search_method=method_map[method],
+    n_clusters=n_clusters,
+)
+
+
+experiment.run()
+
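+# correct_cells_found[flows] holds, per run, the number of ground-truth cells that were recovered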
+df_inference = pd.DataFrame([
+        {'model': model, 'size': size, 'flows': flows, 'noise': noise, 'cell_len': cell_len,
+         'cells': cells, 'iterations': iterations, 'cell_candidates': cell_candidates, 'n_clusters': n_clusters,
+         'run': run, 'correct_abs': correct, 'correct_percent': correct / cells,
+         'exp_name': exp_name, 'method': method} 
+        for run, correct in enumerate((experiment.correct_cells_found)[flows])
+    ]) 
+
+df_inference.to_csv(snakemake.output[0])
\ No newline at end of file
diff --git a/workflow/scripts/plot_approx_exp_error.py b/workflow/scripts/plot_approx_exp_error.py
new file mode 100644
index 0000000000000000000000000000000000000000..cca4c7359d8f2c698efc83788dcd71520f14fec0
--- /dev/null
+++ b/workflow/scripts/plot_approx_exp_error.py
@@ -0,0 +1,60 @@
+import pandas as pd
+import numpy as np
+import seaborn as sns
+from snakemake.script import Snakemake
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+
+df_data = pd.concat([pd.read_csv(file) for file in snakemake.input])
+
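+# map the CellSearchMethod string representations stored in the CSVs to the labels used in the plot legend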
+method_dict = {
+    'CellSearchMethod.TRIANGLES': 'triangles',
+    'CellSearchMethod.MAX': 'max',
+    'CellSearchMethod.GROUND_TRUTH': 'true_cells',
+    'CellSearchMethod.CLUSTER': 'similarity'
+}
+
+
+df_data['method'] = df_data['method'].map(method_dict.get)
+df_data['approx_error'] = np.sqrt(df_data.approx_error)
+print(df_data.method.unique())
+
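+# optional filtering: 'max_entries' caps the representation size; 'sparsity' keeps, per run, the result closest to the sparsity budget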
+if 'max_entries' in snakemake.params.keys():
+    df_data = df_data[df_data.total_cell_len <= int(snakemake.params['max_entries'])]
+
+if 'sparsity' in snakemake.params.keys():
+    sparsity_measure = snakemake.params.get('sparsity_measure', 'total_cell_len')
+    df_data = df_data[df_data[sparsity_measure] <= int(snakemake.params.sparsity)].sort_values(sparsity_measure, ascending=False).groupby(['run', 'noise', 'method']).first()
+
+#for run in df_data['run'].unique():
+#    max_error = df_data.loc[df_data.run == run, 'approx_error'].max()
+#    df_data.loc[df_data.run == run, 'approx_error'] /= max_error
+
+sns.set_theme()
+
+x_axis = snakemake.params.get('x_axis', 'total_cell_len')
+x_label = snakemake.params.get('x_label', x_axis)
+
+plot: sns.FacetGrid = sns.relplot(
+    data=df_data, kind="line", x=x_axis, y="approx_error", hue='method', #marker='o',
+    height=2.2, aspect=1.61803,
+    units='run', estimator=None, linewidth=.3, hue_order=['triangles', 'max', 'similarity', 'true_cells']
+)
+
+if x_label == '$||B_2||_0$':
+    x_label = r'$\|\mathbf{B}_2\|_0$'
+
+if x_label == 'scr-C_2':
+    x_label = r'$|\mathscr{C}_2|$'
+
+plot.set_axis_labels(x_label, r'$\mathrm{loss}(\mathscr{C},\mathbf{F})$')
+
+plot.savefig(snakemake.output[0])
\ No newline at end of file
diff --git a/workflow/scripts/plot_heuristic.py b/workflow/scripts/plot_heuristic.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3c38810d5922b2a3dfaf3ef378db06d6e4f0221
--- /dev/null
+++ b/workflow/scripts/plot_heuristic.py
@@ -0,0 +1,26 @@
+import pandas as pd
+import seaborn as sns
+from snakemake.script import Snakemake
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+
+df_data = pd.concat([pd.read_csv(file) for file in snakemake.input])
+
+sns.set_theme()
+
+plot = sns.relplot(
+    data=df_data, kind="line", x="noise", y="correct_percent", hue='cell_len',
+    height=2.2, aspect=1.61803,
+    style='method', palette='tab10'  #, col='flows'
+)
+
+plot.set_axis_labels(r'edge noise ($\sigma_n$)', r'correct')
+
+plot.savefig(snakemake.output[0])
\ No newline at end of file
diff --git a/workflow/scripts/plot_inference.py b/workflow/scripts/plot_inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..e326c8c48566590700f853e5a4cdc762288f5704
--- /dev/null
+++ b/workflow/scripts/plot_inference.py
@@ -0,0 +1,30 @@
+import pandas as pd
+import seaborn as sns
+from snakemake.script import Snakemake
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+
+df_data = pd.concat([pd.read_csv(file) for file in snakemake.input])
+
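+# optional 'col'/'row' params facet the plot into a grid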
+col = snakemake.params.get('col', None)
+row = snakemake.params.get('row', None)
+
+sns.set_theme()
+
+plot = sns.relplot(
+    data=df_data, kind="line", x="noise", y="correct_percent", hue='cell_len', 
+    height=2.2, aspect=1.61803,
+    style='method', palette='tab10', col=col, row=row
+)
+
+
+plot.set_axis_labels(r'edge noise ($\sigma_n$)', r'correct')
+
+plot.savefig(snakemake.output[0])
\ No newline at end of file
diff --git a/workflow/scripts/plot_realworld_exp_error.py b/workflow/scripts/plot_realworld_exp_error.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0f42b6af71ddfa23a373a8e417b8f25c612d8cb
--- /dev/null
+++ b/workflow/scripts/plot_realworld_exp_error.py
@@ -0,0 +1,62 @@
+from math import isnan
+import pandas as pd
+import numpy as np
+import seaborn as sns
+from snakemake.script import Snakemake
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+x_axis = snakemake.params.get('x_axis', 'total_cell_len')
+x_label = snakemake.params.get('x_label', x_axis)
+
+df_data = pd.concat([pd.read_csv(file) for file in snakemake.input])
+df_data['approx_error'] = np.sqrt(df_data.approx_error)
+
+method_dict = {
+    'CellSearchMethod.TRIANGLES': 'triangles',
+    'CellSearchMethod.MAX': 'max',
+    'CellSearchMethod.GROUND_TRUTH': 'true_cells',
+    'CellSearchMethod.CLUSTER': 'similarity'
+}
+
+df_data['method'] = df_data['method'].map(method_dict.get)
+
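+# limit the x-range: either an explicit 'max_x' param, or the point where the error drops below 1% of its maximum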
+if 'max_x' in snakemake.params.keys():
+    df_data = df_data[df_data[x_axis] <= int(snakemake.params['max_x'])]
+else:
+    # find x where first approach is below 1% of initial error
+    max_error = df_data.approx_error.max()
+    max_x = df_data[df_data.approx_error < 0.01 * max_error][x_axis].min()
+    if 'alt_max_x' in snakemake.params.keys():
+        alt_max = int(snakemake.params.alt_max_x)
+        if np.isnan(max_x) or max_x > alt_max:
+            max_x = alt_max
+    if not isnan(max_x):
+        df_data = df_data[df_data[x_axis] <= max_x]
+
+#for run in df_data['run'].unique():
+#    max_error = df_data.loc[df_data.run == run, 'approx_error'].max()
+#    df_data.loc[df_data.run == run, 'approx_error'] /= max_error
+
+sns.set_theme()
+
+
+plot: sns.FacetGrid = sns.relplot(
+    data=df_data, kind="line", x=x_axis, y="approx_error", hue='method', #marker='o',
+    height=2.2, aspect=1.61803,
+    style='cell_candidates'
+)
+
+if x_label == '$||B_2||_0$':
+    x_label = r'$\|\mathbf{B}_2\|_0$'
+
+plot.set_axis_labels(x_label, r'$\mathrm{loss}(\mathscr{C},\mathbf{F})$')
+plot.set(yscale='log')
+
+plot.savefig(snakemake.output[0])
\ No newline at end of file
diff --git a/workflow/scripts/plot_time_compl.py b/workflow/scripts/plot_time_compl.py
new file mode 100644
index 0000000000000000000000000000000000000000..02ac905adb808949af387d1c0637516c0c15ebb7
--- /dev/null
+++ b/workflow/scripts/plot_time_compl.py
@@ -0,0 +1,28 @@
+import pandas as pd
+import seaborn as sns
+from snakemake.script import Snakemake
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+
+df_data = pd.concat([pd.read_csv(file) for file in snakemake.input])
+
+sns.set_theme()
+
+plot = sns.relplot(
+    data=df_data, kind="line", x="size", y="time", hue='model', marker='o',
+    height=2.2, aspect=1.61803, style='method'
+)
+
+plot.set(xscale='log')
+plot.set(xlabel=r'size ($|\mathscr{C}_0|$)')
+plot.set(yscale='log')
+plot.set(ylabel='time [s]')
+
+plot.savefig(snakemake.output[0])
\ No newline at end of file
diff --git a/workflow/scripts/process_taxi.py b/workflow/scripts/process_taxi.py
new file mode 100644
index 0000000000000000000000000000000000000000..f999e2c04af6c6ac33b45799d9c501febade4675
--- /dev/null
+++ b/workflow/scripts/process_taxi.py
@@ -0,0 +1,60 @@
+"""
+Processes the taxi trajectory data to a graph with edge flows
+"""
+
+# add repository root to the import path
+from pathlib import Path
+import sys
+sys.path.append(str((Path(__file__).parent.parent.parent).absolute()))
+
+import itertools
+from collections import defaultdict
+
+import pandas as pd
+from snakemake.script import Snakemake
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+
+trajectory_file = open(snakemake.input[0], 'r')
+
+all_flows = []
+all_nodes = set()
+all_edges = set()
+
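+# each input line is one trajectory: a comma-separated sequence of node ids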
+line: str
+for line in trajectory_file.readlines():
+    trajectory = list(map(int,line.split(',')))
+    flow_dict = defaultdict(lambda: 0)
+    all_flows.append(flow_dict)
+    for a,b in itertools.pairwise(trajectory):
+        all_nodes.add(a)
+        all_nodes.add(b)
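+        # orient edges from smaller to larger node id; flows touching node 0 are skipped (the edge is still recorded)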
+        if a < b:
+            if a != 0 and b != 0:
+                flow_dict[(a,b)] += 1
+            all_edges.add((a,b))
+        if a > b:
+            if a != 0 and b != 0:
+                flow_dict[(b,a)] -= 1
+            all_edges.add((b,a))
+
+trajectory_file.close()
+
+edges = sorted(all_edges)
+
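+# cell list for CellComplex: all nodes as singleton tuples followed by the sorted edges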
+cells = [(x,) for x in sorted(all_nodes)] + edges
+
+graph_file = open(snakemake.output[0], 'w')
+graph_file.write(str(cells))
+graph_file.close()
+
+df_flows = pd.DataFrame(all_flows, columns=edges)
+df_flows.to_csv(snakemake.output[1])
\ No newline at end of file
diff --git a/workflow/scripts/process_tntp.py b/workflow/scripts/process_tntp.py
new file mode 100644
index 0000000000000000000000000000000000000000..d115071c8ca2ef4335013c112157fd2f96ac5547
--- /dev/null
+++ b/workflow/scripts/process_tntp.py
@@ -0,0 +1,88 @@
+"""
+Processes TransportationNetworks (TNTP) trip data into a graph with edge flows.
+"""
+
+# add repository root to the import path
+from pathlib import Path
+import sys
+sys.path.append(str((Path(__file__).parent.parent.parent).absolute()))
+
+import pandas as pd
+from ast import literal_eval
+from snakemake.script import Snakemake
+
+def import_trips(matfile: str, min_flow_ratio = 0.01) -> tuple[list[tuple], pd.DataFrame]:
+    """
+    imports a matrix of flows from the TransportationNetworks format.
+
+    The result is a tuple (cell list, flow matrix)
+    - cell list: list of all nodes as singleton tuples followed by all edges. Can be used to construct CellComplex
+    - flow matrix: pandas DataFrame with edges as columns and rows representing the flows (currently always one row)
+
+    Adapted from https://github.com/bstabler/TransportationNetworks/blob/master/_scripts/parsing%20networks%20in%20Python.ipynb
+    """
+    f = open(matfile, 'r')
+    all_rows = f.read()
+    f.close()
+    
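+    # the trips file consists of 'Origin <n>' blocks listing 'destination: flow;' pairs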
+    blocks = all_rows.split('Origin')[1:]
+    matrix = {}
+    for block in blocks:
+        orig = block.split('\n')
+        dests = orig[1:]
+        orig = int(orig[0])
+
+        d = [literal_eval('{'+a.replace(';',',').replace(' ','') +'}') for a in dests]
+        destinations = {}
+        for i in d:
+            destinations = {**destinations, **i}
+        matrix[orig] = destinations
+
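+    # switch to 0-based node ids and collect undirected edges together with their directed flows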
+    node_set = set()
+    edge_set = set()
+    flow_map = {}
+    for u, targets in matrix.items():
+        node_set.add(u-1)
+        for v, flow in targets.items():
+            edge_set.add((min(u,v) - 1, max(u,v) -1))
+            flow_map[(u-1,v-1)] = flow
+            node_set.add(v-1)
+    nodes = [(n,) for n in range(max(node_set) + 1)]
+
+    edges = sorted(edge_set)
+    
+    res = pd.Series(.0, index=edges)
+    for (a,b) in edges:
+        res[(a,b)] = flow_map.get((a,b),0) - flow_map.get((b,a),0)
+    
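+    # keep only edges whose net flow is at least min_flow_ratio times the largest net flow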
+    res = res[res.abs() >= min_flow_ratio * res.abs().max()]
+    edges = sorted(res.index.to_list())
+
+    return nodes + edges, pd.DataFrame(res).T
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+
+tripfile = snakemake.input[0]
+
+cells, df_flows = import_trips(tripfile)
+
+max_flow = df_flows.abs().max().max()
+total_flow = df_flows.abs().sum().sum()
+
+print(f"processed flow, max={max_flow:.2f}, total={total_flow:.2f}, edges={len(df_flows.columns)}, nodes={len([c for c in cells if len(c) == 1])}, flows={len(df_flows.index)}")
+
+print((df_flows > 1).sum().sum())
+
+graph_file = open(snakemake.output[0], 'w')
+graph_file.write(str(cells))
+graph_file.close()
+
+df_flows.to_csv(snakemake.output[1])
\ No newline at end of file
diff --git a/workflow/scripts/realworld_experiment.py b/workflow/scripts/realworld_experiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..6918f21803bd4f63bbcecac75a65bc62413fa089
--- /dev/null
+++ b/workflow/scripts/realworld_experiment.py
@@ -0,0 +1,55 @@
+"""
+Evaluates the approximation error on real-world edge-flow data with the given parameters.
+"""
+
+from script_utils import method_map
+
+import pandas as pd
+from cell_complexes import CellComplexDetectionExperiment
+from cell_flower.cell_complex import CellComplex
+from ast import literal_eval
+from snakemake.script import Snakemake
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+
+method = snakemake.wildcards['method']
+flows = int(snakemake.wildcards['flows'])
+iterations = int(snakemake.wildcards['iterations'])
+cell_candidates = int(snakemake.wildcards['cell_candidates'])
+n_clusters = int(snakemake.wildcards['clusters'])
+
+cell_compl_file = open(snakemake.input[0])
+realworld_complex = CellComplex(literal_eval(cell_compl_file.read()))
+
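+# load the edge flows and aggregate them into 'flows' groups by row index modulo 'flows'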
+df_flows = pd.read_csv(snakemake.input[1], index_col=0).fillna(0)
+df_flows = df_flows.groupby(df_flows.index.map(int) % flows).sum()
+    
+experiment = CellComplexDetectionExperiment(
+    cell_compl=realworld_complex,
+    flows=[df_flows.to_numpy() for _ in range(20)],
+    flow_counts=[flows],
+    runs=1,
+    cells_per_step=cell_candidates,
+    num_curls=iterations,
+    approx_epsilon=-1,
+    search_method=method_map[method],
+    n_clusters=n_clusters,
+    agg_results=False
+)
+
+experiment.run()
+
+df_approx = experiment.detailed_results
+
+df_approx.to_csv(snakemake.output[0])
+
+inf_cells_file = open(snakemake.output[1], 'w')
+inf_cells_file.write(str(experiment.inferred_cells[flows]))
+inf_cells_file.close()
\ No newline at end of file
diff --git a/workflow/scripts/script_utils.py b/workflow/scripts/script_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6cd56909cc04be64cf4c700133602c7e4a502d22
--- /dev/null
+++ b/workflow/scripts/script_utils.py
@@ -0,0 +1,15 @@
+
+# add code to import path
+from pathlib import Path
+import sys
+sys.path.append(str((Path(__file__).parent.parent.parent).absolute()))
+
+from cell_flower.detection import CellSearchMethod
+from cell_complexes.experiment import GROUND_TRUTH
+
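+# translate the 'method' wildcard values used in the workflow to the corresponding search-method constants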
+method_map = {
+    'max': CellSearchMethod.MAX,
+    'triangles': CellSearchMethod.TRIANGLES,
+    'ground_truth': GROUND_TRUTH,
+    'similarity': CellSearchMethod.CLUSTER,
+}
diff --git a/workflow/scripts/time_complexity.py b/workflow/scripts/time_complexity.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a98d8eefec385506ba19714e95c1a0ef5112b24
--- /dev/null
+++ b/workflow/scripts/time_complexity.py
@@ -0,0 +1,66 @@
+"""
+Tests the runtime for the given parameters
+"""
+
+# sys and resource are used below to raise the recursion and stack limits;
+# importing script_utils also adds the repository root to the import path
+import sys, resource
+from script_utils import method_map
+
+import pandas as pd
+from snakemake.script import Snakemake
+from cell_complexes import CellComplexDetectionExperiment
+from cell_complexes.generator import TriangulationComplexGenerator, SmallWorldComplexGenerator
+
+# large graphs can exceed Python's default recursion limit in the spanning-tree computation,
+# so we temporarily raise it with this context manager
+class recursionlimit:
+    def __init__(self, limit):
+        self.limit = limit
+
+    def __enter__(self):
+        self.old_limit = sys.getrecursionlimit()
+        sys.setrecursionlimit(self.limit)
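+        # also raise the OS stack size limit so the deeper recursion does not overflow the stack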
+        resource.setrlimit(resource.RLIMIT_STACK, (2**29,-1))
+
+    def __exit__(self, type, value, tb):
+        sys.setrecursionlimit(self.old_limit)
+
+def fix_smk() -> Snakemake:
+    """
+    Helper function to make linters think `snakemake` exists
+    and to add type annotation. Doesn't change any code behavior.
+    """
+    return snakemake
+
+snakemake = fix_smk()
+
+size: int = int(snakemake.wildcards['size'])
+model: str = snakemake.wildcards['model']
+method = snakemake.wildcards['method']
+
+generator = TriangulationComplexGenerator(
+            nodes = size + size // 10,
+            delete_nodes = size // 10,
+            delete_edges = size // 10
+        )
+
+if model == 'smallworld':
+    generator = SmallWorldComplexGenerator(nodes=size, p=0.01)
+
+experiment = CellComplexDetectionExperiment(
+    cell_compl=generator,
+    flow_counts=[5],
+    runs=10,
+    search_method=method_map[method],
+    n_clusters=9
+)
+
+with recursionlimit(20000):
+    experiment.run()
+
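+# execution_time[5] contains one measured duration per run for the 5-flow setting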
+df_runtimes = pd.DataFrame([
+        {'model': model, 'size': size, 'run': run, 'time': duration, 'method': method, 'n_clusters': 9} 
+        for run, duration in enumerate((experiment.execution_time)[5])
+    ])
+
+df_runtimes.to_csv(snakemake.output[0])
\ No newline at end of file