diff --git a/imspy/examples/simulation/run_example_simulation.py b/imspy/examples/simulation/run_example_simulation.py
new file mode 100644
index 00000000..640b876e
--- /dev/null
+++ b/imspy/examples/simulation/run_example_simulation.py
@@ -0,0 +1,101 @@
+from imspy.simulation.experiment import LcImsMsMs
+from imspy.simulation.hardware_models import (NeuralChromatographyApex,
+                                              NormalChromatographyProfileModel,
+                                              LiquidChromatography,
+                                              ElectroSpray,
+                                              TrappedIon,
+                                              TOF,
+                                              NeuralIonMobilityApex,
+                                              NormalIonMobilityProfileModel,
+                                              AveragineModel,
+                                              BinomialIonSource
+                                              )
+from imspy.proteome import ProteinSample, Trypsin, ORGANISM
+from imspy.chemistry.mass import BufferGas
+
+import pandas as pd
+import numpy as np
+
+
+def irt_to_rt(irt):
+    return irt
+
+
+def scan_im_interval(scan_id):
+    intercept = 1451.357
+    slope = -877.361
+    scan_id = np.atleast_1d(scan_id)
+    lower = (scan_id - intercept) / slope
+    upper = ((scan_id + 1) - intercept) / slope
+    return np.stack([1 / lower, 1 / upper], axis=1)
+
+
+def im_to_scan(reduced_ion_mobility):
+    intercept = 1451.357
+    slope = -877.361
+    # TODO: find a more appropriate function here?
+    one_over_k0 = 1 / reduced_ion_mobility
+    return np.round(one_over_k0 * slope + intercept).astype(np.int16)
+
+
+def build_experiment():
+    t = LcImsMsMs("./timstofexp1_binomial_ion_source_21_7/")  # maybe rather call this class LCIMSMSExperiment
+
+    lc = LiquidChromatography()
+    lc.frame_length = 1200  # ms
+    lc.gradient_length = 120  # min
+    esi = ElectroSpray()
+    tims = TrappedIon()
+    tims.scan_time = 110  # us
+    tof_mz = TOF()
+
+    t.lc_method = lc
+    t.ionization_method = esi
+    t.ion_mobility_separation_method = tims
+    t.mz_separation_method = tof_mz
+
+    N2 = BufferGas("N2")
+
+    tokenizer_path = '/home/tim/Workspaces/ionmob/pretrained-models/tokenizers/tokenizer.json'
+
+    rt_model_weights = "/home/tim/Workspaces/Resources/models/DeepChromatograpy/"
+    t.lc_method.apex_model = NeuralChromatographyApex(rt_model_weights, tokenizer_path=tokenizer_path)
+
+    t.lc_method.profile_model = NormalChromatographyProfileModel()
+    t.lc_method.irt_to_rt_converter = irt_to_rt
+
+    im_model_weights = "/home/tim/Workspaces/ionmob/pretrained-models/GRUPredictor"
+    t.ion_mobility_separation_method.apex_model = NeuralIonMobilityApex(im_model_weights, tokenizer_path=tokenizer_path)
+
+    t.ion_mobility_separation_method.profile_model = NormalIonMobilityProfileModel()
+    t.ion_mobility_separation_method.buffer_gas = N2
+    t.ion_mobility_separation_method.scan_to_reduced_im_interval_converter = scan_im_interval
+    t.ion_mobility_separation_method.reduced_im_to_scan_converter = im_to_scan
+
+    t.ionization_method.ionization_model = BinomialIonSource()
+
+    t.mz_separation_method.model = AveragineModel()
+
+    rng = np.random.default_rng(2023)
+    # read proteome
+    proteome = pd.read_feather('/home/tim/Workspaces/Resources/Homo-sapiens-proteome.feather')
+    random_abundances = rng.integers(1e3, 1e7, size=proteome.shape[0])
+    proteome = proteome.assign(abundancy=random_abundances)
+    # create sample and sample digest; TODO: add missed cleavages to ENZYMEs
+    sample = ProteinSample(proteome, ORGANISM.HOMO_SAPIENS)
+    sample_digest = sample.digest(Trypsin())
+
+    # down-sample the digest to reduce computational load in this example
+    sample_digest.data = sample_digest.data.sample(100, random_state=rng)
+
+    t.load_sample(sample_digest)
+    return t
+
+
+if __name__ == "__main__":
+
+    t = build_experiment()
+
+    # cProfile.run("t.run(10000)", filename="profiler_10000_8_process", sort="cumtime")
+    t.run(100, frames_per_assemble_process=10)
\ No newline at end of file
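# --- Example (not part of the patch): sanity check of the linear scan <-> 1/K0
# calibration hard-coded in run_example_simulation.py above. The intercept and
# slope are the same values used there; the round trip should reproduce the
# scan ids exactly after rounding.
import numpy as np

intercept, slope = 1451.357, -877.361

def scan_to_one_over_k0(scan_id):
    return (scan_id - intercept) / slope

def one_over_k0_to_scan(one_over_k0):
    return np.round(one_over_k0 * slope + intercept).astype(int)

scans = np.array([100, 400, 700])
assert np.all(one_over_k0_to_scan(scan_to_one_over_k0(scans)) == scans)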
diff --git a/imspy/imspy/chemistry/mass.py b/imspy/imspy/chemistry/mass.py
index 1cba89da..a1819638 100644
--- a/imspy/imspy/chemistry/mass.py
+++ b/imspy/imspy/chemistry/mass.py
@@ -1,11 +1,21 @@
 import re
 from collections import defaultdict
 import numpy as np
+import mendeleev as me
 
 MASS_PROTON = 1.007276466583
 MASS_NEUTRON = 1.00866491597
 MASS_ELECTRON = 0.00054857990946
-MASS_H2O = 18.0105646863
+MASS_WATER = 18.0105646863
+
+# IUPAC standard temperature in Kelvin
+STANDARD_TEMPERATURE = 273.15
+# IUPAC standard pressure in Pa
+STANDARD_PRESSURE = 1e5
+# elementary charge in C
+ELEMENTARY_CHARGE = 1.602176634e-19
+# Boltzmann constant in J/K
+K_BOLTZMANN = 1.380649e-23
 
 AMINO_ACIDS = {
     'Lysine': 'K',
@@ -146,7 +156,7 @@ def calculate_monoisotopic_mass(sequence):
     mass_sequence = np.sum([AMINO_ACID_MASSES[amino_acid] * count for amino_acid, count in aa_counts.items()])
     mass_modifics = np.sum([MODIFICATIONS_MZ_NUMERICAL[mod] * count for mod, count in mod_counts.items()])
 
-    return mass_sequence + mass_modifics + MASS_H2O
+    return mass_sequence + mass_modifics + MASS_WATER
 
 
 def calculate_mz(monoisotopic_mass, charge):
@@ -174,3 +184,151 @@ def calculate_mz_from_sequence(sequence, charge):
         float: The m/z of the sequence.
     """
     return calculate_mz(calculate_monoisotopic_mass(sequence), charge)
+
+
+def get_monoisotopic_token_weight(token: str):
+    """
+    Gets the monoisotopic weight of a token.
+
+    :param token: Token of an amino acid sequence, e.g. "[UNIMOD:1]"
+    :type token: str
+    :return: Weight in Dalton.
+    :rtype: float
+    """
+    splits = token.split("[")
+    for i in range(1, len(splits)):
+        splits[i] = "[" + splits[i]
+
+    mass = 0
+    for split in splits:
+        mass += AMINO_ACID_MASSES[split]
+    return mass
+
+
+def get_mono_isotopic_weight(sequence_tokenized: list[str]) -> float:
+    mass = 0
+    for token in sequence_tokenized:
+        mass += get_monoisotopic_token_weight(token)
+    return mass + MASS_WATER
+
+
+def get_mass_over_charge(mass: float, charge: int) -> float:
+    return (mass / charge) + MASS_PROTON
+
+
+def get_num_protonizable_sites(sequence: str) -> int:
+    """
+    Gets the number of sites that can be protonated.
+    This function does not yet account for PTMs.
+
+    :param sequence: Amino acid sequence
+    :type sequence: str
+    :return: Number of protonizable sites
+    :rtype: int
+    """
+    sites = 1  # n-terminus
+    for s in sequence:
+        if s in ["H", "R", "K"]:
+            sites += 1
+    return sites
+
+
+class ChemicalCompound:
+
+    def _calculate_molecular_mass(self):
+        mass = 0
+        for (atom, abundance) in self.element_composition.items():
+            mass += me.element(atom).atomic_weight * abundance
+        return mass
+
+    def __init__(self, formula):
+        self.element_composition = self.get_composition(formula)
+        self.mass = self._calculate_molecular_mass()
+
+    def get_composition(self, formula: str):
+        """
+        Parse a chemical formula into a Dict[str, int] with
+        atoms as keys and the respective counts as values.
+
+        :param formula: Chemical formula of compound, e.g. 'C6H12O6'
+        :type formula: str
+        :return: Dictionary Atom: Count
+        :rtype: Dict[str, int]
+        """
+        if formula.startswith("("):
+            assert formula.endswith(")")
+            formula = formula[1:-1]
+
+        tmp_group = ""
+        tmp_group_count = ""
+        depth = 0
+        comp_list = []
+        comp_counts = []
+
+        # extract components: everything in brackets and atoms
+        # extract component counts: number behind component or 1
+        for (i, e) in enumerate(formula):
+            if e == "(":
+                depth += 1
+                if depth == 1:
+                    if tmp_group != "":
+                        comp_list.append(tmp_group)
+                        tmp_group = ""
+                        if tmp_group_count == "":
+                            comp_counts.append(1)
+                        else:
+                            comp_counts.append(int(tmp_group_count))
+                        tmp_group_count = ""
+                tmp_group += e
+                continue
+            if e == ")":
+                depth -= 1
+                tmp_group += e
+                continue
+            if depth > 0:
+                tmp_group += e
+                continue
+            if e.isupper():
+                if tmp_group != "":
+                    comp_list.append(tmp_group)
+                    tmp_group = ""
+                    if tmp_group_count == "":
+                        comp_counts.append(1)
+                    else:
+                        comp_counts.append(int(tmp_group_count))
+                    tmp_group_count = ""
+                tmp_group += e
+                continue
+            if e.islower():
+                tmp_group += e
+                continue
+            if e.isnumeric():
+                tmp_group_count += e
+        if tmp_group != "":
+            comp_list.append(tmp_group)
+            if tmp_group_count == "":
+                comp_counts.append(1)
+            else:
+                comp_counts.append(int(tmp_group_count))
+
+        # assemble dictionary from component lists
+        atom_dict = {}
+        for (comp, count) in zip(comp_list, comp_counts):
+            if not comp.startswith("("):
+                atom_dict[comp] = count
+            else:
+                atom_dicts_depth = self.get_composition(comp)
+                for atom in atom_dicts_depth:
+                    atom_dicts_depth[atom] *= count
+                    if atom in atom_dict:
+                        atom_dict[atom] += atom_dicts_depth[atom]
+                    else:
+                        atom_dict[atom] = atom_dicts_depth[atom]
+                atom_dicts_depth = {}
+        return atom_dict
+
+
+class BufferGas(ChemicalCompound):
+
+    def __init__(self, formula: str):
+        super().__init__(formula)
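# --- Example (not part of the patch): parsing formulas with the new
# ChemicalCompound class. Assumes the `mendeleev` package is installed;
# bracketed groups are expanded recursively by get_composition.
from imspy.chemistry.mass import BufferGas, ChemicalCompound

glucose = ChemicalCompound("C6H12O6")
print(glucose.element_composition)      # {'C': 6, 'H': 12, 'O': 6}

slaked_lime = ChemicalCompound("Ca(OH)2")
print(slaked_lime.element_composition)  # {'Ca': 1, 'O': 2, 'H': 2}

n2 = BufferGas("N2")                    # buffer gas used by the IMS simulation
print(round(n2.mass, 2))                # ~28.01, average mass from atomic weights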
diff --git a/imspy/imspy/chemistry/mobility.py b/imspy/imspy/chemistry/mobility.py
index 796f15c8..d668d527 100644
--- a/imspy/imspy/chemistry/mobility.py
+++ b/imspy/imspy/chemistry/mobility.py
@@ -1,5 +1,7 @@
 import numpy as np
 
+SUMMARY_CONSTANT = 18509.8632163405
+
 
 def one_over_k0_to_ccs(one_over_k0, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15):
     """
@@ -11,7 +13,6 @@ def one_over_k0_to_ccs(one_over_k0, mz, charge, mass_gas=28.013, temp=31.85, t_d
     :param temp: temperature of the drift gas in C°
     :param t_diff: factor to translate from C° to K
     """
-    SUMMARY_CONSTANT = 18509.8632163405
    reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas)
    return (SUMMARY_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / one_over_k0)
 
@@ -26,6 +27,5 @@ def ccs_to_one_over_k0(ccs, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.
     :param temp: temperature of the drift gas in C°
     :param t_diff: factor to translate from C° to K
     """
-    SUMMARY_CONSTANT = 18509.8632163405
    reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas)
    return ((np.sqrt(reduced_mass * (temp + t_diff))) * ccs) / (SUMMARY_CONSTANT * charge)
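# --- Example (not part of the patch): the two conversions are exact algebraic
# inverses, so hoisting SUMMARY_CONSTANT to module level must not change the
# round trip. Values below are an arbitrary doubly charged ion with the N2
# drift-gas defaults.
from imspy.chemistry.mobility import ccs_to_one_over_k0, one_over_k0_to_ccs

ccs = one_over_k0_to_ccs(one_over_k0=1.2, mz=650.0, charge=2)
assert abs(ccs_to_one_over_k0(ccs, mz=650.0, charge=2) - 1.2) < 1e-9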
+ + Returns: + MzSpectrum: Scaled spectrum + """ + tmp: pims.PyMzSpectrum = self.__spec_ptr * scale + return self.from_py_mz_spectrum(tmp) def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5, - min_intensity: float = 1) -> Tuple[NDArray, List['MzSpectrum']]: + min_intensity: float = 1) -> Tuple[NDArray, List[MzSpectrum]]: """Convert the spectrum to a list of windows. Args: @@ -178,8 +229,20 @@ def to_windows(self, window_length: float = 10, overlapping: bool = True, min_nu indices, windows = self.__spec_ptr.to_windows(window_length, overlapping, min_num_peaks, min_intensity) return indices, [MzSpectrum.from_py_mz_spectrum(window) for window in windows] + def to_resolution(self, resolution: int) -> MzSpectrum: + """Bins the spectrum's m/z values to a + given resolution and sums the intensities. + + Args: + resolution (int): Negative decadic logarithm of bin size. + + Returns: + MzSpectrum: A new `MzSpectrum` where m/z values are binned according to the given resolution. + """ + return MzSpectrum.from_py_mz_spectrum(self.__spec_ptr.to_resolution(resolution)) + def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, intensity_min: float = 0.0, - intensity_max: float = 1e9) -> 'MzSpectrum': + intensity_max: float = 1e9) -> MzSpectrum: """Filter the spectrum for a given m/z range and intensity range. Args: @@ -194,7 +257,7 @@ def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, intensity_min: flo return MzSpectrum.from_py_mz_spectrum( self.__spec_ptr.filter_ranged(mz_min, mz_max, intensity_min, intensity_max)) - def vectorized(self, resolution: int = 2) -> 'MzSpectrumVectorized': + def vectorized(self, resolution: int = 2) -> MzSpectrumVectorized: """Convert the spectrum to a vectorized spectrum. Args: @@ -204,8 +267,18 @@ def vectorized(self, resolution: int = 2) -> 'MzSpectrumVectorized': MzSpectrumVectorized: Vectorized spectrum. """ return MzSpectrumVectorized.from_py_mz_spectrum_vectorized(self.__spec_ptr.vectorized(resolution)) + + def to_jsons(self) -> str: + """ + generates json string representation of MzSpectrum + """ + json_dict = {} + json_dict["mz"] = self.mz.tolist() + json_dict["intensity"] = self.intensity.tolist() + return json.dumps(json_dict) + class MzSpectrumVectorized: def __init__(self, indices: NDArray[np.int32], values: NDArray[np.float64], resolution: int): """MzSpectrum class. @@ -237,7 +310,7 @@ def from_py_mz_spectrum_vectorized(cls, spec: pims.PyMzSpectrumVectorized): @property def resolution(self) -> float: """Resolution. - + Returns: float: Resolution. """ @@ -261,6 +334,21 @@ def values(self) -> NDArray[np.float64]: """ return self.__spec_ptr.values + def to_centroided(self, integrate_method: Callable = get_peak_integral) -> MzSpectrum: + """Convert the spectrum to a centroided spectrum. + + Returns: + MzSpectrum: Centroided spectrum. 
+ """ + # first generate dense spectrum + dense_spectrum = MzSpectrumVectorized.from_py_mz_spectrum_vectorized(self.__spec_ptr.to_dense_spectrum(None)) + # find peaks in the dense spectrum and widths with scipy + peaks, peak_info = find_peaks(dense_spectrum.values, height=0, width=(0,0.5)) + # then get the peak integrals + integrals = integrate_method(peaks, peak_info) + # then create a new spectrum with the peak indices and the integrals + return MzSpectrum.from_py_mz_spectrum(pims.PyMzSpectrum(dense_spectrum.indices[peaks]/np.power(10,dense_spectrum.resolution), integrals)) + def __repr__(self): return f"MzSpectrumVectorized(num_values={len(self.values)})" diff --git a/imspy/imspy/feature.py b/imspy/imspy/feature.py new file mode 100644 index 00000000..53ab596f --- /dev/null +++ b/imspy/imspy/feature.py @@ -0,0 +1,211 @@ +import pandas as pd +import numpy as np +from numpy.typing import ArrayLike + +import json + +from imspy.data import TimsSlice, TimsFrame +from imspy.utility import gaussian, exp_gaussian +from imspy.isotopes import IsotopePatternGenerator, create_initial_feature_distribution +from abc import ABC, abstractmethod +from typing import Optional, Dict +class Profile: + + def __init__(self,positions:Optional[ArrayLike] = None, rel_abundancies:Optional[ArrayLike] = None, model_params: Optional[Dict] = None, jsons:Optional[str] = None): + if jsons is not None: + self._jsons = jsons + self.positions, self.rel_abundancies, self.model_params, self.access_dictionary = self._from_jsons(jsons) + else: + self.positions = np.asarray(positions) + self.rel_abundancies = np.asarray(rel_abundancies) + self.model_params = model_params + self.access_dictionary = {p:i for p,i in zip(positions, rel_abundancies)} + self._jsons = self._to_jsons() + + def __iter__(self): + self.__size = len(self.positions) + self.__iterator_pos = 0 + return self + + def __next__(self): + if self.__iterator_pos < self.__size: + p = self.positions[self.__iterator_pos] + ra = self.rel_abundancies[self.__iterator_pos] + self.__iterator_pos += 1 + return p,ra + raise StopIteration + + def __getitem__(self, position: int): + return self.access_dictionary[position] + + def _to_jsons(self): + mp = {} + for key,val in self.model_params.items(): + if isinstance(val,np.generic): + mp[key] = val.item() + else: + mp[key] = val + + json_dict = {"positions": self.positions.tolist(), + "rel_abundancies": self.rel_abundancies.tolist(), + "model_params": mp} + return json.dumps(json_dict, allow_nan = False) + + def _from_jsons(self, jsons:str): + json_dict = json.loads(jsons) + positions = json_dict["positions"] + rel_abundancies = json_dict["rel_abundancies"] + access_dictionary = {p:i for p,i in zip(positions, rel_abundancies)} + return positions,rel_abundancies,json_dict["model_params"], access_dictionary + + @property + def jsons(self): + return self._jsons + +class RTProfile(Profile): + + def __init__(self,frames:Optional[ArrayLike] = None, rel_abundancies:Optional[ArrayLike] = None, model_params: Optional[Dict] = None, jsons:Optional[str] = None): + super().__init__(frames, rel_abundancies, model_params, jsons) + + @property + def frames(self): + return self.positions + +class ScanProfile(Profile): + + def __init__(self,scans:Optional[ArrayLike] = None, rel_abundancies:Optional[ArrayLike] = None, model_params: Optional[Dict] = None, jsons:Optional[str] = None): + super().__init__(scans, rel_abundancies, model_params, jsons) + + @property + def scans(self): + return self.positions + +class ChargeProfile(Profile): + def 
diff --git a/imspy/imspy/feature.py b/imspy/imspy/feature.py
new file mode 100644
index 00000000..53ab596f
--- /dev/null
+++ b/imspy/imspy/feature.py
@@ -0,0 +1,211 @@
+import pandas as pd
+import numpy as np
+from numpy.typing import ArrayLike
+
+import json
+
+from imspy.data import TimsSlice, TimsFrame
+from imspy.utility import gaussian, exp_gaussian
+from imspy.isotopes import IsotopePatternGenerator, create_initial_feature_distribution
+from abc import ABC, abstractmethod
+from typing import Optional, Dict
+
+
+class Profile:
+
+    def __init__(self, positions: Optional[ArrayLike] = None, rel_abundancies: Optional[ArrayLike] = None,
+                 model_params: Optional[Dict] = None, jsons: Optional[str] = None):
+        if jsons is not None:
+            self._jsons = jsons
+            self.positions, self.rel_abundancies, self.model_params, self.access_dictionary = self._from_jsons(jsons)
+        else:
+            self.positions = np.asarray(positions)
+            self.rel_abundancies = np.asarray(rel_abundancies)
+            self.model_params = model_params
+            self.access_dictionary = {p: i for p, i in zip(positions, rel_abundancies)}
+            self._jsons = self._to_jsons()
+
+    def __iter__(self):
+        self.__size = len(self.positions)
+        self.__iterator_pos = 0
+        return self
+
+    def __next__(self):
+        if self.__iterator_pos < self.__size:
+            p = self.positions[self.__iterator_pos]
+            ra = self.rel_abundancies[self.__iterator_pos]
+            self.__iterator_pos += 1
+            return p, ra
+        raise StopIteration
+
+    def __getitem__(self, position: int):
+        return self.access_dictionary[position]
+
+    def _to_jsons(self):
+        mp = {}
+        for key, val in self.model_params.items():
+            if isinstance(val, np.generic):
+                mp[key] = val.item()
+            else:
+                mp[key] = val
+
+        json_dict = {"positions": self.positions.tolist(),
+                     "rel_abundancies": self.rel_abundancies.tolist(),
+                     "model_params": mp}
+        return json.dumps(json_dict, allow_nan=False)
+
+    def _from_jsons(self, jsons: str):
+        json_dict = json.loads(jsons)
+        positions = json_dict["positions"]
+        rel_abundancies = json_dict["rel_abundancies"]
+        access_dictionary = {p: i for p, i in zip(positions, rel_abundancies)}
+        return positions, rel_abundancies, json_dict["model_params"], access_dictionary
+
+    @property
+    def jsons(self):
+        return self._jsons
+
+
+class RTProfile(Profile):
+
+    def __init__(self, frames: Optional[ArrayLike] = None, rel_abundancies: Optional[ArrayLike] = None,
+                 model_params: Optional[Dict] = None, jsons: Optional[str] = None):
+        super().__init__(frames, rel_abundancies, model_params, jsons)
+
+    @property
+    def frames(self):
+        return self.positions
+
+
+class ScanProfile(Profile):
+
+    def __init__(self, scans: Optional[ArrayLike] = None, rel_abundancies: Optional[ArrayLike] = None,
+                 model_params: Optional[Dict] = None, jsons: Optional[str] = None):
+        super().__init__(scans, rel_abundancies, model_params, jsons)
+
+    @property
+    def scans(self):
+        return self.positions
+
+
+class ChargeProfile(Profile):
+    def __init__(self, charges: Optional[ArrayLike] = None, rel_abundancies: Optional[ArrayLike] = None,
+                 model_params: Optional[Dict] = None, jsons: Optional[str] = None):
+        # guard the JSON path: charges is None when constructing from `jsons`
+        if jsons is not None:
+            super().__init__(None, None, model_params, jsons)
+        else:
+            charges = np.asarray(charges)
+            rel_abundancies = np.asarray(rel_abundancies)
+            abundant_charges = charges[rel_abundancies > 0]
+            relative_abundancies_over_0 = rel_abundancies[rel_abundancies > 0]
+            super().__init__(abundant_charges, relative_abundancies_over_0, model_params, jsons)
+
+    @property
+    def charges(self):
+        return self.positions
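# --- Example (not part of the patch): Profile subclasses behave like small
# position -> relative-abundance mappings and serialize to JSON for the
# experiment database.
from imspy.feature import ScanProfile

p = ScanProfile(scans=[10, 11, 12], rel_abundancies=[0.2, 0.5, 0.3],
                model_params={"name": "Normal", "mu": 11.0, "sigma": 0.8})
print(p[11])              # 0.5 -> dict-style access by position
for scan, ra in p:        # iterable as (position, abundance) pairs
    print(scan, ra)
restored = ScanProfile(jsons=p.jsons)  # JSON round trip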
+
+
+class FeatureGenerator(ABC):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def generate_feature(self, mz: float, charge: int):
+        pass
+
+
+class PrecursorFeatureGenerator(FeatureGenerator):
+
+    def __init__(self):
+        super().__init__()
+
+    def generate_feature(self, mz: float, charge: int,
+                         pattern_generator: IsotopePatternGenerator,
+                         num_rt: int = 64,
+                         num_im: int = 32,
+                         distr_im=gaussian,
+                         distr_rt=exp_gaussian,
+                         rt_lower: float = -9,
+                         rt_upper: float = 18,
+                         im_lower: float = -4,
+                         im_upper: float = 4,
+                         intensity_amplitude: float = 1e3,
+                         min_intensity: int = 5) -> TimsSlice:
+
+        I = create_initial_feature_distribution(num_rt, num_im, rt_lower, rt_upper,
+                                                im_lower, im_upper, distr_im, distr_rt)
+
+        frame_list = []
+
+        spec = pattern_generator.generate_spectrum(mz, charge, min_intensity=5, centroided=True, k=12)
+        mz, intensity = spec.mz, spec.intensity / np.max(spec.intensity) * 100
+
+        for i in range(num_rt):
+
+            scan_arr, mz_arr, intensity_arr, tof_arr, inv_mob_arr = [], [], [], [], []
+
+            for j in range(num_im):
+                intensity_scaled = intensity * I[i, j]
+                intensity_scaled = intensity_scaled * intensity_amplitude
+                int_intensity = np.array([int(x) for x in intensity_scaled])
+
+                bt = [(x, y) for x, y in zip(mz, int_intensity) if y >= min_intensity]
+
+                mz_tmp = np.array([x for x, y in bt])
+                intensity_tmp = np.array([y for x, y in bt]).astype(np.int32)
+
+                rts = np.repeat(i, intensity_tmp.shape[0]).astype(np.float32)
+                scans = np.repeat(j, intensity_tmp.shape[0]).astype(np.int32)
+                tof = np.ones_like(rts).astype(np.int32)
+                inv_mob = np.ones_like(scans)
+
+                scan_arr.append(scans)
+                mz_arr.append(mz_tmp)
+                intensity_arr.append(intensity_tmp)
+                tof_arr.append(tof)
+                inv_mob_arr.append(inv_mob)
+
+            frame = TimsFrame(None, i, float(i),
+                              np.hstack(scan_arr), np.hstack(mz_arr), np.hstack(intensity_arr),
+                              np.hstack(tof_arr), np.hstack(inv_mob_arr))
+
+            frame_list.append(frame)
+
+        return TimsSlice(None, frame_list, [])
+
+
+class FeatureBatch:
+    def __init__(self, feature_table: pd.DataFrame, raw_data: TimsSlice):
+        """
+        :param feature_table: pandas DataFrame of detected features
+        :param raw_data: TimsSlice holding the raw data the features were detected in
+        """
+        self.feature_table = feature_table.sort_values(['rt_length'], ascending=False)
+        self.raw_data = raw_data
+        self.__feature_counter = 0
+        self.current_row = self.feature_table.iloc[0]
+        r = self.current_row
+        self.current_feature = self.get_feature(r.mz - 0.1, r.mz + (r.num_peaks / r.charge) + 0.1,
+                                                r.im_start,
+                                                r.im_stop,
+                                                r.rt_start,
+                                                r.rt_stop)
+
+    def __repr__(self):
+        return f'FeatureBatch(features={self.feature_table.shape[0]}, slice={self.raw_data})'
+
+    def get_feature(self, mz_min, mz_max, scan_min, scan_max, rt_min, rt_max, intensity_min=20):
+        return self.raw_data.filter_ranged(mz_min=mz_min,
+                                           mz_max=mz_max,
+                                           scan_min=scan_min,
+                                           scan_max=scan_max,
+                                           intensity_min=intensity_min,
+                                           rt_min=rt_min,
+                                           rt_max=rt_max)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        # guard against running past the table; raise StopIteration instead of IndexError
+        if self.__feature_counter >= self.feature_table.shape[0]:
+            raise StopIteration
+
+        feature = self.current_row
+        data = self.current_feature
+
+        self.__feature_counter += 1
+        if self.__feature_counter < self.feature_table.shape[0]:
+            self.current_row = self.feature_table.iloc[self.__feature_counter]
+            r = self.current_row
+            self.current_feature = self.get_feature(r.mz - 0.1,
+                                                    r.mz + (r.num_peaks / r.charge) + 0.1,
+                                                    r.im_start,
+                                                    r.im_stop,
+                                                    r.rt_start,
+                                                    r.rt_stop)
+        return feature, data
diff --git a/imspy/imspy/isotopes.py b/imspy/imspy/isotopes.py
new file mode 100644
index 00000000..da75dae2
--- /dev/null
+++ b/imspy/imspy/isotopes.py
@@ -0,0 +1,242 @@
+from __future__ import annotations
+from typing import Optional, Tuple
+import warnings
+import numpy as np
+from numpy.typing import ArrayLike
+from abc import ABC, abstractmethod
+
+from scipy.signal import argrelextrema
+from imspy.data import MzSpectrum
+from imspy.utility import gaussian, exp_gaussian, normal_pdf
+import numba
+import pyopenms
+
+from imspy.noise import detection_noise
+
+MASS_PROTON = 1.007276466621
+MASS_NEUTRON = 1.00866491595
+
+
+@numba.jit(nopython=True)
+def factorial(n: int):
+    if n <= 0:
+        return 1
+    return n * factorial(n - 1)
+
+
+# linear dependency of mass
+@numba.jit(nopython=True)
+def lam(mass: float, slope: float = 0.000594, intercept: float = -0.03091):
+    """
+    :param mass: peptide mass
+    :param slope: slope of the linear model
+    :param intercept: intercept of the linear model
+    :return: lambda, the Poisson parameter of the isotope distribution
+    """
+    return slope * mass + intercept
+
+
+@numba.jit(nopython=True)
+def weight(mass: float, peak_nums: ArrayLike, normalize: bool = True):
+    """
+    :param mass: peptide mass
+    :param peak_nums: isotopic peak numbers (0 = monoisotopic peak)
+    :param normalize: whether to normalize the weights to sum 1
+    :return: relative intensities of the isotopic peaks
+    """
+    factorials = np.zeros_like(peak_nums)
+    norm = 1
+    for i, k in enumerate(peak_nums):
+        factorials[i] = factorial(k)
+    weights = np.exp(-lam(mass)) * np.power(lam(mass), peak_nums) / factorials
+    if normalize:
+        norm = weights.sum()
+    return weights / norm
+
+
+def get_pyopenms_weights(sequence: str, peak_nums: ArrayLike, generator: pyopenms.CoarseIsotopePatternGenerator):
+    """
+    Gets hypothetical intensities of isotopic peaks.
+
+    :param sequence: Peptide sequence in ProForma format.
+    :type sequence: str
+    :param generator: pyopenms generator for isotopic pattern calculation
+    :type generator: pyopenms.CoarseIsotopePatternGenerator
+    :return: List of isotopic peak intensities
+    :rtype: List
+    """
+    n = peak_nums.shape[0]
+    generator.setMaxIsotope(n)
+    aa_sequence = sequence.strip("_")
+    peptide = pyopenms.AASequence().fromString(aa_sequence.replace("[", "(").replace("]", ")"))
+    formula = peptide.getFormula()
+    distribution = generator.run(formula)
+    intensities = [i.getIntensity() for i in distribution.getContainer()]
+    return intensities
+
+
+@numba.jit(nopython=True)
+def iso(x: ArrayLike, mass: float, charge: float, sigma: float, amp: float, K: int, step_size: float,
+        add_detection_noise: bool = False, mass_neutron: float = MASS_NEUTRON):
+    """
+    :param x: m/z grid to evaluate the pattern on
+    :param mass: peptide mass
+    :param charge: charge state
+    :param sigma: peak width
+    :param amp: amplitude
+    :param K: number of isotopic peaks
+    :param step_size: integration step size
+    :param add_detection_noise: whether to add detection noise
+    :param mass_neutron: mass of a neutron
+    :return: intensities on the m/z grid
+    """
+    k = np.arange(K)
+    means = ((mass + mass_neutron * k) / charge).reshape((1, -1))
+    weights = weight(mass, k).reshape((1, -1))
+    intensities = np.sum(weights * normal_pdf(x.reshape((-1, 1)), means, sigma), axis=1) * step_size
+    if add_detection_noise:
+        return detection_noise(intensities * amp)
+    else:
+        return intensities * amp
+
+
+@numba.jit(nopython=True)
+def numba_generate_pattern(lower_bound: float,
+                           upper_bound: float,
+                           mass: float,
+                           charge: float,
+                           amp: float,
+                           k: int,
+                           sigma: float = 0.008492569002123142,
+                           resolution: int = 3):
+    """
+    :param lower_bound: lower m/z bound of the pattern
+    :param upper_bound: upper m/z bound of the pattern
+    :param mass: peptide mass
+    :param charge: charge state
+    :param amp: amplitude
+    :param k: number of isotopic peaks
+    :param sigma: peak width
+    :param resolution: negative decadic logarithm of the grid spacing
+    :return: (mz, intensities)
+    """
+    step_size = min(sigma / 10, 1 / np.power(10, resolution))
+    size = int((upper_bound - lower_bound) // step_size + 1)
+    mzs = np.linspace(lower_bound, upper_bound, size)
+    intensities = iso(mzs, mass, charge, sigma, amp, k, step_size)
+
+    return mzs + MASS_PROTON, intensities
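# --- Example (not part of the patch): the averagine-style Poisson envelope used
# by iso() and numba_generate_pattern(). lam() grows linearly with mass, so
# heavier peptides spread intensity over more isotopic peaks. The first call
# triggers numba compilation.
import numpy as np
from imspy.isotopes import lam, weight

mass = 2000.0
print(round(lam(mass), 3))         # ~1.157, expected number of extra neutrons
print(weight(mass, np.arange(5)))  # normalized intensities of peaks 0..4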
+
+#@numba.jit(nopython=True)
+def numba_ion_sampler(mass: float, charge: int, sigma: ArrayLike, k: int, ion_count: int, intensity_per_ion: int):
+    sigma = np.atleast_1d(sigma)
+    if sigma.size == 1:
+        sigma = np.repeat(sigma, k)
+    if sigma.size != k:
+        raise ValueError("Sigma must be either length 1 or k")
+
+    us = np.zeros(ion_count)
+    devs_std = np.zeros(ion_count)
+    for ion in range(ion_count):
+        u = np.random.uniform()
+        dev_std = np.random.normal()
+        us[ion] = u
+        devs_std[ion] = dev_std
+
+    weights = weight(mass, np.arange(k)).cumsum()
+
+    def _get_component(u: float, weights_cum_sum: ArrayLike = weights) -> int:
+        for idx, weight_sum in enumerate(weights_cum_sum):
+            if u < weight_sum:
+                return idx
+
+    comps = np.zeros_like(us)
+    devs = np.zeros_like(us)
+    for idx, u in enumerate(us):
+        comp = _get_component(u)
+        comps[idx] = comp
+        devs[idx] = sigma[comp] * devs_std[idx]
+
+    mz = ((comps * MASS_NEUTRON) + mass) / charge + MASS_PROTON + devs
+    i = np.ones_like(mz) * intensity_per_ion
+    return (mz, i)
+
+
+@numba.jit
+def create_initial_feature_distribution(num_rt: int, num_im: int,
+                                        rt_lower: float = -9,
+                                        rt_upper: float = 18,
+                                        im_lower: float = -4,
+                                        im_upper: float = 4,
+                                        distr_im=gaussian,
+                                        distr_rt=exp_gaussian) -> np.array:
+
+    I = np.ones((num_rt, num_im)).astype(np.float32)
+
+    for i, x in enumerate(np.linspace(im_lower, im_upper, num_im)):
+        for j, y in enumerate(np.linspace(rt_lower, rt_upper, num_rt)):
+            I[j, i] *= (distr_im(x) * distr_rt(y))
+
+    return I
+
+
+class IsotopePatternGenerator(ABC):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def generate_pattern(self, mass: float, charge: int) -> Tuple[ArrayLike, ArrayLike]:
+        pass
+
+    @abstractmethod
+    def generate_spectrum(self, mass: int, charge: int, min_intensity: int) -> MzSpectrum:
+        pass
+
+
+class AveragineGenerator(IsotopePatternGenerator):
+    def __init__(self):
+        super().__init__()
+        self.default_abundancy = 1e4
+
+    def generate_pattern(self, mass: float, charge: int, k: int = 7,
+                         amp: Optional[float] = None, resolution: float = 3,
+                         min_intensity: int = 5) -> Tuple[ArrayLike, ArrayLike]:
+        pass
+
+    def generate_spectrum(self, mass: int, charge: int, min_intensity: int = 5, k: int = 7,
+                          amp: Optional[float] = None, resolution: float = 3, centroided: bool = True) -> MzSpectrum:
+        if amp is None:
+            amp = self.default_abundancy
+
+        lb = mass / charge - .2
+        ub = mass / charge + k + .2
+
+        mz, i = numba_generate_pattern(lower_bound=lb, upper_bound=ub,
+                                       mass=mass, charge=charge, amp=amp, k=k, resolution=resolution)
+        # TODO: implement centroid method
+        if centroided:
+            return MzSpectrum(mz, i)\
+                .to_resolution(resolution).filter(lb, ub, min_intensity)\
+                .to_centroided(baseline_noise_level=max(min_intensity, 1), sigma=1/np.power(10, resolution-1))
+
+        return MzSpectrum(mz, i).to_resolution(resolution).filter(lb, ub, min_intensity)
+
+    def generate_spectrum_by_sampling(self,
+                                      mass: float,
+                                      charge: int,
+                                      k: int = 7,
+                                      sigma: ArrayLike = 0.008492569002123142,
+                                      ion_count: int = 1000000,
+                                      resolution: float = 3,
+                                      intensity_per_ion: int = 1,
+                                      centroided=True,
+                                      min_intensity=5) -> MzSpectrum:
+
+        assert 100 <= mass / charge <= 2000, f"m/z should be between 100 and 2000, was: {mass / charge}"
+        mz, i = numba_ion_sampler(mass, charge, sigma, k, ion_count, intensity_per_ion)
+
+        if centroided:
+            return (MzSpectrum(mz, i)
+                    .to_resolution(resolution).filter(-1, -1, min_intensity)
+                    .to_centroided(baseline_noise_level=max(min_intensity, 1), sigma=1/np.power(10, resolution-1))
+                    )
+        return MzSpectrum(mz, i)\
+            .to_resolution(resolution).filter(-1, -1, min_intensity)
\ No newline at end of file
+""" +import numpy as np +import numba +from typing import Callable, Optional, Tuple +from numpy.typing import ArrayLike +from imspy.data import MzSpectrum +from imspy.utility import normal_pdf + +@numba.jit(nopython=True) +def mu_function_normal_default(intensity: ArrayLike) -> ArrayLike: + return intensity + +@numba.jit(nopython=True) +def sigma_function_normal_default(intensity:ArrayLike) -> ArrayLike: + return np.sqrt(intensity) + +@numba.jit(nopython=True) +def mu_function_poisson_default(intensity:ArrayLike) -> ArrayLike: + offset = 0 + return intensity+ offset + +@numba.jit(nopython=True) +def detection_noise(signal: ArrayLike, + method: str = "poisson", + custom_mu_function: Optional[Callable] = None, + custom_sigma_function: Optional[Callable] = None) -> ArrayLike: + """ + + :param signal: + :param method: + :param custom_mu_function: + :param custom_sigma_function: + """ + if method == "normal": + if custom_sigma_function is None: + sigma_function:Callable = sigma_function_normal_default + else: + sigma_function:Callable = custom_sigma_function + if custom_mu_function is None: + mu_function:Callable = mu_function_normal_default + else: + mu_function:Callable = custom_mu_function + + sigmas = sigma_function(signal) + mus = mu_function(signal) + noised_signal = np.zeros_like(mus) + + for i in range(noised_signal.size): + s = sigmas[i] + m = mus[i] + n = np.random.normal()*s+m + noised_signal[i] = n + + elif method == "poisson": + if custom_sigma_function is not None: + raise ValueError("Sigma function is not used if method is 'poisson'") + if custom_mu_function is None: + mu_function:Callable = mu_function_poisson_default + else: + mu_function:Callable = custom_mu_function + mus = mu_function(signal) + noised_signal = np.zeros_like(mus) + + for i in range(noised_signal.size): + mu = mus[i] + n = np.random.poisson(lam = mu) + noised_signal[i] = n + + else: + raise NotImplementedError("This method is not implemented, choose 'normal' or 'poisson'.") + + return noised_signal + +@numba.jit(nopython=True) +def generate_noise_peak(pos:float, sigma: float, intensity: float, min_intensity:int = 0, resolution:float = 3): + step_size = min(sigma/10,1/np.power(10,resolution)) + lower = int((pos-4*sigma)//step_size) + upper = int((pos+4*sigma)//step_size) + mzs = np.arange(lower,upper)*step_size + intensities = normal_pdf(mzs,pos,sigma,normalize=True)*intensity*step_size + return (mzs[intensities>=min_intensity], intensities[intensities>=min_intensity].astype(np.int64)) + + +def baseline_shot_noise_window(window:MzSpectrum, + window_theoretical_mz_min:float, + window_theoretical_mz_max:float, + expected_noise_peaks: int = 5, + expected_noise_intensity: float = 10, + expected_noise_sigma:float = 0.001, + resolution:float = 3) -> MzSpectrum: + """ + + """ + num_noise_peaks = np.random.poisson(lam=expected_noise_peaks) + noised_window = MzSpectrum([],[]) + for i in range(num_noise_peaks): + location_i = np.random.uniform(window_theoretical_mz_min,window_theoretical_mz_max) + intensity_i = np.random.exponential(expected_noise_intensity) + sigma_i = np.random.exponential(expected_noise_sigma) + noise_mz, noise_intensity = generate_noise_peak(location_i,sigma_i,intensity_i,resolution=resolution) + noise_peak = MzSpectrum(noise_mz, noise_intensity) + noised_window = noised_window+noise_peak + return noised_window + + +def baseline_shot_noise(spectrum:MzSpectrum,window_size:float=50,expected_noise_peaks_per_Th:int=10,min_intensity:int = 5, resolution = 3): + """ + + + """ + min_mz = 
+
+
+def baseline_shot_noise_window(window: MzSpectrum,
+                               window_theoretical_mz_min: float,
+                               window_theoretical_mz_max: float,
+                               expected_noise_peaks: int = 5,
+                               expected_noise_intensity: float = 10,
+                               expected_noise_sigma: float = 0.001,
+                               resolution: float = 3) -> MzSpectrum:
+    """
+    Generates a shot-noise spectrum for a single m/z window.
+    """
+    num_noise_peaks = np.random.poisson(lam=expected_noise_peaks)
+    noised_window = MzSpectrum([], [])
+    for i in range(num_noise_peaks):
+        location_i = np.random.uniform(window_theoretical_mz_min, window_theoretical_mz_max)
+        intensity_i = np.random.exponential(expected_noise_intensity)
+        sigma_i = np.random.exponential(expected_noise_sigma)
+        noise_mz, noise_intensity = generate_noise_peak(location_i, sigma_i, intensity_i, resolution=resolution)
+        noise_peak = MzSpectrum(noise_mz, noise_intensity)
+        noised_window = noised_window + noise_peak
+    return noised_window
+
+
+def baseline_shot_noise(spectrum: MzSpectrum, window_size: float = 50, expected_noise_peaks_per_Th: int = 10,
+                        min_intensity: int = 5, resolution=3):
+    """
+    Adds shot-noise peaks across the m/z range of `spectrum`, window by window.
+    """
+    min_mz = spectrum.mz.min() - 0.1
+    max_mz = spectrum.mz.max() + 0.1
+    bins, windows = spectrum.windows(window_size, overlapping=False, min_peaks=0, min_intensity=0)
+    noised_spectrum = spectrum
+    for b, w in zip(bins, windows):
+        lower = b * window_size
+        upper = (b + 1) * window_size
+        noised_spectrum = noised_spectrum + baseline_shot_noise_window(w, lower, upper, window_size * expected_noise_peaks_per_Th)
+    return noised_spectrum.to_resolution(resolution).filter(min_mz, max_mz, min_intensity)
+
+
+def baseline_noise():
+    pass
\ No newline at end of file
diff --git a/imspy/imspy/proteome.py b/imspy/imspy/proteome.py
new file mode 100644
index 00000000..85103430
--- /dev/null
+++ b/imspy/imspy/proteome.py
@@ -0,0 +1,322 @@
+from __future__ import annotations
+import os
+import warnings
+import numpy as np
+from numpy.typing import ArrayLike
+import pandas as pd
+import sqlite3
+from imspy.data import MzSpectrum
+from imspy.chemistry import get_mono_isotopic_weight, MASS_PROTON
+
+from imspy.feature import RTProfile, ScanProfile, ChargeProfile
+from imspy.utility import tokenize_proforma_sequence, TokenSequence, get_aa_num_proforma_sequence
+from enum import Enum
+from abc import ABC, abstractmethod
+
+from typing import Optional, List, Tuple, Union
+
+
+class ENZYME(Enum):
+    TRYPSIN = 1
+
+
+class ORGANISM(Enum):
+    HOMO_SAPIENS = 1
+
+
+class Enzyme(ABC):
+    def __init__(self, name: ENZYME):
+        self.name = name
+
+    @abstractmethod
+    def calculate_cleavages(self, sequence: str) -> np.array:
+        pass
+
+    def digest(self, sequence: str, missed_cleavages: int, min_length) -> list[str]:
+        pass
+
+
+class Trypsin(Enzyme):
+    def __init__(self, name: ENZYME = ENZYME.TRYPSIN):
+        super().__init__(name)
+        self.name = name
+
+    def calculate_cleavages(self, sequence: str):
+        """
+        Scans the sequence for cleavage motifs
+        and returns a list of peptide intervals.
+
+        :param sequence: protein sequence to digest
+        :type sequence: str
+        :return: list of (start, end) peptide intervals
+        :rtype: List[Tuple[int,int]]
+        """
+        cut_sites = []
+        start = 0
+        end = 0
+        in_unimod_bracket = False
+        for i, aa in enumerate(sequence[:-1]):
+            # skip unimod brackets
+            if aa == "(":
+                in_unimod_bracket = True
+            if in_unimod_bracket:
+                if aa == ")":
+                    in_unimod_bracket = False
+                continue
+
+            # cut after every 'K' or 'R' that is not followed by 'P'
+            next_aa = sequence[i + 1]
+            if ((aa == 'K') or (aa == 'R')) and next_aa != 'P':
+                end = i + 1
+                cut_sites.append((start, end))
+                start = end
+
+        cut_sites.append((start, len(sequence)))
+
+        return np.array(cut_sites)
+
+    def __repr__(self):
+        return f'Enzyme(name: {self.name.name})'
+
+    def digest(self, sequence: str, abundancy: float, missed_cleavages: int = 0, min_length: int = 7):
+        """
+        Digests a protein into peptides.
+
+        :param sequence: Sequence of protein in ProForma standard.
+        :type sequence: str
+        :param abundancy: Abundance of protein
+        :type abundancy: float
+        :param missed_cleavages: Number of allowed missed cleavages, defaults to 0
+        :type missed_cleavages: int, optional
+        :param min_length: Min length of recorded peptides, defaults to 7
+        :type min_length: int, optional
+        :return: List of dictionaries with `sequence`, `start`, `end` and `abundancy` as keys.
+        :rtype: List[Dict]
+        """
+        assert 0 <= missed_cleavages <= 2, f'Number of missed cleavages must be between 0 and 2, was: {missed_cleavages}'
+
+        peptide_intervals = self.calculate_cleavages(sequence)
+
+        # TODO: implement missed cleavages
+        if missed_cleavages == 1:
+            pass
+        if missed_cleavages == 2:
+            pass
+
+        dig_list = []
+
+        for s, e in peptide_intervals:
+            dig_list.append(sequence[s: e])
+
+        # pair sequence digests with indices
+        wi = zip(dig_list, peptide_intervals)
+        # filter out short sequences and clean index display
+        wi = map(lambda p: (p[0], p[1][0], p[1][1]), filter(lambda s: get_aa_num_proforma_sequence(s[0]) >= min_length, wi))
+
+        return list(map(lambda e: {'sequence': e[0], 'start': e[1], 'end': e[2], 'abundancy': abundancy}, wi))
+
+
+class PeptideDigest:
+    def __init__(self, data: pd.DataFrame, organism: ORGANISM, enzyme: ENZYME):
+        self.data = data
+        self.organism = organism
+        self.enzyme = enzyme
+
+    def __repr__(self):
+        return f'PeptideDigest(Sample: {self.organism.name}, Enzyme: {self.enzyme.name}, number peptides: {self.data.shape[0]})'
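# --- Example (not part of the patch): tryptic cleavage intervals and digestion.
# Cuts fall after K/R unless followed by P; peptides shorter than min_length
# (default 7) are dropped from the digest.
from imspy.proteome import Trypsin

enzyme = Trypsin()
print(enzyme.calculate_cleavages("MKTAYIAKQRLP"))
# -> [[ 0  2] [ 2  8] [ 8 10] [10 12]]
peptides = enzyme.digest("MKTAYIAKQRLPGFGDSIEAQCGK", abundancy=1e5)
print([p["sequence"] for p in peptides])  # only 'LPGFGDSIEAQCGK' passes min_length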
if_exists="append") + df_ions.to_sql("Ions", self.con , if_exists="append") + + def load(self, table_name:str, query:Optional[str] = None): + if query is None: + query = f"SELECT * FROM {table_name}" + return pd.read_sql(query,self.con, index_col="index") + + def load_chunks(self, chunk_size: int, query:Optional[str] = None): + if query is None: + query = "SELECT * FROM PeptideDigest" + self.__chunk_generator = pd.read_sql(query,self.con, chunksize=chunk_size, index_col="index") + for chunk in self.__chunk_generator: + chunk.loc[:,"sequence_tokenized"] = chunk.loc[:,"sequence_tokenized"].transform(lambda st: TokenSequence(None, jsons=st)) + yield(ProteomicsExperimentSampleSlice(peptides = chunk)) + + def load_frames(self, frame_range:Tuple[int,int], spectra_as_jsons = False): + query = ( + "SELECT SeparatedPeptides.pep_id, " + "SeparatedPeptides.sequence, " + "SeparatedPeptides.simulated_frame_profile, " + "SeparatedPeptides.mass_theoretical, " + "SeparatedPeptides.abundancy, " + "SeparatedPeptides.frame_min, " + "SeparatedPeptides.frame_max, " + "Ions.mz, " + "Ions.charge, " + "Ions.relative_abundancy, " + "Ions.scan_min, " + "Ions.scan_max, " + "Ions.simulated_scan_profile, " + "Ions.simulated_mz_spectrum " + "FROM SeparatedPeptides " + "INNER JOIN Ions " + "ON SeparatedPeptides.pep_id = Ions.pep_id " + f"AND SeparatedPeptides.frame_min < {frame_range[1]} " + f"AND SeparatedPeptides.frame_max >= {frame_range[0]} " + ) + df = pd.read_sql(query, self.con) + + # unzip jsons + df.loc[:,"simulated_scan_profile"] = df["simulated_scan_profile"].transform(lambda sp: ScanProfile(jsons=sp)) + df.loc[:,"simulated_frame_profile"] = df["simulated_frame_profile"].transform(lambda rp: RTProfile(jsons=rp)) + if spectra_as_jsons: + return df + else: + df.loc[:,"simulated_mz_spectrum"] = df["simulated_mz_spectrum"].transform(lambda s: MzSpectrum.from_jsons(jsons=s)) + + return df + + @staticmethod + def make_sql_compatible(column): + if column.size < 1: + return + if isinstance(column.iloc[0], (RTProfile, ScanProfile, ChargeProfile, TokenSequence)): + return column.apply(lambda x: x.jsons) + elif isinstance(column.iloc[0], MzSpectrum): + return column.apply(lambda x: x.to_jsons()) + else: + return column + +class ProteomicsExperimentSampleSlice: + """ + exposed dataframe of database + """ + def __init__(self, peptides:pd.DataFrame, ions:Optional[pd.DataFrame]=None): + self.peptides = peptides + self.ions = ions + + def add_simulation(self, simulation_name:str, simulation_data: ArrayLike): + accepted_peptide_simulations = [ + "simulated_irt_apex", + "simulated_frame_apex", + "simulated_frame_profile", + "simulated_charge_profile", + ] + + accepted_ion_simulations = [ + "simulated_scan_apex", + "simulated_k0", + "simulated_scan_profile", + "simulated_mz_spectrum", + ] + + # for profiles store min and max values + get_min_position = np.vectorize(lambda p:p.positions.min(), otypes=[int]) + get_max_position = np.vectorize(lambda p:p.positions.max(), otypes=[int]) + + if simulation_name == "simulated_frame_profile": + + self.peptides["frame_min"] = get_min_position(simulation_data) + self.peptides["frame_max"] = get_max_position(simulation_data) + + elif simulation_name == "simulated_charge_profile": + ions_dict = { + "sequence":[], + "pep_id":[], + "mz":[], + "charge":[], + "relative_abundancy":[] + } + sequences = self.peptides["sequence"].values + pep_ids = self.peptides["pep_id"].values + masses = self.peptides["mass_theoretical"].values + + for (s, pi, m, charge_profile) in zip(sequences, pep_ids, masses, 
+
+
+class ProteomicsExperimentSampleSlice:
+    """
+    Exposed dataframe of the database.
+    """
+    def __init__(self, peptides: pd.DataFrame, ions: Optional[pd.DataFrame] = None):
+        self.peptides = peptides
+        self.ions = ions
+
+    def add_simulation(self, simulation_name: str, simulation_data: ArrayLike):
+        accepted_peptide_simulations = [
+            "simulated_irt_apex",
+            "simulated_frame_apex",
+            "simulated_frame_profile",
+            "simulated_charge_profile",
+        ]
+
+        accepted_ion_simulations = [
+            "simulated_scan_apex",
+            "simulated_k0",
+            "simulated_scan_profile",
+            "simulated_mz_spectrum",
+        ]
+
+        # for profiles store min and max values
+        get_min_position = np.vectorize(lambda p: p.positions.min(), otypes=[int])
+        get_max_position = np.vectorize(lambda p: p.positions.max(), otypes=[int])
+
+        if simulation_name == "simulated_frame_profile":
+
+            self.peptides["frame_min"] = get_min_position(simulation_data)
+            self.peptides["frame_max"] = get_max_position(simulation_data)
+
+        elif simulation_name == "simulated_charge_profile":
+            ions_dict = {
+                "sequence": [],
+                "pep_id": [],
+                "mz": [],
+                "charge": [],
+                "relative_abundancy": []
+            }
+            sequences = self.peptides["sequence"].values
+            pep_ids = self.peptides["pep_id"].values
+            masses = self.peptides["mass_theoretical"].values
+
+            for (s, pi, m, charge_profile) in zip(sequences, pep_ids, masses, simulation_data):
+                for (c, r) in charge_profile:
+                    ions_dict["sequence"].append(s)
+                    ions_dict["pep_id"].append(pi)
+                    ions_dict["charge"].append(c)
+                    ions_dict["relative_abundancy"].append(r)
+                    ions_dict["mz"].append(m / c + MASS_PROTON)
+            self.ions = pd.DataFrame(ions_dict)
+
+            self.peptides["charge_min"] = get_min_position(simulation_data)
+            self.peptides["charge_max"] = get_max_position(simulation_data)
+
+        elif simulation_name == "simulated_scan_profile":
+
+            self.ions["scan_min"] = get_min_position(simulation_data)
+            self.ions["scan_max"] = get_max_position(simulation_data)
+
+        if simulation_name in accepted_peptide_simulations:
+            self.peptides[simulation_name] = simulation_data
+
+        elif simulation_name in accepted_ion_simulations:
+            self.ions[simulation_name] = simulation_data
+
+        else:
+            raise ValueError(f"Simulation name '{simulation_name}' is not defined")
\ No newline at end of file
diff --git a/imspy/imspy/simulation/__init__.py b/imspy/imspy/simulation/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/imspy/imspy/simulation/experiment.py b/imspy/imspy/simulation/experiment.py
new file mode 100644
index 00000000..df60d5d8
--- /dev/null
+++ b/imspy/imspy/simulation/experiment.py
@@ -0,0 +1,203 @@
+import os
+from multiprocessing import Pool
+import functools
+from abc import ABC, abstractmethod
+import numpy as np
+import pyarrow as pa
+import pyarrow.parquet as pq
+from tqdm import tqdm
+from imspy.core import MzSpectrum
+from imspy.proteome import PeptideDigest, ProteomicsExperimentDatabaseHandle
+import imspy.simulation.hardware_models as hardware
+
+
+class ProteomicsExperiment(ABC):
+    def __init__(self, path: str):
+
+        # path strings to experiment folder, database and output subfolder
+        self.path = path
+        self.output_path = f"{os.path.dirname(path)}/output"
+        self.database_path = f"{self.path}/experiment_database.db"
+
+        if not os.path.exists(self.path):
+            os.makedirs(self.path)
+
+        if os.path.exists(self.database_path):
+            raise FileExistsError("Experiment found in the given path.")
+
+        if not os.path.exists(self.output_path):
+            os.mkdir(self.output_path)
+
+        # output folder must be empty, otherwise it is
+        # assumed that it contains old experiments
+        if len(os.listdir(self.output_path)) > 0:
+            raise FileExistsError("Experiment found in the given path.")
+
+        # init database and loaded sample
+        self.database = ProteomicsExperimentDatabaseHandle(self.database_path)
+        self.loaded_sample = None
+
+        # hardware methods
+        self._lc_method = None
+        self._ionization_method = None
+        self._ion_mobility_separation_method = None
+        self._mz_separation_method = None
+
+    @property
+    def lc_method(self):
+        return self._lc_method
+
+    @lc_method.setter
+    def lc_method(self, method: hardware.LiquidChromatography):
+        self._lc_method = method
+
+    @property
+    def ionization_method(self):
+        return self._ionization_method
+
+    @ionization_method.setter
+    def ionization_method(self, method: hardware.IonSource):
+        self._ionization_method = method
+
+    @property
+    def ion_mobility_separation_method(self):
+        return self._ion_mobility_separation_method
+
+    @ion_mobility_separation_method.setter
+    def ion_mobility_separation_method(self, method: hardware.IonMobilitySeparation):
+        self._ion_mobility_separation_method = method
+
+    @property
+    def mz_separation_method(self):
+        return self._mz_separation_method
+
+    @mz_separation_method.setter
+    def mz_separation_method(self, method: hardware.MzSeparation):
+        self._mz_separation_method = method
+
+    @abstractmethod
+    def load_sample(self, sample: PeptideDigest):
+        self.database.push("PeptideDigest", sample)
+
+    @abstractmethod
+    def run(self):
+        pass
+
+
+class LcImsMsMs(ProteomicsExperiment):
+    def __init__(self, path: str):
+        super().__init__(path)
+
+    def load_sample(self, sample: PeptideDigest):
+        return super().load_sample(sample)
+
+    def run(self, chunk_size: int = 1000, assemble_processes: int = 8, frames_per_assemble_process: int = 20):
+        self._simulate_features(chunk_size)
+        self._assemble(frames_per_assemble_process, assemble_processes)
+
+    def _simulate_features(self, chunk_size):
+        # load bulks of data here as dataframe if necessary
+        for data_chunk in self.database.load_chunks(chunk_size):
+            self.lc_method.run(data_chunk)
+            self.ionization_method.run(data_chunk)
+            self.ion_mobility_separation_method.run(data_chunk)
+            self.mz_separation_method.run(data_chunk)
+            self.database.update(data_chunk)
+
+    @staticmethod
+    def _assemble_frame_range(frame_range, scan_id_min, scan_id_max, default_abundance, resolution, output_path, database_path):
+
+        frame_range_start = frame_range[0]
+        frame_range_end = frame_range[1]
+
+        # generate file name
+        file_name = f"frames_{frame_range_start}_{frame_range_end}.parquet"
+        output_file_path = f"{output_path}/{file_name}"
+
+        frame_range = range(frame_range_start, frame_range_end)
+        scan_range = range(scan_id_min, scan_id_max + 1)
+
+        thread_db_handle = ProteomicsExperimentDatabaseHandle(database_path)
+        ions_in_split = thread_db_handle.load_frames((frame_range_start, frame_range_end), spectra_as_jsons=True)
+
+        # skip if no peptides in split
+        if ions_in_split.shape[0] == 0:
+            return {}
+
+        # spectra are currently stored in json format (from SQL db)
+        ions_in_split.loc[:, "simulated_mz_spectrum"] = ions_in_split["simulated_mz_spectrum"].transform(lambda s: MzSpectrum.from_jsons(jsons=s))
+
+        # construct signal data set
+        signal = {f_id: {s_id: [] for s_id in scan_range} for f_id in frame_range}
+
+        for _, row in ions_in_split.iterrows():
+
+            ion_frame_start = max(frame_range_start, row["frame_min"])
+            ion_frame_end = min(frame_range_end - 1, row["frame_max"])  # -1 here because frame_range_end is covered by next frame range
+
+            ion_scan_start = max(scan_id_min, row["scan_min"])
+            ion_scan_end = min(scan_id_max, row["scan_max"])
+
+            ion_frame_profile = row["simulated_frame_profile"]
+            ion_scan_profile = row["simulated_scan_profile"]
+
+            ion_charge_abundance = row["abundancy"] * row["relative_abundancy"]
+
+            ion_spectrum = row["simulated_mz_spectrum"]
+
+            # frame start and end inclusive
+            for f_id in range(ion_frame_start, ion_frame_end + 1):
+                # scan start and end inclusive
+                for s_id in range(ion_scan_start, ion_scan_end + 1):
+
+                    abundance = ion_charge_abundance * ion_frame_profile[f_id] * ion_scan_profile[s_id]
+                    rel_to_default_abundance = abundance / default_abundance
+
+                    signal[f_id][s_id].append(ion_spectrum * rel_to_default_abundance)
+
+        output_dict = {"frame_id": [],
+                       "scan_id": [],
+                       "mz": [],
+                       "intensity": [],
+                       }
+
+        for (f_id, frame_dict) in signal.items():
+            for (s_id, scan_spectra_list) in frame_dict.items():
+
+                if len(scan_spectra_list) > 0:
+                    scan_spectrum = MzSpectrum.from_mz_spectra_list(scan_spectra_list, resolution=resolution).vectorized(resolution=resolution).to_centroided()
+                    output_dict["mz"].append(scan_spectrum.mz.tolist())
+                    output_dict["intensity"].append(scan_spectrum.intensity.tolist())
+                    output_dict["scan_id"].append(s_id)
+                    output_dict["frame_id"].append(f_id)
+                    signal[f_id][s_id] = None
+
+        for key, value in output_dict.items():
+            output_dict[key] = pa.array(value)
+
+        pa_table = pa.Table.from_pydict(output_dict)
+
+        pq.write_table(pa_table, output_file_path, compression=None)
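# --- Example (not part of the patch): each assembled frame range is written as
# one parquet file holding per-(frame, scan) centroided peak lists. File name
# pattern and columns follow _assemble_frame_range above; the path assumes a
# completed run of run_example_simulation.py.
import pyarrow.parquet as pq

table = pq.read_table("./timstofexp1_binomial_ion_source_21_7/output/frames_0_10.parquet")
print(table.column_names)  # ['frame_id', 'scan_id', 'mz', 'intensity']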
+
+    def _assemble(self, frames_per_process: int, num_processes: int):
+
+        scan_id_min = self.ion_mobility_separation_method.scan_id_min
+        scan_id_max = self.ion_mobility_separation_method.scan_id_max
+        default_abundance = self.mz_separation_method.model.default_abundance
+        resolution = self.mz_separation_method.resolution
+
+        # include the endpoint so the final (possibly partial) frame range is assembled too
+        split_positions = np.arange(0, self.lc_method.num_frames + frames_per_process, step=frames_per_process).astype(int)
+        split_start = split_positions[:-1]
+        split_end = split_positions[1:]
+
+        assemble_frame_range = functools.partial(self._assemble_frame_range,
+                                                 scan_id_min=scan_id_min,
+                                                 scan_id_max=scan_id_max,
+                                                 default_abundance=default_abundance,
+                                                 resolution=resolution,
+                                                 output_path=self.output_path,
+                                                 database_path=self.database_path)
+
+        if num_processes > 1:
+            with Pool(num_processes) as pool:
+                list(tqdm(pool.imap(assemble_frame_range, zip(split_start, split_end)), total=len(split_start)))
+        else:
+            for start, end in tqdm(zip(split_start, split_end), total=len(split_start)):
+                assemble_frame_range((start, end))
diff --git a/imspy/imspy/simulation/hardware_models.py b/imspy/imspy/simulation/hardware_models.py
new file mode 100644
index 00000000..b2e29b1c
--- /dev/null
+++ b/imspy/imspy/simulation/hardware_models.py
@@ -0,0 +1,774 @@
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from typing import List, Tuple
+from multiprocessing import Pool
+
+import tensorflow as tf
+import numpy as np
+from numpy.typing import ArrayLike, NDArray
+import pandas as pd
+from scipy.stats import exponnorm, norm, binom, gamma
+
+from imspy.chemistry.mass import STANDARD_TEMPERATURE, STANDARD_PRESSURE, BufferGas, get_num_protonizable_sites
+from imspy.chemistry.mobility import SUMMARY_CONSTANT as CCS_K0_CONVERSION_CONSTANT
+from imspy.proteome import ProteomicsExperimentSampleSlice
+from imspy.feature import RTProfile, ScanProfile, ChargeProfile
+from imspy.isotopes import AveragineGenerator
+from imspy.utility.utilities import tokenizer_from_json
+
+
+class Device(ABC):
+    def __init__(self, name: str):
+        self.name = name
+        self._temperature = STANDARD_TEMPERATURE
+        self._pressure = STANDARD_PRESSURE
+
+    @property
+    def temperature(self):
+        """
+        Get device temperature
+
+        :return: Temperature of device in Kelvin.
+        :rtype: float
+        """
+        return self._temperature
+
+    @temperature.setter
+    def temperature(self, T: float):
+        """
+        Set device temperature
+
+        :param T: Temperature in Kelvin.
+        :type T: float
+        """
+        self._temperature = T
+
+    @property
+    def pressure(self):
+        """
+        Get device pressure
+
+        :return: Pressure of device in Pa.
+        :rtype: float
+        """
+        return self._pressure
+
+    @pressure.setter
+    def pressure(self, p: float):
+        """
+        Set device pressure
+
+        :param p: Pressure in Pa.
+        :type p: float
+        """
+        self._pressure = p
+
+    @abstractmethod
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        pass
+
+
+class Model(ABC):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Device):
+        pass
+
+
+class Chromatography(Device):
+    def __init__(self, name: str = "ChromatographyDevice"):
+        super().__init__(name)
+        self._apex_model = None
+        self._profile_model = None
+        self._irt_to_rt_converter = None
+        self._frame_length = 1200
+        self._gradient_length = 120 * 60 * 1000  # 120 minutes in milliseconds
+
+    @property
+    def frame_length(self):
+        return self._frame_length
+
+    @frame_length.setter
+    def frame_length(self, milliseconds: int):
+        self._frame_length = milliseconds
+
+    @property
+    def gradient_length(self):
+        return self._gradient_length / (60 * 1000)
+
+    @gradient_length.setter
+    def gradient_length(self, minutes: int):
+        self._gradient_length = minutes * 60 * 1000
+
+    @property
+    def num_frames(self):
+        return self._gradient_length // self._frame_length
+
+    @property
+    def apex_model(self):
+        return self._apex_model
+
+    @apex_model.setter
+    def apex_model(self, model: ChromatographyApexModel):
+        self._apex_model = model
+
+    @property
+    def profile_model(self):
+        return self._profile_model
+
+    @profile_model.setter
+    def profile_model(self, model: ChromatographyProfileModel):
+        self._profile_model = model
+
+    @property
+    def irt_to_rt_converter(self):
+        return self._irt_to_rt_converter
+
+    @irt_to_rt_converter.setter
+    def irt_to_rt_converter(self, converter: callable):
+        self._irt_to_rt_converter = converter
+
+    def irt_to_frame_id(self, irt: float):
+        return self.rt_to_frame_id(self.irt_to_rt(irt))
+
+    @abstractmethod
+    def rt_to_frame_id(self, rt: float):
+        pass
+
+    def irt_to_rt(self, irt):
+        return self.irt_to_rt_converter(irt)
+
+    def frame_time_interval(self, frame_id: ArrayLike):
+        frame_id = np.atleast_1d(frame_id)
+        frame_length_minutes = self.frame_length / (60 * 1000)
+        s = (frame_id - 1) * frame_length_minutes
+        e = frame_id * frame_length_minutes
+        return np.stack([s, e], axis=1)
+
+    def frame_time_middle(self, frame_id: ArrayLike):
+        return np.mean(self.frame_time_interval(frame_id), axis=1)
+
+    def frame_time_end(self, frame_id: ArrayLike):
+        return self.frame_time_interval(frame_id)[:, 1]
+
+    def frame_time_start(self, frame_id: ArrayLike):
+        return self.frame_time_interval(frame_id)[:, 0]
+
+
+class LiquidChromatography(Chromatography):
+    def __init__(self, name: str = "LiquidChromatographyDevice"):
+        super().__init__(name)
+
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        # retention time apex simulation
+        retention_time_apex = self._apex_model.simulate(sample, self)
+        # in irt and rt
+        sample.add_simulation("simulated_irt_apex", retention_time_apex)
+        # in frame id
+        sample.add_simulation("simulated_frame_apex", self.irt_to_frame_id(retention_time_apex))
+
+        # profile simulation
+        retention_profile = self._profile_model.simulate(sample, self)
+        sample.add_simulation("simulated_frame_profile", retention_profile)
+
+    def rt_to_frame_id(self, rt_minutes: ArrayLike):
+        rt_minutes = np.asarray(rt_minutes)
+        # first frame is completed not at 0 but at frame_length
+        frame_id = (rt_minutes / self.frame_length * 1000 * 60).astype(np.int64) + 1
+        return frame_id
+
+
+class ChromatographyApexModel(Model):
+    def __init__(self):
+        self._device = None
+
+    @abstractmethod
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> NDArray[np.float64]:
+        pass
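# --- Example (not part of the patch): frame/gradient bookkeeping on the
# chromatography device. With 1200 ms frames over a 120 min gradient the run
# has 120*60*1000 / 1200 = 6000 frames.
from imspy.simulation.hardware_models import LiquidChromatography

lc = LiquidChromatography()
lc.frame_length = 1200     # ms
lc.gradient_length = 120   # min
print(lc.num_frames)               # 6000
print(lc.frame_time_interval(1))   # [[0.   0.02]] -> frame 1 spans 0-0.02 min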
ChromatographyProfileModel(Model): + def __init__(self): + pass + + @abstractmethod + def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> List[RTProfile]: + pass + + +class EMGChromatographyProfileModel(ChromatographyProfileModel): + + def __init__(self): + super().__init__() + + def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> List[RTProfile]: + mus = sample.peptides["simulated_irt_apex"].values + profile_list = [] + + for mu in mus: + σ = 0.1 # must be sampled + λ = 10 # must be sampled + K = 1/(σ*λ) + μ = device.irt_to_rt(mu) + model_params = { "sigma":σ, + "lambda":λ, + "mu":μ, + "name":"EMG" + } + + emg = exponnorm(loc=μ, scale=σ, K = K) + # start and end value (in retention times) + s_rt, e_rt = emg.ppf([0.01,0.9]) + # as frames + s_frame, e_frame = device.rt_to_frame_id(s_rt), device.rt_to_frame_id(e_rt) + + profile_frames = np.arange(s_frame-1,e_frame+1) # starting with s_frame-1 for cdf interval calculation + profile_rt_ends = device.frame_time_end(profile_frames) + profile_rt_cdfs = emg.cdf(profile_rt_ends) + profile_rel_intensities = np.diff(profile_rt_cdfs) + + profile_list.append(RTProfile(profile_frames[1:],profile_rel_intensities,model_params)) + return profile_list + + +class NormalChromatographyProfileModel(ChromatographyProfileModel): + + def __init__(self): + super().__init__() + + def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> List[RTProfile]: + mus = sample.peptides["simulated_irt_apex"].values + sigmas = gamma(a=4.929,scale=1/18.784).rvs(mus.size)/6 + profile_list = [] + + for mu,σ in zip(mus,sigmas): + + μ = device.irt_to_rt(mu) + model_params = { "sigma":σ, + "mu":μ, + "name":"Normal" + } + + normal = norm(loc=μ, scale=σ) + # start and end value (in retention times) + s_rt, e_rt = normal.ppf([0.02,0.98]) + # as frames + s_frame, e_frame = device.rt_to_frame_id(s_rt), device.rt_to_frame_id(e_rt) + + profile_frames = np.arange(s_frame-1,e_frame+1) # starting with s_frame-1 for cdf interval calculation + profile_rt_ends = device.frame_time_end(profile_frames) + profile_rt_cdfs = normal.cdf(profile_rt_ends) + profile_rel_intensities = np.diff(profile_rt_cdfs) + + profile_list.append(RTProfile(profile_frames[1:],profile_rel_intensities,model_params)) + return profile_list + + +class NeuralChromatographyApex(ChromatographyApexModel): + + def __init__(self, model_path: str, tokenizer_path: str): + super().__init__() + self.model_path = model_path + self.tokenizer = tokenizer_from_json(tokenizer_path) + + def sequences_to_tokens(self, sequences_tokenized: np.array) -> np.array: + print('tokenizing sequences...') + tokens = self.tokenizer.texts_to_sequences(sequences_tokenized) + tokens_padded = tf.keras.preprocessing.sequence.pad_sequences(tokens, 50, padding='post') + return tokens_padded + + @staticmethod + def _worker(model_path: str, tokens_padded: np.array, batched: bool = True, bs: int = 2048): + pseudo_target = np.expand_dims(np.zeros_like(tokens_padded[:, 0]), axis=1) + if batched: + dataset = tf.data.Dataset.from_tensor_slices((tokens_padded, pseudo_target)).batch(bs) + else: + dataset = tf.data.Dataset.from_tensor_slices((tokens_padded, pseudo_target)) + model = tf.keras.models.load_model(model_path) + print('predicting irts...') + return_val = model.predict(dataset) + return return_val + + def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> NDArray[np.float64]: + + data = sample.peptides + tokens = 
data["sequence_tokenized"].apply(lambda st: st.sequence_tokenized).to_numpy()
+        print('generating tf dataset...')
+        tokens_padded = self.sequences_to_tokens(tokens)
+
+        # run the prediction in a fresh process so tensorflow releases its
+        # resources once the worker exits
+        with Pool(1) as pool:
+            r = pool.starmap(self._worker, [(self.model_path, tokens_padded)])
+            return r[0]
+
+
+class IonSource(Device):
+    def __init__(self, name: str = "IonizationDevice"):
+        super().__init__(name)
+        self._ionization_model = None
+
+    @property
+    def ionization_model(self):
+        return self._ionization_model
+
+    @ionization_model.setter
+    def ionization_model(self, model: IonizationModel):
+        self._ionization_model = model
+
+    @abstractmethod
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        pass
+
+
+class ElectroSpray(IonSource):
+    def __init__(self, name: str = "ElectrosprayDevice"):
+        super().__init__(name)
+
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        charge_profiles = self.ionization_model.simulate(sample, self)
+        sample.add_simulation("simulated_charge_profile", charge_profiles)
+
+
+class IonizationModel(Model):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonSource) -> List[ChargeProfile]:
+        pass
+
+
+class RandomIonSource(IonizationModel):
+    def __init__(self):
+        super().__init__()
+        self._charge_probabilities = np.array([0.1, 0.5, 0.3, 0.1])
+        self._allowed_charges = np.array([1, 2, 3, 4], dtype=np.int8)
+
+    @property
+    def allowed_charges(self):
+        return self._allowed_charges
+
+    @allowed_charges.setter
+    def allowed_charges(self, charges: ArrayLike):
+        self._allowed_charges = np.asarray(charges, dtype=np.int8)
+
+    @property
+    def charge_probabilities(self):
+        return self._charge_probabilities
+
+    @charge_probabilities.setter
+    def charge_probabilities(self, probabilities: ArrayLike):
+        self._charge_probabilities = np.asarray(probabilities)
+
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonSource) -> List[ChargeProfile]:
+        if self.charge_probabilities.shape != self.allowed_charges.shape:
+            raise ValueError("Number of allowed charges must match number of charge probabilities")
+
+        data = sample.peptides
+        charge = np.random.choice(self.allowed_charges, data.shape[0], p=self.charge_probabilities)
+        rel_intensity = np.ones_like(charge)
+        charge_profiles = []
+        for c, i in zip(charge, rel_intensity):
+            charge_profiles.append(ChargeProfile([c], [i], model_params={"name": "RandomIonSource"}))
+        return charge_profiles
+
+
+class BinomialIonSource(IonizationModel):
+    def __init__(self):
+        super().__init__()
+        self._charged_probability = 0.5
+        self._allowed_charges = np.array([1, 2, 3, 4], dtype=np.int8)
+
+    @property
+    def allowed_charges(self):
+        return self._allowed_charges
+
+    @allowed_charges.setter
+    def allowed_charges(self, charges: ArrayLike):
+        self._allowed_charges = np.asarray(charges, dtype=np.int8)
+
+    @property
+    def charged_probability(self):
+        return self._charged_probability
+
+    @charged_probability.setter
+    def charged_probability(self, probability: float):
+        self._charged_probability = probability
+
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonSource) -> List[ChargeProfile]:
+        sequences = sample.peptides.sequence
+        vec_get_num_protonizable_sites = np.vectorize(get_num_protonizable_sites)
+        basic_aa_nums = vec_get_num_protonizable_sites(sequences)
+
+        charge_profiles = []
+        for baa_num in basic_aa_nums:
+            rel_intensities = binom(baa_num, self.charged_probability).pmf(self.allowed_charges)
charge_profiles.append(ChargeProfile(self.allowed_charges,rel_intensities,model_params={"name":"BinomialIonSource","basic_aa_num":baa_num})) + return charge_profiles + +class IonMobilitySeparation(Device): + def __init__(self, name:str = "IonMobilityDevice"): + super().__init__(name) + # models + self._apex_model = None + self._profile_model = None + + # hardware parameter + self._scan_intervall = 1 + self._scan_time = None + self._scan_id_min = None + self._scan_id_max = None + self._buffer_gas = None + + # converters + self._reduced_im_to_scan_converter = None + self._scan_to_reduced_im_interval_converter = None + + @property + def reduced_im_to_scan_converter(self): + return self._reduced_im_to_scan_converter + + @reduced_im_to_scan_converter.setter + def reduced_im_to_scan_converter(self, converter:callable): + self._reduced_im_to_scan_converter = converter + + @property + def scan_to_reduced_im_interval_converter(self): + return self._scan_to_reduced_im_interval_converter + + @scan_to_reduced_im_interval_converter.setter + def scan_to_reduced_im_interval_converter(self, converter:callable): + self._scan_to_reduced_im_interval_converter = converter + + @property + def buffer_gas(self): + return self._buffer_gas + + @buffer_gas.setter + def buffer_gas(self, gas: BufferGas): + self._buffer_gas = gas + + @property + def scan_intervall(self): + return self._scan_intervall + + @scan_intervall.setter + def scan_intervall(self, number:int): + self._scan_intervall = number + + @property + def scan_id_min(self): + return self._scan_id_min + + @scan_id_min.setter + def scan_id_min(self, number:int): + self._scan_id_min = number + + @property + def scan_id_max(self): + return self._scan_id_max + + @scan_id_max.setter + def scan_id_max(self, number:int): + self._scan_id_max = number + + @property + def scan_time(self): + return self._scan_time + + @scan_time.setter + def scan_time(self, microseconds:int): + self._scan_time = microseconds + + @property + def apex_model(self): + return self._apex_model + + @apex_model.setter + def apex_model(self, model: IonMobilityApexModel): + self._apex_model = model + + @property + def profile_model(self): + return self._profile_model + + @profile_model.setter + def profile_model(self, model: IonMobilityProfileModel): + self._profile_model = model + + def scan_to_reduced_im_interval(self, scan_id: ArrayLike): + return self.scan_to_reduced_im_interval_converter(scan_id) + + def reduced_im_to_scan(self, ion_mobility): + return self.reduced_im_to_scan_converter(ion_mobility) + + def scan_im_middle(self, scan_id: ArrayLike): + return np.mean(self.scan_to_reduced_im_interval(scan_id), axis = 1) + + def scan_im_lower(self, scan_id: ArrayLike): + return self.scan_to_reduced_im_interval(scan_id)[:,0] + + def scan_im_upper(self, scan_id:ArrayLike): + return self.scan_to_reduced_im_interval(scan_id)[:,1] + + def im_to_reduced_im(self, ion_mobility: float, p_0: float = STANDARD_PRESSURE, T_0: float = STANDARD_TEMPERATURE): + """ + Calculate reduced ion mobility K_0 + (normalized to standard pressure p_0 + and standard temperature T_0), from + ion mobility K. + + K_0 = K * p/p_0 * T_0/T + + [1] J. N. Dodds and E. S. Baker, + “Ion mobility spectrometry: fundamental concepts, + instrumentation, applications, and the road ahead,” + Journal of the American Society for Mass Spectrometry, + vol. 30, no. 11, pp. 2185–2195, 2019, + doi: 10.1007/s13361-019-02288-2. + + :param ion_mobility: Ion mobility K to + normalize to standard conditions + :param p_0: Standard pressure (Pa). 
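A quick standalone check of the pressure/temperature normalization documented here (a sketch: the constants mirror imspy.chemistry.mass, while the drift conditions and mobility value are made up):

# K_0 = K * (p / p_0) * (T_0 / T), with IUPAC standard conditions
STANDARD_TEMPERATURE = 273.15  # K
STANDARD_PRESSURE = 1e5        # Pa


def im_to_reduced_im(K, T, p, T_0=STANDARD_TEMPERATURE, p_0=STANDARD_PRESSURE):
    return K * p / p_0 * T_0 / T


def reduced_im_to_im(K0, T, p, T_0=STANDARD_TEMPERATURE, p_0=STANDARD_PRESSURE):
    return K0 * p_0 / p * T / T_0


# hypothetical drift conditions: 305 K, 250 Pa
K = 0.47  # measured mobility, cm^2 V^-1 s^-1
K0 = im_to_reduced_im(K, T=305.0, p=250.0)
assert abs(reduced_im_to_im(K0, T=305.0, p=250.0) - K) < 1e-12  # round trip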
+        :param T_0: Standard temperature (K).
+        """
+        T = self.temperature
+        p = self.pressure
+        return ion_mobility*p/p_0*T_0/T
+
+    def reduced_im_to_im(self, reduced_ion_mobility: float, p_0: float = STANDARD_PRESSURE, T_0: float = STANDARD_TEMPERATURE):
+        """
+        Inverse of `.im_to_reduced_im()`.
+        """
+        T = self.temperature
+        p = self.pressure
+        return reduced_ion_mobility*p_0/p*T/T_0
+
+    def ccs_to_reduced_im(self, ccs: float, mz: float, charge: int):
+        # TODO Citation
+        """
+        Conversion of collision cross-section values (ccs)
+        to reduced ion mobility according to the
+        Mason-Schamp equation.
+
+        :param ccs: collision cross-section (ccs)
+        :type ccs: float
+        :param mz: mass-to-charge ratio of peptide (Da/e)
+        :type mz: float
+        :param charge: Charge of peptide
+        :type charge: int
+        :return: Reduced ion mobility
+        :rtype: float
+        """
+
+        T = self.temperature
+        mass = mz*charge
+        μ = self.buffer_gas.mass*mass/(self.buffer_gas.mass+mass)
+        z = charge
+
+        K0 = CCS_K0_CONVERSION_CONSTANT*z*1/(np.sqrt(μ*T)*ccs)
+        return K0
+
+    def reduced_im_to_ccs(self, reduced_ion_mobility: float, mz: float, charge: int):
+        """
+        Conversion of reduced ion mobility
+        to collision cross-section values (ccs)
+        according to the Mason-Schamp equation.
+
+        :param reduced_ion_mobility: reduced ion mobility K0
+        :type reduced_ion_mobility: float
+        :param mz: mass-to-charge ratio of peptide (Da/e)
+        :type mz: float
+        :param charge: Charge of peptide
+        :type charge: int
+        :return: Collision cross-section (ccs)
+        :rtype: float
+        """
+
+        T = self.temperature
+        mass = mz*charge
+        μ = self.buffer_gas.mass*mass/(self.buffer_gas.mass+mass)
+        z = charge
+        K0 = reduced_ion_mobility
+
+        ccs = CCS_K0_CONVERSION_CONSTANT*z*1/(np.sqrt(μ*T)*K0)
+        return ccs
+
+    @abstractmethod
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        pass
+
+
+class TrappedIon(IonMobilitySeparation):
+
+    def __init__(self, name: str = "TrappedIonMobilitySeparation"):
+        # pass the name through; otherwise it is silently discarded
+        super().__init__(name)
+        self._scan_id_min = 1
+        self._scan_id_max = 918
+
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        # scan apex simulation
+        reduced_mobility, scan_apex = self._apex_model.simulate(sample, self)
+        # stored as scan id and as reduced ion mobility K0
+        sample.add_simulation("simulated_scan_apex", scan_apex)
+        sample.add_simulation("simulated_k0", reduced_mobility)
+        # scan profile simulation
+        scan_profile = self._profile_model.simulate(sample, self)
+        sample.add_simulation("simulated_scan_profile", scan_profile)
+
+
+class IonMobilityApexModel(Model):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonMobilitySeparation) -> NDArray[np.float64]:
+        pass
+
+
+class IonMobilityProfileModel(Model):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonMobilitySeparation) -> NDArray:
+        pass
+
+
+class NeuralIonMobilityApex(IonMobilityApexModel):
+
+    def __init__(self, model_path: str, tokenizer_path: str):
+        super().__init__()
+        self.model_path = model_path
+        self.tokenizer = tokenizer_from_json(tokenizer_path)
+
+    def sequences_to_tokens(self, sequences_tokenized: np.array) -> np.array:
+        print('tokenizing sequences...')
+        tokens = self.tokenizer.texts_to_sequences(sequences_tokenized)
+        tokens_padded = tf.keras.preprocessing.sequence.pad_sequences(tokens, 50, padding='post')
+        return tokens_padded
+
+    @staticmethod
+    def _worker(model_path: str, tokens_padded: np.array, mz: np.array, charges: np.array, batched: bool = 
True, bs: int = 2048): + + mz = np.expand_dims(mz, 1) + c = tf.one_hot(charges - 1, depth=4) + pseudo_target = np.expand_dims(np.zeros_like(tokens_padded[:, 0]), axis=1) + if batched: + dataset = tf.data.Dataset.from_tensor_slices(((mz, c, tokens_padded), pseudo_target)).batch(bs) + else: + dataset = tf.data.Dataset.from_tensor_slices(((mz, c, tokens_padded), pseudo_target)) + + print('predicting mobilities...') + model = tf.keras.models.load_model(model_path) + ccs, _ = model.predict(dataset) + return ccs + + def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonMobilitySeparation) -> Tuple[NDArray]: + data = sample.ions.merge(sample.peptides.loc[:,["pep_id","sequence_tokenized"]],on="pep_id",validate="many_to_one") + tokens = data.sequence_tokenized.apply(lambda st: st.sequence_tokenized).to_numpy() + tokens_padded = self.sequences_to_tokens(tokens) + mz = data['mz'].values + charges = data["charge"].values + with Pool(1) as pool: + r = pool.starmap(self._worker, [(self.model_path, tokens_padded, mz, charges)]) + ccs = r[0] + K0s = device.ccs_to_reduced_im(np.squeeze(ccs), mz, data['charge'].values) + scans = device.reduced_im_to_scan(K0s) + return K0s,scans + +class NormalIonMobilityProfileModel(IonMobilityProfileModel): + def __init__(self): + super().__init__() + + def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonMobilitySeparation) -> List[ScanProfile]: + mus = 1/sample.ions["simulated_k0"].values #1 over k0 + sigmas = gamma(a=3.703,scale=1/79.614).rvs(mus.size)/6 + profile_list = [] + for μ,σ in zip(mus,sigmas): + + model_params = { "sigma":σ, + "mu":μ, + "name":"NORMAL" + } + + normal = norm(loc=μ, scale=σ) + # start and end value (k0) + s_one_over_k0, e_one_over_k0 = normal.ppf([0.01,0.99]) + # as scan ids, remember first scans elutes largest ions + e_scan, s_scan = device.reduced_im_to_scan(1/s_one_over_k0), device.reduced_im_to_scan(1/e_one_over_k0) + + profile_scans = np.arange(s_scan-1,e_scan+1) # starting s_scan-1 is necessary here to include its end value for cdf interval + profile_end_im = 1/device.scan_im_upper(profile_scans) + profile_end_cdfs = 1-normal.cdf(profile_end_im) + profile_rel_intensities = np.diff(profile_end_cdfs) + + profile_list.append(ScanProfile(profile_scans[1:],profile_rel_intensities,model_params)) + return profile_list + + +class MzSeparation(Device): + def __init__(self, name:str = "MassSpectrometer"): + super().__init__(name) + self._model = None + self._resolution = 3 + + @property + def resolution(self): + return self._resolution + + @resolution.setter + def resolution(self, res: int): + self._resolution = res + + @property + def model(self): + return self._model + + @model.setter + def model(self, model: MzSeparationModel): + self._model = model + + @abstractmethod + def run(self, sample: ProteomicsExperimentSampleSlice): + pass + +class TOF(MzSeparation): + def __init__(self, name:str = "TimeOfFlightMassSpectrometer"): + super().__init__(name) + + def run(self, sample: ProteomicsExperimentSampleSlice): + spectra = self.model.simulate(sample, self) + sample.add_simulation("simulated_mz_spectrum", spectra) + +class MzSeparationModel(Model): + def __init__(self): + self._default_abundance = 1e4 + + @property + def default_abundance(self): + return self._default_abundance + + @default_abundance.setter + def default_abundance(self, abundance:int): + self._default_abundance = abundance + + @abstractmethod + def simulate(self, sample: ProteomicsExperimentSampleSlice, device: MzSeparation): + return 
super().simulate(sample, device)
+
+
+class AveragineModel(MzSeparationModel):
+    def __init__(self):
+        super().__init__()
+
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: MzSeparation):
+
+        avg = AveragineGenerator()
+        # right join keeps the key order of the ions (right) dataframe
+        joined_df = pd.merge(sample.peptides[["pep_id", "mass_theoretical"]],
+                             sample.ions[["pep_id", "charge"]],
+                             how="right",
+                             on="pep_id")
+        masses = joined_df["mass_theoretical"].values
+        charges = joined_df["charge"].values
+        spectra = []
+        for (m, c) in zip(masses, charges):
+            spectrum = avg.generate_spectrum(m, c, amp=self.default_abundance, centroided=False)
+            spectra.append(spectrum)
+        return spectra
diff --git a/imspy/imspy/timstof/data.py b/imspy/imspy/timstof/data.py
index c1e35ca6..a2121910 100644
--- a/imspy/imspy/timstof/data.py
+++ b/imspy/imspy/timstof/data.py
@@ -8,8 +8,8 @@
 from abc import ABC
 
-from imspy.core import TimsFrame
-from imspy.core import TimsSlice
+from imspy.core.frame import TimsFrame
+from imspy.core.slice import TimsSlice
 
 
 class TimsDataset(ABC):
diff --git a/imspy/imspy/utility.py b/imspy/imspy/utility.py
new file mode 100644
index 00000000..dd504b39
--- /dev/null
+++ b/imspy/imspy/utility.py
@@ -0,0 +1,508 @@
+import io
+import json
+from typing import List, Optional
+import tensorflow as tf
+import numpy as np
+from numpy.typing import ArrayLike
+
+import numba
+import math
+import pandas as pd
+
+from tqdm import tqdm
+from imspy.data import MzSpectrum
+# TODO bring to imspy
+#from proteolizardalgo.hashing import ReferencePattern
+
+
+class TokenSequence:
+
+    def __init__(self, sequence_tokenized: Optional[List[str]] = None, jsons: Optional[str] = None):
+        if jsons is not None:
+            self.sequence_tokenized = self._from_jsons(jsons)
+            self._jsons = jsons
+        else:
+            self.sequence_tokenized = sequence_tokenized
+            self._jsons = self._to_jsons()
+
+    def _to_jsons(self):
+        json_dict = self.sequence_tokenized
+        return json.dumps(json_dict)
+
+    def _from_jsons(self, jsons: str):
+        return json.loads(jsons)
+
+    @property
+    def jsons(self):
+        return self._jsons
+
+
+def proteome_from_fasta(path: str) -> pd.DataFrame:
+    """
+    Read a fasta file and return a dataframe with the protein name and sequence.
+    :param path: path to the fasta file
+    :return: a dataframe with the protein name and sequence
+    """
+    d = {}
+    with open(path) as infile:
+        gene = ''
+        header = ''
+        for i, line in enumerate(infile):
+            if line.find('>') == -1:
+                gene += line
+            elif line.find('>') != -1 and i > 0:
+                # store the collected sequence under the *previous* header
+                # before starting the next record
+                d[header.replace('\n', '')[4:]] = gene.replace('\n', '')
+                gene = ''
+                header = line
+            elif i == 0:
+                header = line
+        # flush the last record, which has no following header line
+        if header != '':
+            d[header.replace('\n', '')[4:]] = gene.replace('\n', '')
+
+    row_list = []
+
+    for key, value in d.items():
+        split_index = key.find(' ')
+        gene_id = key[:split_index].split('|')[0]
+        rest = key[split_index:]
+        row = {'id': gene_id, 'meta_data': rest, 'sequence': value}
+        row_list.append(row)
+
+    return pd.DataFrame(row_list)
+
+
+def peak_width_preserving_mz_transform(
+        mz: np.array,
+        M0: float = 500,
+        resolution: float = 50_000):
+    """
+    Transform m/z values onto an index scale on which peaks have a constant
+    full width at half maximum.
+    Arguments:
+        mz (np.array): An array of mass to charge ratios to transform.
+        M0 (float): The m/z value at which the TOF's resolution is reported.
+        resolution (float): The resolution of the TOF instrument.
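A quick numeric check of this transform (a sketch; `transform` mirrors the function defined here): at resolution R a peak at m/z M has FWHM M/R, and in the transformed coordinate that width is exactly one index unit at every m/z, since ln(M(1 + 1/R)) - ln(M) = ln(1 + 1/R).

import numpy as np


def transform(mz, M0=500.0, resolution=50_000.0):
    return (np.log(mz) - np.log(M0)) / np.log1p(1.0 / resolution)


R = 50_000.0
for M in (200.0, 500.0, 1500.0):
    width = transform(M * (1.0 + 1.0 / R)) - transform(M)
    print(f"m/z {M:7.1f} -> transformed peak width {width:.6f}")  # always 1.0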
+ """ + return (np.log(mz) - np.log(M0)) / np.log1p(1 / resolution) + + +def bins_to_mz(mz_bin, win_len): + return np.abs(mz_bin) * win_len + (int(mz_bin < 0) * (0.5 * win_len)) + + +def get_ref_pattern_as_spectra(ref_patterns): + """ + Get the reference patterns as a list of spectra. + :param ref_patterns: the reference patterns + :return: a list of spectra + """ + spec_list = [] + + for index, row in ref_patterns.iterrows(): + + if 150 <= (row['m'] + 1) / row['z'] <= 1700: + last_contrib = row['last_contrib'] + + pattern = row['pattern'][:1000] + + mz_bin = np.arange(1000) + + both = list(zip(mz_bin, pattern)) + + both = list(filter(lambda x: x[1] >= 0.001, both)) + + mz = [x[0] for x in both] + i = [x[1] for x in both] + + first_peak = np.where(np.diff(mz) > 1)[0][0] + max_i = np.argmax(i[:first_peak]) + mono_mz_max = mz[max_i] + + mz = (mz - mono_mz_max) / 100 + row['m'] / row['z'] + + mz = np.round(np.array(mz), 2) + 1.0 + i = np.array([int(x) for x in i]) + + spectrum = MzSpectrum(mz, i) + spec_list.append((row['m'], row['z'], spectrum, last_contrib)) + + return spec_list + + +def get_refspec_list(ref_patterns, win_len=5): + """ + + """ + + d_list = [] + + for spec in tqdm(ref_patterns, desc='creating reference patterns', ncols=100): + m = spec[0] + z = spec[1] + sp = spec[2] + lc = spec[3] + peak_width = 0.04 + mz_mono = np.round((m + 1.001) / z, 2) + peak_width + mz_bin = np.floor(mz_mono / win_len) + ref_spec = ReferencePattern(mz_bin=mz_bin, mz_mono=mz_mono, charge=z, mz_last_contrib=lc, + spectrum=sp, window_length=win_len) + + d_list.append((mz_bin, ref_spec)) + + return d_list + + +def create_reference_dict(D): + # create members + keys = set([t[0] for t in D]) + ref_d = dict([(k, []) for k in keys]) + + for b, p in D: + ref_d[b].append(p) + + tmp_dict = {} + # go over all keys + for key, values in ref_d.items(): + + # create charges + r = np.arange(5) + 1 + + # go over all charge states + for c in r: + + # go over all reference pattern + for ca in values: + + # append exactly one charge state per reference bin + if c == ca.charge: + # if key already exists, append + if key in tmp_dict: + tmp_dict[key].append(ca) + break + + # if key does not exist, create new list + else: + tmp_dict[key] = [ca] + break + + return tmp_dict + +@numba.jit(nopython=True) +def normal_pdf(x: ArrayLike, mass: float, s: float = 0.001, inv_sqrt_2pi: float = 0.3989422804014327, normalize: bool = False): + """ + :param inv_sqrt_2pi: + :param x: + :param mass: + :param s: + :return: + """ + a = (x - mass) / s + if normalize: + return np.exp(-0.5 * np.power(a,2)) + else: + return inv_sqrt_2pi / s * np.exp(-0.5 * np.power(a,2)) + + +@numba.jit +def gaussian(x, μ=0, σ=1): + """ + Gaussian function + :param x: + :param μ: + :param σ: + :return: + """ + A = 1 / np.sqrt(2 * np.pi * np.power(σ, 2)) + B = np.exp(- (np.power(x - μ, 2) / 2 * np.power(σ, 2))) + + return A * B + + +@numba.jit +def exp_distribution(x, λ=1): + """ + Exponential function + :param x: + :param λ: + :return: + """ + if x > 0: + return λ * np.exp(-λ * x) + return 0 + + +@numba.jit +def exp_gaussian(x, μ=-3, σ=1, λ=.25): + """ + laplacian distribution with exponential decay + :param x: + :param μ: + :param σ: + :param λ: + :return: + """ + A = λ / 2 * np.exp(λ / 2 * (2 * μ + λ * np.power(σ, 2) - 2 * x)) + B = math.erfc((μ + λ * np.power(σ, 2) - x) / (np.sqrt(2) * σ)) + return A * B + + +class NormalDistribution: + def __init__(self, μ: float, σ: float): + self.μ = μ + self.σ = σ + + def __call__(self, x): + return gaussian(x, self.μ, 
self.σ) + + +class ExponentialGaussianDistribution: + def __init__(self, μ: float = -3, σ: float = 1, λ: float = .25): + self.μ = μ + self.σ = σ + self.λ = λ + + def __call__(self, x): + return exp_gaussian(x, self.μ, self.σ, self.λ) + + +def preprocess_max_quant_evidence(exp: pd.DataFrame) -> pd.DataFrame: + """ + select columns from evidence txt, rename to ionmob naming convention and transform to raw data rt in seconds + Args: + exp: a MaxQuant evidence dataframe from evidence.txt table + Returns: cleaned evidence dataframe, columns renamed to ionmob naming convention + """ + + # select columns + exp = exp[['m/z', 'Mass', 'Charge', 'Modified sequence', 'Retention time', + 'Retention length', 'Ion mobility index', 'Ion mobility length', '1/K0', '1/K0 length', + 'Number of isotopic peaks', 'Max intensity m/z 0', 'Intensity', 'Raw file', 'CCS']].rename( + # rename columns to ionmob naming convention + columns={'m/z': 'mz', 'Mass': 'mass', + 'Charge': 'charge', 'Modified sequence': 'sequence', 'Retention time': 'rt', + 'Retention length': 'rt_length', 'Ion mobility index': 'im', 'Ion mobility length': 'im_length', + '1/K0': 'inv_ion_mob', '1/K0 length': 'inv_ion_mob_length', 'CCS': 'ccs', + 'Number of isotopic peaks': 'num_peaks', 'Max intensity m/z 0': 'mz_max_intensity', + 'Intensity': 'intensity', 'Raw file': 'raw'}).dropna() + + # transform retention time from minutes to seconds as stored in tdf raw data + exp['rt'] = exp.apply(lambda r: r['rt'] * 60, axis=1) + exp['rt_length'] = exp.apply(lambda r: r['rt_length'] * 60, axis=1) + exp['rt_start'] = exp.apply(lambda r: r['rt'] - r['rt_length'] / 2, axis=1) + exp['rt_stop'] = exp.apply(lambda r: r['rt'] + r['rt_length'] / 2, axis=1) + + exp['im_start'] = exp.apply(lambda r: int(np.round(r['im'] - r['im_length'] / 2)), axis=1) + exp['im_stop'] = exp.apply(lambda r: int(np.round(r['im'] + r['im_length'] / 2)), axis=1) + + exp['inv_ion_mob_start'] = exp.apply(lambda r: r['inv_ion_mob'] - r['inv_ion_mob_length'] / 2, axis=1) + exp['inv_ion_mob_stop'] = exp.apply(lambda r: r['inv_ion_mob'] + r['inv_ion_mob_length'] / 2, axis=1) + + # remove duplicates + exp = exp.drop_duplicates(['sequence', 'charge', 'rt', 'ccs']) + + return exp + +def max_quant_to_proforma(s:str, old_annotation:bool=False): + """ + Convert MaxQuant amino acid sequences (with PTM) to + proForma format. 
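Example inputs and outputs for this converter (the sequences are invented for illustration and assume the `max_quant_to_proforma` defined here is in scope):

# new-style MaxQuant annotation
print(max_quant_to_proforma("_AM(Oxidation (M))DDEK_"))
# -> _AM[UNIMOD:35]DDEK_

# old-style annotation, including an N-terminal acetylation
print(max_quant_to_proforma("_(ac)AM(ox)DDEK_", old_annotation=True))
# -> _[UNIMOD:1]AM[UNIMOD:35]DDEK_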
+ + :param s: amino acid sequence + :type s: str + :param old_annotation: Wether old annotation is used in `s`, defaults to False + :type old_annotation: bool, optional + """ + seq = s.strip("_") + + proforma_seq = "" + if old_annotation: + proforma_seq = (seq + .replace("(ox)", "[UNIMOD:35]") + .replace("(ac)", "[UNIMOD:1]") + ) + else: + proforma_seq = (seq + .replace("(Oxidation (M))", "[UNIMOD:35]") + .replace("(Phospho (STY))", "[UNIMOD:21]") + .replace("(Acetyl (Protein N-term))", "[UNIMOD:1]") + ) + return f"_{proforma_seq}_" + +def preprocess_max_quant_sequence(s, old_annotation=False): + """ + :param s: + :param old_annotation: + """ + + seq = s[1:-1] + + is_acc = False + + if old_annotation: + seq = seq.replace('(ox)', '$') + + if seq.find('(ac)') != -1: + is_acc = True + seq = seq.replace('(ac)', '') + + else: + seq = seq.replace('(Oxidation (M))', '$') + seq = seq.replace('(Phospho (STY))', '&') + + if seq.find('(Acetyl (Protein N-term))') != -1: + is_acc = True + seq = seq.replace('(Acetyl (Protein N-term))', '') + + # form list from string + slist = list(seq) + + tmp_list = [] + + for item in slist: + if item == '$': + tmp_list.append('') + + elif item == '&': + tmp_list.append('') + + else: + tmp_list.append(item) + + slist = tmp_list + + r_list = [] + + for i, char in enumerate(slist): + + if char == '': + C = slist[i - 1] + C = C + '-' + r_list = r_list[:-1] + r_list.append(C) + + elif char == 'C': + r_list.append('C-') + + elif char == '': + C = slist[i - 1] + C = C + '-' + r_list = r_list[:-1] + r_list.append(C) + + else: + r_list.append(char) + + if is_acc: + return ['-'] + r_list + [''] + + return [''] + r_list + [''] + +def is_unimod_start(char:str): + """ + Tests if char is start of unimod + bracket + + :param char: Character of a proForma formatted aa sequence + :type char: str + :return: Wether char is start of unimod bracket + :rtype: bool + """ + if char in ["(","[","{"]: + return True + else: + return False + +def is_unimod_end(char:str): + """ + Tests if char is end of unimod + bracket + + :param char: Character of a proForma formatted aa sequence + :type char: str + :return: Wether char is end of unimod bracket + :rtype: bool + """ + if char in [")","]","}"]: + return True + else: + return False + +def tokenize_proforma_sequence(sequence: str): + """ + Tokenize a ProForma formatted sequence string. 
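For orientation, a sketch of what the tokenizer produces. Note that the start/end sentinel strings in `token_list` appear to have been stripped to empty strings in this rendering of the patch, so they show up as '' below; whatever the original sentinels were, the per-residue splitting is unaffected:

# assumes tokenize_proforma_sequence as defined here is in scope
tokens = tokenize_proforma_sequence("AM[UNIMOD:35]DEK")
# -> ['', 'A', 'M[UNIMOD:35]', 'D', 'E', 'K', '']
# a modified residue stays fused with its UNIMOD tag as a single token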
+ + :param sequence: Sequence string (ProForma formatted) + :type sequence: str + :return: List of tokens + :rtype: List + """ + sequence = sequence.upper().replace("(","[").replace(")","]") + token_list = [""] + in_unimod_bracket = False + tmp_token = "" + + for aa in sequence: + if is_unimod_start(aa): + in_unimod_bracket = True + if in_unimod_bracket: + if is_unimod_end(aa): + in_unimod_bracket = False + tmp_token += aa + continue + if tmp_token != "": + token_list.append(tmp_token) + tmp_token = "" + tmp_token += aa + + if tmp_token != "": + token_list.append(tmp_token) + + if len(token_list) > 1: + if token_list[1].find("UNIMOD:1") != -1: + token_list[1] = ""+token_list[1] + token_list = token_list[1:] + token_list.append("") + + return token_list + +def get_aa_num_proforma_sequence(sequence:str): + """ + get number of amino acids in sequence + + :param sequence: proforma formatted aa sequence + :type sequence: str + :return: Number of amino acids + :rtype: int + """ + num_aa = 0 + inside_bracket = False + + for aa in sequence: + if is_unimod_start(aa): + inside_bracket = True + if inside_bracket: + if is_unimod_end(aa): + inside_bracket = False + continue + num_aa += 1 + return num_aa + + + +def tokenizer_to_json(tokenizer: tf.keras.preprocessing.text.Tokenizer, path: str): + """ + save a fit keras tokenizer to json for later use + :param tokenizer: fit keras tokenizer to save + :param path: path to save json to + """ + tokenizer_json = tokenizer.to_json() + with io.open(path, 'w', encoding='utf-8') as f: + f.write(json.dumps(tokenizer_json, ensure_ascii=False)) + + +def tokenizer_from_json(path: str): + """ + load a pre-fit tokenizer from a json file + :param path: path to tokenizer as json file + :return: a keras tokenizer loaded from json + """ + with open(path) as f: + data = json.load(f) + return tf.keras.preprocessing.text.tokenizer_from_json(data) + diff --git a/imspy/pyproject.toml b/imspy/pyproject.toml index e672cf12..f95a7f9d 100644 --- a/imspy/pyproject.toml +++ b/imspy/pyproject.toml @@ -11,9 +11,13 @@ pandas = ">=2.1" numpy = ">=1.21" tensorflow = ">=2.14" tensorflow-probability = ">=0.22.1" + imspy-connector = ">=0.2.7" tqdm = ">=4.62.0" scipy = ">=1.7.1" +tqdm = ">=4.66" +pyarrow =">=13.0" +mendeleev = ">=0.7.0" [build-system] requires = ["poetry-core"] diff --git a/imspy_connector/src/py_mz_spectrum.rs b/imspy_connector/src/py_mz_spectrum.rs index 46e01577..4efc8ec8 100644 --- a/imspy_connector/src/py_mz_spectrum.rs +++ b/imspy_connector/src/py_mz_spectrum.rs @@ -25,11 +25,13 @@ impl PyMsType { } #[pyclass] +#[derive(Clone)] pub struct PyMzSpectrum { pub inner: MzSpectrum, } #[pymethods] + impl PyMzSpectrum { #[new] pub unsafe fn new(mz: &PyArray1, intensity: &PyArray1) -> PyResult { @@ -37,9 +39,27 @@ impl PyMzSpectrum { inner: MzSpectrum { mz: mz.as_slice()?.to_vec(), intensity: intensity.as_slice()?.to_vec(), - }, + } }) } + #[staticmethod] + pub unsafe fn from_mzspectra_list(list: Vec, resolution: i32) -> PyResult { + if list.is_empty(){ + Ok(PyMzSpectrum { + inner: MzSpectrum { + mz: Vec::new(), + intensity: Vec::new(), + } + }) + } + else { + let mut convoluted: MzSpectrum = MzSpectrum { mz: vec![], intensity: vec![] }; + for spectrum in list { + convoluted = convoluted + spectrum.inner; + } + Ok(PyMzSpectrum { inner: convoluted.to_resolution(resolution) }) + } + } #[getter] pub fn mz(&self, py: Python) -> Py> { @@ -86,6 +106,12 @@ impl PyMzSpectrum { }; Ok(py_filtered) } + pub fn __add__(&self, other: PyMzSpectrum) -> PyResult { + Ok(PyMzSpectrum { inner: 
(self.inner.clone() + other.inner) })
+    }
+    pub fn __mul__(&self, scale: f64) -> PyResult<Self> {
+        Ok(PyMzSpectrum { inner: (self.inner.clone() * scale) })
+    }
 }
 
 #[pyclass]
@@ -107,6 +133,10 @@ impl PyMzSpectrumVectorized {
         })
     }
 
+    pub fn to_dense_spectrum(&self, max_index: Option<usize>) -> PyMzSpectrumVectorized {
+        PyMzSpectrumVectorized { inner: self.inner.to_dense_spectrum(max_index) }
+    }
+
     #[getter]
     pub fn resolution(&self) -> i32 {
         self.inner.resolution
@@ -242,4 +272,8 @@ impl PyTimsSpectrum {
     pub fn mz_spectrum(&self) -> PyMzSpectrum {
         PyMzSpectrum { inner: self.inner.spectrum.mz_spectrum.clone() }
     }
+
+    pub fn to_resolution(&self, resolution: i32) -> Self {
+        PyTimsSpectrum { inner: self.inner.to_resolution(resolution) }
+    }
 }
diff --git a/mscore/src/mz_spectrum.rs b/mscore/src/mz_spectrum.rs
index 88daf84f..1f2728bb 100644
--- a/mscore/src/mz_spectrum.rs
+++ b/mscore/src/mz_spectrum.rs
@@ -293,6 +293,17 @@ impl std::ops::Add for MzSpectrum {
     }
 }
 
+impl std::ops::Mul<f64> for MzSpectrum {
+    type Output = Self;
+    fn mul(self, scale: f64) -> Self::Output {
+        let mut scaled_intensities: Vec<f64> = vec![0.0; self.intensity.len()];
+        for (idx, intensity) in self.intensity.iter().enumerate() {
+            scaled_intensities[idx] = scale * intensity;
+        }
+        Self { mz: self.mz.clone(), intensity: scaled_intensities }
+    }
+}
+
 /// Represents a mass spectrum with associated m/z indices, m/z values, and intensities
 #[derive(Clone)]
 pub struct IndexedMzSpectrum {
@@ -559,15 +570,36 @@ impl MzSpectrumVectorized {
     /// # Arguments
     ///
     /// * `max_index` - The maximum index for the dense vector.
-    pub fn to_dense(&self, max_index: usize) -> DVector<f64> {
-        let mut dense = DVector::zeros(max_index + 1);
+
+    fn get_max_index(&self) -> usize {
+        let base: i32 = 10;
+        let max_mz: i32 = 2000;
+        let max_index: usize = (max_mz * base.pow(self.resolution as u32)) as usize;
+        max_index
+    }
+
+    pub fn to_dense(&self, max_index: Option<usize>) -> DVector<f64> {
+        let max_index = match max_index {
+            Some(max_index) => max_index,
+            None => self.get_max_index(),
+        };
+        let mut dense_intensities: DVector<f64> = DVector::<f64>::zeros(max_index + 1);
         for (&index, &intensity) in self.indices.iter().zip(self.values.iter()) {
             if (index as usize) <= max_index {
-                dense[index as usize] = intensity;
+                dense_intensities[index as usize] = intensity;
             }
         }
-        dense
+        dense_intensities
+    }
+
+    pub fn to_dense_spectrum(&self, max_index: Option<usize>) -> MzSpectrumVectorized {
+        let max_index = match max_index {
+            Some(max_index) => max_index,
+            None => self.get_max_index(),
+        };
+        let dense_intensities: Vec<f64> = self.to_dense(Some(max_index)).data.into();
+        let dense_indices: Vec<i32> = (0..=max_index).map(|i| i as i32).collect();
+        let dense_spectrum: MzSpectrumVectorized = MzSpectrumVectorized { resolution: self.resolution, indices: dense_indices, values: dense_intensities };
+        dense_spectrum
     }
 }
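Hypothetical Python-side usage of the bindings added in this patch. The import path and positional constructor call are assumptions inferred from the #[pyclass]/#[new] definitions above, not documented API:

import numpy as np
from imspy_connector import PyMzSpectrum  # assumed import path

a = PyMzSpectrum(np.array([100.0, 200.0]), np.array([10.0, 20.0]))
b = PyMzSpectrum(np.array([100.0, 300.0]), np.array([5.0, 7.0]))

summed = a + b         # __add__: peak-wise sum of the two spectra
scaled = summed * 0.5  # __mul__: scale every intensity by a constant
# sum a list of spectra and re-bin the result at a given decimal resolution
combined = PyMzSpectrum.from_mzspectra_list([a, b], resolution=2)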