prevent circular import and fix tokenizer error
TimOliverMaier committed Oct 27, 2023
1 parent c0ba7d9 commit 85fbd8d
Showing 8 changed files with 31 additions and 15 deletions.
2 changes: 1 addition & 1 deletion pyims/examples/simulation/run_example_simulation.py
@@ -89,7 +89,7 @@ def build_experiment():


# to reduce computational load in example
sample_digest.data = sample_digest.data.sample(100000, random_state= rng)
sample_digest.data = sample_digest.data.sample(100, random_state= rng)


t.load_sample(sample_digest)
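Note: the example script now draws 100 rows from the digest instead of 100,000, which keeps the example simulation fast. A minimal sketch of the same seeded-downsampling pattern, using a hypothetical DataFrame in place of sample_digest.data:

import numpy as np
import pandas as pd

# Hypothetical stand-in for sample_digest.data from the example script.
data = pd.DataFrame({
    "sequence": [f"PEPTIDE{i}" for i in range(1_000)],
    "mass": np.linspace(500.0, 3000.0, 1_000),
})

rng = np.random.default_rng(42)              # seeded generator, like `rng` in the example
subset = data.sample(100, random_state=rng)  # reproducible 100-row subsample
print(subset.shape)                          # (100, 2)
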
2 changes: 1 addition & 1 deletion pyims/pyims/data/frame.py
@@ -5,7 +5,7 @@

import numpy as np
import pyims_connector as pims
from pyims.spectrum import MzSpectrum, TimsSpectrum
from pyims.data.spectrum import MzSpectrum, TimsSpectrum


class TimsFrame:
3 changes: 2 additions & 1 deletion pyims/pyims/data/handle.py
@@ -10,7 +10,8 @@

from abc import ABC

from pyims.data import TimsFrame, TimsSlice
from pyims.data.frame import TimsFrame
from pyims.data.slice import TimsSlice



3 changes: 2 additions & 1 deletion pyims/pyims/data/slice.py
@@ -3,7 +3,8 @@
from typing import List

import pyims_connector as pims
from pyims.data import TimsFrame, MzSpectrum
from pyims.data.frame import TimsFrame
from pyims.data.spectrum import MzSpectrum


class TimsSlice:
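Note: the import edits in frame.py, handle.py, and slice.py address two related problems. frame.py imported from pyims.spectrum, apparently a stale path for what is now pyims.data.spectrum, while handle.py and slice.py imported through the pyims.data package itself, which re-runs the package __init__ and can close an import cycle. A sketch of the failure mode and the fix, assuming a pyims/data/__init__.py that re-exports the submodules (its contents are not part of this diff):

# --- pyims/data/__init__.py (assumed re-exports, not shown in this commit) ---
from pyims.data.spectrum import MzSpectrum, TimsSpectrum
from pyims.data.frame import TimsFrame
from pyims.data.slice import TimsSlice

# --- pyims/data/slice.py, before the fix ---
# Importing names from the package runs pyims/data/__init__.py, which in turn
# imports slice.py again while it is only partially initialized. Depending on
# the entry point, this fails with an ImportError mentioning a "partially
# initialized module ... (most likely due to a circular import)".
from pyims.data import TimsFrame, MzSpectrum

# --- pyims/data/slice.py, after the fix ---
# Binding directly to the defining submodules never re-enters __init__.py.
from pyims.data.frame import TimsFrame
from pyims.data.spectrum import MzSpectrum
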
22 changes: 17 additions & 5 deletions pyims/pyims/data/spectrum.py
@@ -1,6 +1,6 @@
import numpy as np
from typing import List, Tuple

from __future__ import annotations
import pandas as pd
from numpy.typing import NDArray

@@ -67,7 +67,7 @@ def __repr__(self):
return f"MzSpectrum(num_peaks={len(self.mz)})"

def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5,
min_intensity: float = 1) -> Tuple[NDArray, List['MzSpectrum']]:
min_intensity: float = 1) -> Tuple[NDArray, List[MzSpectrum]]:
"""Convert the spectrum to a list of windows.
Args:
@@ -83,8 +83,20 @@ def to_windows(self, window_length: float = 10, overlapping: bool = True, min_nu
indices, windows = self.__spec_ptr.to_windows(window_length, overlapping, min_num_peaks, min_intensity)
return indices, [MzSpectrum.from_py_mz_spectrum(window) for window in windows]

def to_resolution(self, resolution: int) -> MzSpectrum:
"""Bins the spectrum's m/z values to a
given resolution and sums the intensities.
Args:
resolution (int): Negative decadic logarithm of bin size.
Returns:
MzSpectrum: A new `MzSpectrum` where m/z values are binned according to the given resolution.
"""
return self.__spec_ptr.to_resolution(resolution)

def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, intensity_min: float = 0.0,
intensity_max: float = 1e9) -> 'MzSpectrum':
intensity_max: float = 1e9) -> MzSpectrum:
"""Filter the spectrum for a given m/z range and intensity range.
Args:
@@ -99,7 +111,7 @@ def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, intensity_min: flo
return MzSpectrum.from_py_mz_spectrum(
self.__spec_ptr.filter_ranged(mz_min, mz_max, intensity_min, intensity_max))

def vectorized(self, resolution: int = 2) -> 'MzSpectrumVectorized':
def vectorized(self, resolution: int = 2) -> MzSpectrumVectorized:
"""Convert the spectrum to a vectorized spectrum.
Args:
@@ -145,7 +157,7 @@ def from_py_mz_spectrum_vectorized(cls, spec: pims.PyMzSpectrumVectorized):
@property
def resolution(self) -> float:
"""Resolution.
Returns:
float: Resolution.
"""
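Note: two things change in spectrum.py. from __future__ import annotations postpones annotation evaluation, so the quoted forward references ('MzSpectrum', 'MzSpectrumVectorized') can be written without quotes, and the new to_resolution method bins m/z values at a given resolution (the negative decadic logarithm of the bin width, so resolution 2 means 0.01-wide bins) and sums the intensities per bin. The real method delegates to pyims_connector; the numpy sketch below only illustrates the assumed binning semantics with a toy class, not the pyims implementation:

from __future__ import annotations  # must precede all other statements in the module

import numpy as np

class ToySpectrum:
    """Toy stand-in for MzSpectrum, used only to illustrate the binning."""

    def __init__(self, mz: np.ndarray, intensity: np.ndarray):
        self.mz = mz
        self.intensity = intensity

    # With postponed evaluation, the return annotation needs no quotes even
    # though ToySpectrum is still being defined at this point.
    def to_resolution(self, resolution: int) -> ToySpectrum:
        binned = np.round(self.mz, resolution)      # bin width is 10**-resolution
        unique_mz, inverse = np.unique(binned, return_inverse=True)
        summed = np.zeros(unique_mz.shape)
        np.add.at(summed, inverse, self.intensity)  # sum intensities per bin
        return ToySpectrum(unique_mz, summed)

spec = ToySpectrum(np.array([500.001, 500.004, 600.02]), np.array([10.0, 5.0, 1.0]))
binned = spec.to_resolution(2)
print(binned.mz, binned.intensity)  # two bins: ~500.00 carries 15.0, ~600.02 carries 1.0
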
2 changes: 1 addition & 1 deletion pyims/pyims/feature.py
@@ -4,7 +4,7 @@

import json

from pyims.data import TimsSlice, TimsFrame, MzSpectrum
from pyims.data import TimsSlice, TimsFrame
from pyims.utility import gaussian, exp_gaussian
from pyims.isotopes import IsotopePatternGenerator, create_initial_feature_distribution
from abc import ABC, abstractmethod
8 changes: 4 additions & 4 deletions pyims/pyims/simulation/hardware_models.py
@@ -267,7 +267,7 @@ def __init__(self, model_path: str, tokenizer_path: str):

def sequences_to_tokens(self, sequences_tokenized: np.array) -> np.array:
print('tokenizing sequences...')
tokens = np.apply_along_axis(self.tokenizer.texts_to_sequences, 0, sequences_tokenized)
tokens = self.tokenizer.texts_to_sequences(sequences_tokenized)
tokens_padded = tf.keras.preprocessing.sequence.pad_sequences(tokens, 50, padding='post')
return tokens_padded

@@ -286,7 +286,7 @@ def _worker(model_path: str, tokens_padded: np.array, batched: bool = True, bs:
def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> NDArray[np.float64]:

data = sample.peptides
tokens = data["sequence_tokenized"].apply(lambda st: st.sequence_tokenized)
tokens = data["sequence_tokenized"].apply(lambda st: st.sequence_tokenized).to_numpy()
print('generating tf dataset...')
tokens_padded = self.sequences_to_tokens(tokens)

@@ -632,7 +632,7 @@ def __init__(self, model_path: str, tokenizer_path: str):

def sequences_to_tokens(self, sequences_tokenized: np.array) -> np.array:
print('tokenizing sequences...')
tokens = np.apply_along_axis(self.tokenizer.texts_to_sequences, 0, sequences_tokenized)
tokens = self.tokenizer.texts_to_sequences(sequences_tokenized)
tokens_padded = tf.keras.preprocessing.sequence.pad_sequences(tokens, 50, padding='post')
return tokens_padded

@@ -654,7 +654,7 @@ def _worker(model_path: str, tokens_padded: np.array, mz: np.array, charges: np.

def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonMobilitySeparation) -> Tuple[NDArray]:
data = sample.ions.merge(sample.peptides.loc[:,["pep_id","sequence_tokenized"]],on="pep_id",validate="many_to_one")
tokens = data.sequence_tokenized.apply(lambda st: st.sequence_tokenized)
tokens = data.sequence_tokenized.apply(lambda st: st.sequence_tokenized).to_numpy()
tokens_padded = self.sequences_to_tokens(tokens)
mz = data['mz'].values
charges = data["charge"].values
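Note: the tokenizer fix is identical in both hardware models. np.apply_along_axis tried to funnel the ragged output of texts_to_sequences back into a rectangular array, which fails for variable-length peptides and is presumably the tokenizer error named in the commit message. The Keras Tokenizer already accepts a sequence of texts and returns one integer list per entry, so converting the pandas Series with .to_numpy() and calling texts_to_sequences once is enough before padding to length 50. A minimal sketch with a freshly fitted tokenizer and made-up token strings (the committed code loads a pre-trained tokenizer from tokenizer_path instead):

import numpy as np
import tensorflow as tf

# Made-up whitespace-separated token strings standing in for st.sequence_tokenized.
sequences = np.array(["<START> P E P T I D E <END>", "<START> A C D E F K <END>"], dtype=object)

tokenizer = tf.keras.preprocessing.text.Tokenizer(filters="", lower=False)
tokenizer.fit_on_texts(sequences.tolist())        # the committed code loads a saved tokenizer instead

tokens = tokenizer.texts_to_sequences(sequences)  # one list of ints per sequence
tokens_padded = tf.keras.preprocessing.sequence.pad_sequences(tokens, 50, padding='post')
print(tokens_padded.shape)                        # (2, 50)
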
4 changes: 3 additions & 1 deletion pyims/pyproject.toml
@@ -8,12 +8,14 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.10, <3.12"
pandas = ">=2.1"
numpy = ">=1.21"
numpy = ">=1.21, <1.25"
mendeleev = ">=0.14"
pyopenms = ">=3.1"
scipy = ">=1.11.2"
tqdm = ">=4.66"
pyarrow =">=13.0"
tensorflow = ">=2.14"
numba = ">=0.57"

[build-system]
requires = ["poetry-core"]
