Skip to content

Commit

Permalink
Merge pull request #201 from theGreatHerrLebert/david@simulation
Browse files Browse the repository at this point in the history
David@simulation
  • Loading branch information
theGreatHerrLebert authored May 16, 2024
2 parents 001d492 + 992f70d commit 04f9ddb
Show file tree
Hide file tree
Showing 8 changed files with 765 additions and 9 deletions.
43 changes: 43 additions & 0 deletions imspy/imspy/algorithm/intensity/predictors.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,49 @@ def simulate_ion_intensities_pandas(self, data: pd.DataFrame, batch_size: int =

return data

def predict_intensities(
        self,
        sequences: List[str],
        charges: List[int],
        collision_energies: List[float],
        divide_collision_energy_by: float = 1e2,
        batch_size: int = 512,
        flatten: bool = False,
) -> List[NDArray]:
    """Predict fragment ion intensities for a collection of peptides.

    The three input lists are used position-wise: entry ``i`` of
    ``sequences``, ``charges`` and ``collision_energies`` describes one
    peptide.

    Args:
        sequences: Peptide sequences. They are passed through
            ``remove_unimod_annotation`` before being fed to the model; the
            original strings are kept for the post-processing table.
        charges: Precursor charge per peptide.
        collision_energies: Collision energy per peptide, in the caller's
            units. Each value is divided by ``divide_collision_energy_by``
            before it is handed to the model.
        divide_collision_energy_by: Divisor applied to every collision energy.
        batch_size: Number of peptides per model call.
        flatten: If True, every per-peptide prediction is additionally passed
            through ``flatten_prosit_array`` and the results are stacked
            row-wise with ``np.vstack``.

    Returns:
        The post-processed predictions. NOTE(review): the annotation says
        ``List[NDArray]`` but the value is whatever ``np.squeeze`` /
        ``np.vstack`` return, i.e. a NumPy array - confirm before relying on
        list semantics.
    """
    sequences_unmod = [remove_unimod_annotation(s) for s in sequences]
    sequence_length = [len(s) for s in sequences_unmod]
    collision_energies_norm = [ce / divide_collision_energy_by for ce in collision_energies]

    # Collision energies get an explicit trailing axis of size 1 before batching.
    tf_ds = generate_prosit_intensity_prediction_dataset(
        sequences_unmod,
        charges,
        np.expand_dims(collision_energies_norm, 1)).batch(batch_size)

    ds_unpacked = tf_ds.map(unpack_dict)

    # Ceiling division: the previous `len // batch_size + 1` reported one batch
    # too many whenever the input length was an exact multiple of batch_size,
    # leaving the progress bar stuck short of 100 %.
    n_batches = (len(sequences) + batch_size - 1) // batch_size

    intensity_predictions = []
    for peptides_in, precursor_charge_in, collision_energy_in in tqdm(ds_unpacked,
                                                                      desc='Predicting intensities',
                                                                      total=n_batches,
                                                                      ncols=100,
                                                                      disable=not self.verbose):
        model_input = [peptides_in, precursor_charge_in, collision_energy_in]
        model_output = self.model(model_input).numpy()
        intensity_predictions.append(model_output)

    # One raw prediction per peptide: stack the batches, then split by row so
    # each row can become one cell of the 'intensity_raw' column.
    I_pred = list(np.vstack(intensity_predictions))
    I_pred = np.squeeze(reshape_dims(post_process_predicted_fragment_spectra(pd.DataFrame({
        'sequence': sequences,
        'charge': charges,
        'collision_energy': collision_energies,
        'sequence_length': sequence_length,
        'intensity_raw': I_pred,
    }))))

    if flatten:
        I_pred = np.vstack([flatten_prosit_array(r) for r in I_pred])

    return I_pred

def simulate_ion_intensities(
self,
sequences: List[str],
Expand Down
13 changes: 11 additions & 2 deletions imspy/imspy/algorithm/rt/predictors.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,18 @@ def simulate_separation_times(self, sequences: list[str], batch_size: int = 1024

return self.model.predict(tf_ds, verbose=self.verbose)

def fit_model(self, data: pd.DataFrame, epochs: int = 10, batch_size: int = 1024, re_compile=False):
    """Train ``self.model`` on observed retention times.

    Args:
        data: Table with a ``sequence`` column (fed through
            ``self._preprocess_sequences``) and a ``retention_time_observed``
            column used as the regression target.
        epochs: Number of epochs passed to ``self.model.fit``.
        batch_size: Batch size of the training dataset.
        re_compile: If True, compile the model with the ``'adam'`` optimizer
            and ``'mean_squared_error'`` loss before fitting.

    Raises:
        AssertionError: If one of the two required columns is missing.
    """
    available_columns = data.columns
    assert 'sequence' in available_columns, 'Data must contain a column named "sequence"'
    assert 'retention_time_observed' in available_columns, 'Data must contain a column named "retention_time_observed"'

    encoded_sequences = self._preprocess_sequences(data.sequence.values)
    observed_rts = data.retention_time_observed.values

    # Pair inputs with targets, shuffle with a buffer as large as the table,
    # then cut into batches.
    training_set = tf.data.Dataset.from_tensor_slices((encoded_sequences, observed_rts))
    training_set = training_set.shuffle(len(data))
    training_set = training_set.batch(batch_size)

    if re_compile:
        self.model.compile(optimizer='adam', loss='mean_squared_error')

    self.model.fit(training_set, epochs=epochs, verbose=self.verbose)

def simulate_separation_times_pandas(self, data: pd.DataFrame,
gradient_length: float,
batch_size: int = 1024) -> pd.DataFrame:
gradient_length: float, batch_size: int = 1024) -> pd.DataFrame:
tokens = self._preprocess_sequences(data.sequence.values)
tf_ds = tf.data.Dataset.from_tensor_slices(tokens).batch(batch_size)

Expand Down
Empty file.
Loading

0 comments on commit 04f9ddb

Please sign in to comment.