Skip to content

Commit

Permalink
Merge pull request #4 from theGreatHerrLebert/david@rustdf
Browse files Browse the repository at this point in the history
David@rustdf
  • Loading branch information
theGreatHerrLebert authored Sep 19, 2023
2 parents 43a722f + 54a6d0b commit f9e9248
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 18 deletions.
2 changes: 1 addition & 1 deletion mscore/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
pub mod mz_spectrum;
pub use {mz_spectrum::MzSpectrum, mz_spectrum::TOFMzSpectrum, mz_spectrum::ImsFrame, mz_spectrum::TimsFrame};
pub use {mz_spectrum::MzSpectrum, mz_spectrum::TOFMzSpectrum, mz_spectrum::ImsSpectrum, mz_spectrum::TimsSpectrum, mz_spectrum::ImsFrame, mz_spectrum::TimsFrame};
90 changes: 87 additions & 3 deletions mscore/src/mz_spectrum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ impl fmt::Display for MzSpectrum {
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
.unwrap();

write!(f, "MzSpectrum(data points: {}, max value:({}, {}))", self.mz.len(), mz, i)
write!(f, "MzSpectrum(data points: {}, max by intensity:({}, {}))", self.mz.len(), format!("{:.3}", mz), i)
}
}

Expand Down Expand Up @@ -214,7 +214,7 @@ impl fmt::Display for TOFMzSpectrum {
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
.unwrap();

write!(f, "TOFMzSpectrum(data points: {}, max value:({}, {}))", self.mz.len(), mz, i)
write!(f, "TOFMzSpectrum(data points: {}, max by intensity:({}, {}))", self.mz.len(), format!("{:.3}", mz), i)
}
}

Expand All @@ -226,6 +226,28 @@ pub struct ImsSpectrum {
pub spectrum: MzSpectrum,
}

impl ImsSpectrum {
///
/// Creates a new `ImsSpectrum` instance.
///
/// # Arguments
///
/// * `retention_time` - The retention time in seconds.
/// * `inv_mobility` - The inverse ion mobility.
/// * `spectrum` - A `MzSpectrum` instance.
///
/// # Examples
///
/// ```
/// use mscore::{ImsSpectrum, MzSpectrum};
///
/// let spectrum = ImsSpectrum::new(100.0, 0.1, MzSpectrum::new(vec![100.5, 200.5], vec![50.0, 60.0]));
/// ```
pub fn new(retention_time: f64, inv_mobility: f64, spectrum: MzSpectrum) -> Self {
ImsSpectrum { retention_time, inv_mobility, spectrum }
}
}

impl fmt::Display for ImsSpectrum {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "ImsSpectrum(rt: {}, inv_mobility: {}, spectrum: {})", self.retention_time, self.inv_mobility, self.spectrum)
Expand All @@ -242,6 +264,23 @@ pub struct TimsSpectrum {
}

impl TimsSpectrum {
/// Creates a new `TimsSpectrum` instance.
///
/// # Arguments
///
/// * `frame_id` - index of frame in TDF raw file.
/// * `scan_id` - index of scan in TDF raw file.
/// * `retention_time` - The retention time in seconds.
/// * `inv_mobility` - The inverse ion mobility.
/// * `spectrum` - A `TOFMzSpectrum` instance.
///
/// # Examples
///
/// ```
/// use mscore::{TimsSpectrum, TOFMzSpectrum};
///
/// let spectrum = TimsSpectrum::new(1, 1, 100.0, 0.1, TOFMzSpectrum::new(vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]));
/// ```
pub fn new(frame_id: i32, scan_id: i32, retention_time: f64, inv_mobility: f64, spectrum: TOFMzSpectrum) -> Self {
TimsSpectrum { frame_id, scan_id, retention_time, inv_mobility, spectrum }
}
Expand Down Expand Up @@ -324,10 +363,32 @@ impl TimsFrame {
TimsFrame { frame_id, retention_time, scan, inv_mobility, tof, mz, intensity }
}

/// Converts this `TimsFrame` into an `ImsFrame`.
///
/// The retention time is copied and the inverse mobility, m/z and intensity
/// vectors are cloned, so `self` is left untouched.
///
/// # Examples
///
/// ```
/// use mscore::TimsFrame;
///
/// let frame = TimsFrame::new(1, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
/// let ims_frame = frame.to_ims_frame();
/// ```
pub fn to_ims_frame(&self) -> ImsFrame {
    // Clone the per-peak vectors up front, then assemble the result.
    let inv_mobility = self.inv_mobility.clone();
    let mz = self.mz.clone();
    let intensity = self.intensity.clone();

    ImsFrame {
        retention_time: self.retention_time,
        inv_mobility,
        mz,
        intensity,
    }
}

///
/// Convert a given TimsFrame to a vector of TimsSpectrum.
///
/// # Examples
///
/// ```
/// use mscore::{TimsSpectrum, TimsFrame};
///
/// let frame = TimsFrame::new(1, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
/// let tims_spectra = frame.to_tims_spectra();
/// ```
pub fn to_tims_spectra(&self) -> Vec<TimsSpectrum> {
let mut spectra = BTreeMap::<i32, (f64, Vec<i32>, Vec<f64>, Vec<f64>)>::new();

Expand All @@ -353,6 +414,29 @@ impl TimsFrame {

tims_spectra
}

/// Converts this `TimsFrame` into a vector of `ImsSpectrum`.
///
/// The frame is first split with `to_tims_spectra`; each resulting spectrum is
/// then rebuilt as an `ImsSpectrum`, carrying over only its retention time,
/// inverse mobility, m/z values and intensities.
///
/// # Examples
///
/// ```
/// use mscore::TimsFrame;
///
/// let frame = TimsFrame::new(1, 100.0, vec![1, 2], vec![0.1, 0.2], vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
/// let ims_spectra = frame.to_ims_spectra();
/// ```
pub fn to_ims_spectra(&self) -> Vec<ImsSpectrum> {
    self.to_tims_spectra()
        .into_iter()
        .map(|tims| {
            // Move the m/z and intensity vectors out of the TOF spectrum.
            let mz_spectrum = MzSpectrum::new(tims.spectrum.mz, tims.spectrum.intensity);
            ImsSpectrum::new(tims.retention_time, tims.inv_mobility, mz_spectrum)
        })
        .collect()
}
}

impl fmt::Display for TimsFrame {
Expand All @@ -363,7 +447,7 @@ impl fmt::Display for TimsFrame {
.max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
.unwrap();

write!(f, "TimsFrame(id: {}, rt: {}, data points: {}, max value: (mz: {}, intensity: {}))", self.frame_id, self.retention_time, self.scan.len(), mz, i)
write!(f, "TimsFrame(id: {}, rt: {}, data points: {}, max by intensity: (mz: {}, intensity: {}))", self.frame_id, self.retention_time, self.scan.len(), format!("{:.3}", mz), i)
}
}

Expand Down
124 changes: 111 additions & 13 deletions rustdf/src/data/handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,35 @@ use byteorder::{LittleEndian, ByteOrder, ReadBytesExt};

use mscore::{TimsFrame, ImsFrame};

/// Decompresses a ZSTD-compressed byte slice into a freshly allocated buffer.
///
/// # Arguments
///
/// * `compressed_data` - A byte slice that holds the compressed data
///
/// # Returns
///
/// * A vector of u8 that holds the decompressed data
///
/// # Errors
///
/// Returns the `io::Error` raised while creating the decoder or while reading
/// the decompressed stream to its end.
///
fn zstd_decompress(compressed_data: &[u8]) -> io::Result<Vec<u8>> {
    let mut output = Vec::new();
    // Stream the whole decoded payload into `output`.
    zstd::Decoder::new(compressed_data)?.read_to_end(&mut output)?;
    Ok(output)
}

/// Parses the decompressed bruker binary data
///
/// # Arguments
///
/// * `decompressed_bytes` - A byte slice that holds the decompressed data
///
/// # Returns
///
/// * `scan_indices` - A vector of u32 that holds the scan indices
/// * `tof_indices` - A vector of u32 that holds the tof indices
/// * `intensities` - A vector of u32 that holds the intensities
///
fn parse_decompressed_bruker_binary_data(decompressed_bytes: &[u8]) -> Result<(Vec<u32>, Vec<u32>, Vec<u32>), Box<dyn std::error::Error>> {

let mut buffer_u32 = Vec::new();
Expand All @@ -30,16 +52,22 @@ fn parse_decompressed_bruker_binary_data(decompressed_bytes: &[u8]) -> Result<(V
buffer_u32.push(value);
}

// get the number of scans
let scan_count = buffer_u32[0] as usize;

// get the scan indices
let mut scan_indices: Vec<u32> = buffer_u32[..scan_count].to_vec();
for index in &mut scan_indices {
*index /= 2;
}

// first scan index is always 0?
scan_indices[0] = 0;


// get the tof indices, which are the first half of the buffer after the scan indices
let mut tof_indices: Vec<u32> = buffer_u32.iter().skip(scan_count).step_by(2).cloned().collect();


// convert the tof indices to cumulative sums
let mut index = 0;
for &size in &scan_indices {
let mut current_sum = 0;
Expand All @@ -49,21 +77,24 @@ fn parse_decompressed_bruker_binary_data(decompressed_bytes: &[u8]) -> Result<(V
index += 1;
}
}


// get the intensities, which are the second half of the buffer
let intensities: Vec<u32> = buffer_u32.iter().skip(scan_count + 1).step_by(2).cloned().collect();


// get the last scan index
let last_scan = intensities.len() as u32 - scan_indices[1..].iter().sum::<u32>();


// shift the scan indices to the right
for i in 0..(scan_indices.len() - 1) {
scan_indices[i] = scan_indices[i + 1];
}

// set the last scan index
let len = scan_indices.len();

scan_indices[len - 1] = last_scan;

let adjusted_tof_indices: Vec<u32> = tof_indices.iter().map(|&val| val - 1).collect();

// adjust the tof indices to be zero-indexed
let adjusted_tof_indices: Vec<u32> = tof_indices.iter().map(|&val| val - 1).collect();
Ok((scan_indices, adjusted_tof_indices, intensities))
}

Expand All @@ -90,23 +121,39 @@ pub struct TimsDataset {
}

impl TimsDataset {
/// Creates a new TimsDataset
///
/// # Arguments
///
/// * `bruker_lib_path` - A string slice that holds the path to the bruker library
/// * `data_path` - A string slice that holds the path to the data
///
/// # Returns
///
/// * `tims_dataset` - A TimsDataset struct
///
pub fn new(bruker_lib_path: &str, data_path: &str) -> Result<TimsDataset, Box<dyn std::error::Error>> {


// Load the library
let bruker_lib = BrukerTimsDataLibrary::new(bruker_lib_path, data_path)?;
// get the global and frame meta data
let global_meta_data = read_global_meta_sql(data_path)?;
let frame_meta_data = read_meta_data_sql(data_path)?;

// get the max scan count
let max_scan_count = frame_meta_data.iter().map(|x| x.num_scans).max().unwrap() + 1;

let mut frame_idptr: Vec<i64> = Vec::new();
frame_idptr.resize(frame_meta_data.len() + 1, 0);

// get the frame idptr values
for (i, row) in frame_meta_data.iter().enumerate() {
frame_idptr[i + 1] = row.num_peaks + frame_idptr[i];
}

// get the tims offset values
let tims_offset_values = frame_meta_data.iter().map(|x| x.tims_id).collect::<Vec<i64>>();

// get the acquisition mode
let aquisition_mode = match frame_meta_data[0].scan_mode {
8 => AcquisitionMode::DDA,
9 => AcquisitionMode::DIA,
Expand All @@ -127,8 +174,18 @@ impl TimsDataset {
})
}

/// translate tof to mz values calling the bruker library
///
/// # Arguments
///
/// * `frame_id` - A u32 that holds the frame id
/// * `tof` - A vector of u32 that holds the tof values
///
/// # Returns
///
/// * `mz_values` - A vector of f64 that holds the mz values
///
pub fn tof_to_mz(&self, frame_id: u32, tof: &Vec<u32>) -> Vec<f64> {
// TRANSLATE TOF TO MZ
let mut dbl_tofs: Vec<f64> = Vec::new();
dbl_tofs.resize(tof.len(), 0.0);

Expand All @@ -144,8 +201,18 @@ impl TimsDataset {
mz_values
}

/// translate scan to inverse mobility values calling the bruker library
///
/// # Arguments
///
/// * `frame_id` - A u32 that holds the frame id
/// * `scan` - A vector of i32 that holds the scan values
///
/// # Returns
///
/// * `inv_mob` - A vector of f64 that holds the inverse mobility values
///
pub fn scan_to_inverse_mobility(&self, frame_id: u32, scan: &Vec<i32>) -> Vec<f64> {
// TRANSLATE SCAN TO INV MOB
let mut dbl_scans: Vec<f64> = Vec::new();
dbl_scans.resize(scan.len(), 0.0);

Expand All @@ -161,13 +228,34 @@ impl TimsDataset {
inv_mob
}

/// Expands per-scan peak counts into one scan value per peak.
///
/// Entry `i` of `scan` holds a count; the result contains that many copies of
/// `i` (when `zero_indexed` is true) or of `i + 1` (when it is false), in
/// order of increasing `i`. Entries with a count of 0 contribute nothing.
///
/// # Arguments
///
/// * `scan` - A vector of u32 that holds the number of values per scan
/// * `zero_indexed` - A bool that indicates if the emitted scan values start at 0 (true) or 1 (false)
///
/// # Returns
///
/// * A vector of i32 that holds one scan value per counted element
///
pub fn flatten_scan_values(&self, scan: &Vec<u32>, zero_indexed: bool) -> Vec<i32> {
    let offset = if zero_indexed { 0 } else { 1 };
    scan.iter()
        .enumerate()
        .flat_map(|(index, &count)| {
            // Lazily repeat the scan value instead of allocating a temporary
            // Vec for every scan.
            std::iter::repeat((index + offset) as i32).take(count as usize)
        })
        .collect()
}

/// get a frame from the tims dataset
///
/// # Arguments
///
/// * `frame_id` - A u32 that holds the frame id
///
/// # Returns
///
/// * `frame` - A TimsFrame struct
///
pub fn get_frame(&self, frame_id: u32) -> Result<TimsFrame, Box<dyn std::error::Error>> {

let frame_index = (frame_id - 1) as usize;
Expand Down Expand Up @@ -202,7 +290,7 @@ impl TimsDataset {
let (scan, tof, intensity) = parse_decompressed_bruker_binary_data(&decompressed_bytes)?;
let intensity_dbl = intensity.iter().map(|&x| x as f64).collect();
let tof_i32 = tof.iter().map(|&x| x as i32).collect();
let scan_i32: Vec<i32> = self.flatten_scan_values(&scan, false);
let scan_i32: Vec<i32> = self.flatten_scan_values(&scan, true);

let mz = self.tof_to_mz(frame_id, &tof);
let inv_mobility = self.scan_to_inverse_mobility(frame_id, &scan_i32);
Expand All @@ -224,6 +312,16 @@ impl TimsDataset {
}
}

/// get a frame from the tims dataset as an ImsFrame
///
/// # Arguments
///
/// * `frame_id` - A u32 that holds the frame id
///
/// # Returns
///
/// * `frame` - An ImsFrame struct
///
pub fn get_ims_frame(&self, frame_id: u32) -> Result<ImsFrame, Box<dyn std::error::Error>> {
let frame = self.get_frame(frame_id)?;
Ok(ImsFrame{ retention_time: frame.retention_time, inv_mobility: frame.inv_mobility, mz: frame.mz, intensity: frame.intensity})
Expand Down
2 changes: 1 addition & 1 deletion rustdf/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ fn main() {
let tims_data = TimsDataset::new(bruker_lib_path, data_path);
match tims_data {
Ok(tims_data) => {
for i in 25_000..30_000 {
for i in 1..9 {
let frame = tims_data.get_frame(i);
match frame {
Ok(frame) => {
Expand Down

0 comments on commit f9e9248

Please sign in to comment.