Skip to content

Commit

Permalink
refactor: support chunk-wise processing in containsMz
Browse files Browse the repository at this point in the history
- Add support for chunk-wise processing to `containsMz()`. Related to issue #340.
  • Loading branch information
jorainer committed Nov 20, 2024
1 parent 803c9d2 commit 6eeeb79
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 88 deletions.
4 changes: 2 additions & 2 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ root = true
charset = utf-8
end_of_line = lf
trim_trailing_whitespace = true
insert_final_newline = false
insert_final_newline = true

[*.R]
indent_style = space
Expand All @@ -22,4 +22,4 @@ indent_style = tab

[*.yml]
indent_style = space
indent_size = 2
indent_size = 2
2 changes: 1 addition & 1 deletion .github/workflows/check-bioc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
fail-fast: false
matrix:
config:
- { os: ubuntu-latest, r: '4.4', bioc: '3.20', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
- { os: ubuntu-latest, r: 'devel', bioc: 'devel', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
- { os: macOS-latest, r: '4.4', bioc: '3.20'}
- { os: windows-latest, r: '4.4', bioc: '3.20'}
env:
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Spectra
Title: Spectra Infrastructure for Mass Spectrometry Data
Version: 1.17.0
Version: 1.17.1
Description: The Spectra package defines an efficient infrastructure
for storing and handling mass spectrometry spectra and functionality to
subset, process, visualize and compare spectra data. It provides different
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Spectra 1.17

## Changes in 1.17.1

- Refactor `containsMz()` to support chunk-wise processing.

# Spectra 1.15

## Changes in 1.15.13
Expand Down
46 changes: 11 additions & 35 deletions R/Spectra-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,13 @@ NULL
#' @description
#'
#' This function applies the processing queue and an arbitrary function to
#' the peaks matrix of each spectrum of the `Spectra` object `object`.
#' the peaks matrix of each spectrum of the `Spectra` object `object`. It has
#' built-in parallel and/or chunk-wise processing enabled through parameter
#' `f`, which defines how the `Spectra` (or rather its backend) needs
#' to be split. The default `f = .parallel_processing_factor(object)` splits
#' the backend by chunk (if a finite chunk size is defined for the `Spectra`)
#' or by its optimal parallel processing factor. See the description of
#' the `.parallel_processing_factor()` function below for information.
#'
#' @param object `Spectra` object.
#'
Expand All @@ -78,7 +84,8 @@ NULL
#'
#' @param f `factor` or `vector` that can be coerced to one defining how the
#' data should be split for parallel processing. Set to `NULL` or
#' `factor()` to disable splitting and parallel processing.
#' `factor()` to disable splitting and parallel processing. See function
#' description above for details and information.
#'
#' @param columns `character` defining the columns that should be returned.
#' This will be passed to the backend's `peaksData` function.
Expand Down Expand Up @@ -571,39 +578,8 @@ combineSpectra <- function(x, f = x$dataStorage, p = x$dataStorage,

#' @description
#'
#' Internal function to check if any (or all) of the provided `mz` values are
#' in the spectras' m/z.
#'
#' @param x `Spectra` object
#'
#' @param mz `numeric` of m/z value(s) to check in each spectrum of `x`.
#'
#' @param tolerance `numeric(1)` with the tolerance.
#'
#' @param ppm `numeric(1)` with the ppm.
#'
#' @param condFun `function` such as `any` or `all`.
#'
#' @param parallel `BiocParallel` parameter object.
#'
#' @return `logical` same length as `x`.
#'
#' @author Johannes Rainer
#'
#' @importFrom MsCoreUtils common
#'
#' @noRd
.has_mz <- function(x, mz = numeric(), tolerance = 0, ppm = 20, condFun = any,
                    parallel = SerialParam()) {
    ## m/z values of `x`, extracted with the supplied parallel processing
    ## setup; iterated over element by element below.
    spectra_mz <- mz(x, BPPARAM = parallel)
    ## Reduce the result of `common()` for one element's m/z values to a
    ## single logical using `condFun` (e.g. `any` or `all`).
    check_one <- function(z) {
        condFun(common(mz, z, tolerance = tolerance, ppm = ppm))
    }
    vapply(spectra_mz, FUN = check_one, FUN.VALUE = logical(1))
}

#' @description
#'
#' Same as `.has_mz` only that a different `mz` is used for each spectrum in
#' `x`. Length of `mz` is thus expected to be equal to length of `x`.
#' Check for presence of an m/z value in each spectrum. Each spectrum gets
#' its own m/z.
#'
#' @param mz `numeric` **same length as `x`**.
#'
Expand Down
41 changes: 18 additions & 23 deletions R/Spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -3278,23 +3278,18 @@ setMethod("containsMz", "Spectra", function(object, mz = numeric(),
tolerance = 0,
ppm = 20, which = c("any", "all"),
BPPARAM = bpparam()) {
cond_fun <- match.fun(match.arg(which))
if (all(is.na(mz)))
return(rep(NA, length(object)))
mz <- unique(sort(mz))
BPPARAM <- backendBpparam(object@backend, BPPARAM)
## TODO: fix to use .peaksapply instead.
if (is(BPPARAM, "SerialParam"))
.has_mz(object, mz, tolerance = tolerance, ppm = ppm,
condFun = cond_fun, parallel = BPPARAM)
else {
sp <- SerialParam()
f <- as.factor(dataStorage(object))
res <- .lapply(object, FUN = .has_mz, mz = mz, tolerance = tolerance,
condFun = cond_fun, parallel = sp, f = f,
BPPARAM = BPPARAM)
unsplit(res, f = f)
}
if (length(object)) {
cond_fun <- match.fun(match.arg(which))
if (all(is.na(mz)))
return(rep(NA, length(object)))
mz <- unique(sort(mz))
BPPARAM <- backendBpparam(object@backend, BPPARAM)
unlist(.peaksapply(
object, FUN = .peaks_contain_mz, mz = mz, tolerance = tolerance,
ppm = ppm, condFun = cond_fun, BPPARAM = BPPARAM),
use.names = FALSE
)
} else logical()
})

#' @rdname addProcessing
Expand Down Expand Up @@ -3327,12 +3322,12 @@ setMethod("containsNeutralLoss", "Spectra", function(object, neutralLoss = 0,
#' @export
setMethod("entropy", "Spectra", function(object, normalized = TRUE) {
    ## An empty `Spectra` yields an empty numeric vector.
    if (length(object)) {
        ## Choose the entropy function once: `nentropy` for
        ## `normalized = TRUE`, `entropy` otherwise.
        if (normalized) entropy_fun <- nentropy
        else entropy_fun <- entropy
        ## Evaluate the selected function on the "intensity" column of each
        ## peaks matrix passed in by `.peaksapply()` and flatten the result
        ## into an unnamed vector. This is done a single time; repeating the
        ## call would only recompute and discard the same values.
        unlist(.peaksapply(
            object, FUN = function(pks, ...) entropy_fun(pks[, "intensity"])),
            use.names = FALSE
        )
    } else numeric()
})
#' @rdname addProcessing
Expand Down
10 changes: 10 additions & 0 deletions R/peaks-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -737,3 +737,13 @@ joinPeaksNone <- function(x, y, ...) {
if (keep) x[sel, , drop = FALSE]
else x[!sel, , drop = FALSE]
}

#' Test whether a peaks matrix contains peaks with the given m/z value(s).
#' Unlike a peaks-processing function this does **not** return a peaks
#' matrix, but a single logical value.
#'
#' @param x peaks matrix with a column named `"mz"`.
#'
#' @param mz `numeric` with the m/z value(s) to look for.
#'
#' @param tolerance passed to `common()`.
#'
#' @param ppm passed to `common()`.
#'
#' @param condFun `function` such as `any` or `all` that is applied to the
#'     result of `common()`.
#'
#' @param ... currently not used.
#'
#' @return `logical(1)`
#' @noRd
.peaks_contain_mz <- function(x, mz = numeric(), tolerance = 0, ppm = 20,
                              condFun = any, ...) {
    ## Match the query m/z value(s) against the m/z column of the peaks matrix.
    matched <- common(mz, x[, "mz"], tolerance = tolerance, ppm = ppm)
    condFun(matched)
}
26 changes: 0 additions & 26 deletions tests/testthat/test_Spectra-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -352,32 +352,6 @@ test_that("dropNaSpectraVariables works", {
function(z) !any(is.na(z)))))
})

test_that(".has_mz works", {
    ## Part 1: the result is a logical with one element per spectrum.
    sciex_sps <- setBackend(Spectra(sciex_mzr)[1:10], MsBackendDataFrame())
    sciex_mzs <- mz(sciex_sps)
    query <- c(sciex_mzs[[2]][5], sciex_mzs[[3]][8])

    res <- .has_mz(sciex_sps, mz = query, ppm = 0)
    expect_true(length(res) == length(sciex_sps))
    expect_true(is.logical(res))

    ## Part 2: three small spectra with known m/z values.
    spd <- DataFrame(msLevel = c(2L, 2L, 2L), rtime = c(1, 2, 3))
    spd$mz <- list(c(12, 14, 45, 56), c(14.1, 34, 56.1), c(12.1, 14.15, 34.1))
    spd$intensity <- list(c(10, 20, 30, 40), c(11, 21, 31), c(12, 22, 32))
    sps <- Spectra(spd)

    ## Default `condFun` (`any`), with and without an absolute tolerance.
    res <- .has_mz(sps, mz = c(14, 34))
    expect_equal(res, c(TRUE, TRUE, FALSE))
    res <- .has_mz(sps, mz = c(14, 34), tolerance = 0.15)
    expect_equal(res, c(TRUE, TRUE, TRUE))

    ## `condFun = all`, with and without an absolute tolerance.
    res <- .has_mz(sps, mz = c(14, 34), condFun = all)
    expect_true(all(!res))
    res <- .has_mz(sps, mz = c(14, 34), condFun = all, tolerance = 0.15)
    expect_equal(res, c(FALSE, TRUE, TRUE))
})

test_that(".has_mz_each works", {
spd <- DataFrame(msLevel = c(2L, 2L, 2L), rtime = c(1, 2, 3))
spd$mz <- list(c(12, 14, 45, 56), c(14.1, 34, 56.1), c(12.1, 14.15, 34.1))
Expand Down
13 changes: 13 additions & 0 deletions tests/testthat/test_peaks-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -722,3 +722,16 @@ test_that(".peaks_filter_ranges works", {
ranges = ranges, keep = FALSE)
expect_equal(res, x)
})

test_that(".peaks_contain_mz works", {
    ## Peaks matrix with a named "mz" column and an unnamed second column.
    peaks <- cbind(mz = c(1.3, 1.5, 32.1, 45.6), c(1, 2, 3, 4))
    query <- c(1.5, 32.2)

    ## Default (empty) `mz`.
    expect_false(.peaks_contain_mz(peaks))
    ## Single m/z value that is present in the peaks matrix.
    expect_true(.peaks_contain_mz(peaks, 1.5))

    ## Without `tolerance` only the first query value is found.
    expect_false(.peaks_contain_mz(peaks, query, condFun = all))
    expect_true(.peaks_contain_mz(peaks, query, condFun = any))

    ## With `tolerance = 0.1` both query values are found.
    expect_true(.peaks_contain_mz(peaks, query, condFun = any,
                                  tolerance = 0.1))
    expect_true(.peaks_contain_mz(peaks, query, condFun = all,
                                  tolerance = 0.1))
})

0 comments on commit 6eeeb79

Please sign in to comment.