From 4687a57a73a59c774497f4febf7296bd96b45d83 Mon Sep 17 00:00:00 2001 From: jorainer Date: Fri, 10 Dec 2021 14:14:52 +0100 Subject: [PATCH] feat: add filterPrecursorMzValues (issue #230) - Add the `filterPrecursorMzValues` method to enable filtering based on multiple target precursor m/z values. --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS.md | 5 +++++ R/AllGenerics.R | 3 +++ R/MsBackend.R | 29 +++++++++++++++++++++++++--- R/Spectra.R | 20 +++++++++++++++++-- R/functions-util.R | 19 ++++++++++++++++++ man/MsBackend.Rd | 18 ++++++++++++++++- man/Spectra.Rd | 10 ++++++++-- man/hidden_aliases.Rd | 3 +++ tests/testthat/test_functions-util.R | 20 +++++++++++++++++++ 11 files changed, 121 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dca31189..365ea82a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Spectra Title: Spectra Infrastructure for Mass Spectrometry Data -Version: 1.5.2 +Version: 1.5.3 Description: The Spectra package defines an efficient infrastructure for storing and handling mass spectrometry spectra and functionality to subset, process, visualize and compare spectra data. It provides different diff --git a/NAMESPACE b/NAMESPACE index 834d76fd..19ca7e28 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -71,6 +71,7 @@ exportMethods(filterMzValues) exportMethods(filterPolarity) exportMethods(filterPrecursorCharge) exportMethods(filterPrecursorMz) +exportMethods(filterPrecursorMzValues) exportMethods(filterPrecursorScan) exportMethods(filterRt) exportMethods(intensity) diff --git a/NEWS.md b/NEWS.md index 3fdb9a8a..4aae0953 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # Spectra 1.5 +## Changes in 1.5.3 + +- Add `filterPrecursorMzValues` method to filter `Spectra` keeping all spectra + with matching precursor m/z (supports multiple target precursor m/z values). + ## Changes in 1.5.2 - Small documentation update (related to `MsCoreUtils` issue diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 58b3e9dc..d8048239 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -35,6 +35,9 @@ setGeneric("filterMzRange", function(object, ...) setGeneric("filterMzValues", function(object, ...) standardGeneric("filterMzValues")) #' @rdname hidden_aliases +setGeneric("filterPrecursorMzValues", function(object, ...) + standardGeneric("filterPrecursorMzValues")) +#' @rdname hidden_aliases setGeneric("isReadOnly", function(object, ...) standardGeneric("isReadOnly")) #' @rdname hidden_aliases diff --git a/R/MsBackend.R b/R/MsBackend.R index 54a5ec32..6ec9331b 100644 --- a/R/MsBackend.R +++ b/R/MsBackend.R @@ -90,7 +90,12 @@ #' #' @param mz For `filterIsolationWindow`: `numeric(1)` with the m/z value to #' filter the object. For `filterPrecursorMz`: `numeric(2)` with the lower -#' and upper m/z boundary. +#' and upper m/z boundary. For `filterPrecursorMzValues`: `numeric` with the +#' m/z value(s) to filter the object. +#' +#' @param ppm For `filterPrecursorMzValues`: `numeric(1)` with the m/z-relative +#' maximal acceptable difference for a m/z to be considered matching. See +#' [closest()] for details. #' #' @param z For `filterPrecursorCharge`: `integer()` with the precursor charges #' to be used as filter. @@ -112,6 +117,10 @@ #' @param spectraVariables For `selectSpectraVariables`: `character` with the #' names of the spectra variables to which the backend should be subsetted. #' +#' @param tolerance For `filterPrecursorMzValues`: `numeric(1)` with the +#' maximal absolute acceptable difference for an m/z value to be considered +#' matching. See [closest()] for details. +#' #' @param use.names For `lengths`: whether spectrum names should be used. #' #' @param value replacement value for `<-` methods. See individual @@ -253,6 +262,11 @@ #' Implementation of this method is optional since a default implementation #' for `MsBackend` is available. #' +#' - `filterPrecursorMzValues`: retains spectra with a precursor m/z matching +#' any of the provided m/z values (given `ppm` and `tolerance`). +#' Implementation of this method is optional since a default implementation +#' for `MsBackend` is available. +#' #' - `filterPrecursorCharge`: retains spectra with the defined precursor #' charge(s). #' Implementation of this method is optional since a default implementation @@ -844,6 +858,17 @@ setMethod("filterPrecursorMz", "MsBackend", } else object }) +#' @exportMethod filterPrecursorMzValues +#' +#' @rdname MsBackend +setMethod("filterPrecursorMzValues", "MsBackend", + function(object, mz = numeric(), ppm = 20, tolerance = 0) { + if (length(mz)) { + object[.values_match_mz(precursorMz(object), mz = mz, + ppm = ppm, tolerance = tolerance)] + } else object + }) + #' @exportMethod filterPrecursorCharge #' #' @importMethodsFrom ProtGenerics filterPrecursorCharge @@ -857,8 +882,6 @@ setMethod("filterPrecursorCharge", "MsBackend", } else object }) - - #' @exportMethod filterPrecursorScan #' #' @importMethodsFrom ProtGenerics filterPrecursorScan diff --git a/R/Spectra.R b/R/Spectra.R index d23c3786..fcd941e5 100644 --- a/R/Spectra.R +++ b/R/Spectra.R @@ -353,6 +353,10 @@ NULL #' provided m/z range. See examples for details on selecting spectra with #' a precursor m/z for a target m/z accepting a small difference in *ppm*. #' +#' - `filterPrecursorMzValues`: retains spectra with precursor m/z matching any +#' of the provided m/z values (given `ppm` and `tolerance`). Spectra with +#' missing precursor m/z value (e.g. MS1 spectra) are dropped. +#' #' - `filterPrecursorCharge`: retains spectra with the defined precursor #' charge(s). #' @@ -702,8 +706,8 @@ NULL #' @param mz For `filterIsolationWindow`: `numeric(1)` with the m/z value to #' filter the object. For `filterPrecursorMz` and `filterMzRange`: #' `numeric(2)` defining the lower and upper m/z boundary. -#' For `filterMzValues`: `numeric` with the m/z values to match peaks -#' against. +#' For `filterMzValues` and `filterPrecursorMzValues`: `numeric` with the +#' m/z values to match peaks or precursor m/z against. #' #' @param z For `filterPrecursorCharge`: `integer()` with the precursor charges #' to be used as filter. @@ -1943,6 +1947,18 @@ setMethod("filterPrecursorMz", "Spectra", object }) +#' @rdname Spectra +setMethod("filterPrecursorMzValues", "Spectra", + function(object, mz = numeric(), ppm = 20, tolerance = 0) { + object@backend <- filterPrecursorMzValues( + object@backend, mz, ppm = ppm, tolerance = tolerance) + object@processing <- .logging( + object@processing, + "Filter: select spectra with precursor m/z matching ", + paste0(mz, collapse = ", "), "") + object + }) + #' @rdname Spectra setMethod("filterPrecursorCharge", "Spectra", function(object, z = integer()) { diff --git a/R/functions-util.R b/R/functions-util.R index 85ac53e2..2abe12a4 100644 --- a/R/functions-util.R +++ b/R/functions-util.R @@ -70,3 +70,22 @@ setAs("logical", "factor", function(from, to) factor(from)) sanitize_file_name <- function(x) { file.path(normalizePath(dirname(x)), path_sanitize(basename(x))) } + +#' Helper function that matches `x` against `mz` (using the `closest` function) +#' and returns the indices of `x` that match any of the values in `mz`. The +#' function takes care of sorting `x` and `mz` and deals also with missing +#' values. +#' +#' @return `integer` with the indices of values in `x` that are not `NA` and +#' are matching any of the values in `mz` given `ppm` and `tolerance`. +#' +#' @noRd +#' +#' @author Johannes Rainer +.values_match_mz <- function(x, mz, ppm = 20, tolerance = 0) { + keep <- which(!is.na(x)) + idx <- order(x[keep]) + mtch <- closest(x[keep][idx], sort(mz), tolerance = tolerance, ppm = ppm, + duplicates = "keep", .check = FALSE) + keep[!is.na(mtch[order(idx)])] +} diff --git a/man/MsBackend.Rd b/man/MsBackend.Rd index 5a2f1ff1..39cf1913 100644 --- a/man/MsBackend.Rd +++ b/man/MsBackend.Rd @@ -32,6 +32,7 @@ \alias{filterMsLevel,MsBackend-method} \alias{filterPolarity,MsBackend-method} \alias{filterPrecursorMz,MsBackend-method} +\alias{filterPrecursorMzValues,MsBackend-method} \alias{filterPrecursorCharge,MsBackend-method} \alias{filterPrecursorScan,MsBackend-method} \alias{filterRt,MsBackend-method} @@ -129,6 +130,8 @@ \S4method{filterPrecursorMz}{MsBackend}(object, mz = numeric()) +\S4method{filterPrecursorMzValues}{MsBackend}(object, mz = numeric(), ppm = 20, tolerance = 0) + \S4method{filterPrecursorCharge}{MsBackend}(object, z = integer()) \S4method{filterPrecursorScan}{MsBackend}(object, acquisitionNum = integer(), f = dataOrigin(object)) @@ -256,7 +259,8 @@ only for spectra of selected \code{dataStorage}.} \item{mz}{For \code{filterIsolationWindow}: \code{numeric(1)} with the m/z value to filter the object. For \code{filterPrecursorMz}: \code{numeric(2)} with the lower -and upper m/z boundary.} +and upper m/z boundary. For \code{filterPrecursorMzValues}: \code{numeric} with the +m/z value(s) to filter the object.} \item{msLevel}{\code{integer} defining the MS level of the spectra to which the function should be applied. For \code{filterMsLevel}: the MS level to which @@ -265,6 +269,14 @@ function should be applied. For \code{filterMsLevel}: the MS level to which \item{polarity}{For \code{filterPolarity}: \code{integer} specifying the polarity to to subset \code{object}.} +\item{ppm}{For \code{filterPrecursorMzValues}: \code{numeric(1)} with the m/z-relative +maximal acceptable difference for a m/z to be considered matching. See +\code{\link[=closest]{closest()}} for details.} + +\item{tolerance}{For \code{filterPrecursorMzValues}: \code{numeric(1)} with the +maximal absolute acceptable difference for an m/z value to be considered +matching. See \code{\link[=closest]{closest()}} for details.} + \item{z}{For \code{filterPrecursorCharge}: \code{integer()} with the precursor charges to be used as filter.} @@ -457,6 +469,10 @@ for \code{MsBackend} is available. provided m/z range. Implementation of this method is optional since a default implementation for \code{MsBackend} is available. +\item \code{filterPrecursorMzValues}: retains spectra with a precursor m/z matching +any of the provided m/z values (given \code{ppm} and \code{tolerance}). +Implementation of this method is optional since a default implementation +for \code{MsBackend} is available. \item \code{filterPrecursorCharge}: retains spectra with the defined precursor charge(s). Implementation of this method is optional since a default implementation diff --git a/man/Spectra.Rd b/man/Spectra.Rd index 0275c769..82c49572 100644 --- a/man/Spectra.Rd +++ b/man/Spectra.Rd @@ -79,6 +79,7 @@ \alias{filterMzValues,Spectra-method} \alias{filterPolarity,Spectra-method} \alias{filterPrecursorMz,Spectra-method} +\alias{filterPrecursorMzValues,Spectra-method} \alias{filterPrecursorCharge,Spectra-method} \alias{filterPrecursorScan,Spectra-method} \alias{filterRt,Spectra-method} @@ -331,6 +332,8 @@ estimatePrecursorIntensity( \S4method{filterPrecursorMz}{Spectra}(object, mz = numeric()) +\S4method{filterPrecursorMzValues}{Spectra}(object, mz = numeric(), ppm = 20, tolerance = 0) + \S4method{filterPrecursorCharge}{Spectra}(object, z = integer()) \S4method{filterPrecursorScan}{Spectra}(object, acquisitionNum = integer(), f = dataOrigin(object)) @@ -492,8 +495,8 @@ method description or expected data type.} \item{mz}{For \code{filterIsolationWindow}: \code{numeric(1)} with the m/z value to filter the object. For \code{filterPrecursorMz} and \code{filterMzRange}: \code{numeric(2)} defining the lower and upper m/z boundary. -For \code{filterMzValues}: \code{numeric} with the m/z values to match peaks -against.} +For \code{filterMzValues} and \code{filterPrecursorMzValues}: \code{numeric} with the +m/z values to match peaks or precursor m/z against.} \item{which}{for \code{containsMz}: either \code{"any"} or \code{"all"} defining whether any (the default) or all provided \code{mz} have to be present in the spectrum.} @@ -916,6 +919,9 @@ original order). \item \code{filterPrecursorMz}: retains spectra with a precursor m/z within the provided m/z range. See examples for details on selecting spectra with a precursor m/z for a target m/z accepting a small difference in \emph{ppm}. +\item \code{filterPrecursorMzValues}: retains spectra with precursor m/z matching any +of the provided m/z values (given \code{ppm} and \code{tolerance}). Spectra with +missing precursor m/z value (e.g. MS1 spectra) are dropped. \item \code{filterPrecursorCharge}: retains spectra with the defined precursor charge(s). \item \code{filterPrecursorScan}: retains parent (e.g. MS1) and children scans (e.g. diff --git a/man/hidden_aliases.Rd b/man/hidden_aliases.Rd index 39716f59..ad4ef921 100644 --- a/man/hidden_aliases.Rd +++ b/man/hidden_aliases.Rd @@ -17,6 +17,7 @@ \alias{export} \alias{filterMzRange} \alias{filterMzValues} +\alias{filterPrecursorMzValues} \alias{isReadOnly} \alias{peaksData} \alias{peaksData<-} @@ -141,6 +142,8 @@ filterMzRange(object, ...) filterMzValues(object, ...) +filterPrecursorMzValues(object, ...) + isReadOnly(object, ...) peaksData(object, ...) diff --git a/tests/testthat/test_functions-util.R b/tests/testthat/test_functions-util.R index ae2842f2..d0fc86e1 100644 --- a/tests/testthat/test_functions-util.R +++ b/tests/testthat/test_functions-util.R @@ -27,3 +27,23 @@ test_that("sanitize_file_name works", { expect_equal(basename(res)[1], "memory") expect_equal(basename(res)[2], "path") }) + +test_that(".values_match_mz works", { + pmz <- c(12.4, 15, 3, 12.4, 3, 1234, 23, 5, 12.4, NA, 3) + mz <- c(200, 12.4, 3) + + res <- .values_match_mz(pmz, mz) + expect_true(all(pmz[res] %in% mz)) + expect_false(any(pmz[-res] %in% mz)) + + pmz <- rev(pmz) + res <- .values_match_mz(pmz, mz) + expect_true(all(pmz[res] %in% mz)) + expect_false(any(pmz[-res] %in% mz)) + + res <- .values_match_mz(c(NA, NA), mz) + expect_identical(res, integer()) + + res <- .values_match_mz(pmz, c(NA, 3)) + expect_true(all(pmz[res] == 3)) +})