Skip to content

Commit

Permalink
Addition of filterValues + move main code to MsBackend.R
Browse files Browse the repository at this point in the history
  • Loading branch information
philouail committed Feb 28, 2024
1 parent e3a07a1 commit 484e929
Show file tree
Hide file tree
Showing 9 changed files with 341 additions and 71 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ exportMethods(filterPrecursorMzValues)
exportMethods(filterPrecursorScan)
exportMethods(filterRanges)
exportMethods(filterRt)
exportMethods(filterValues)
exportMethods(intensity)
exportMethods(ionCount)
exportMethods(isCentroided)
Expand Down
5 changes: 3 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

## Changes in 1.13.5

- Add `filterRanges` function to allow to filter the spectra object based on
ranges of any variables of the `spectraData`.
- Add `filterRanges()` and `filterValues()` functions to allow filtering of a
Spectra object based on ranges or similarities of any existing `spectraData`
variables.

## Changes in 1.13.4

Expand Down
3 changes: 3 additions & 0 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ setGeneric("filterPrecursorMzRange", function(object, ...)
setGeneric("filterRanges", function(object, ...)
## S4 generic for `filterRanges`: method dispatch happens on `object`; any
## further arguments are handed to the selected method through `...`.
standardGeneric("filterRanges"))
#' @rdname hidden_aliases
setGeneric("filterValues", function(object, ...)
## S4 generic for `filterValues`: method dispatch happens on `object`; any
## further arguments (e.g. `spectraVariables`, `values`, `ppm`, `tolerance`
## in the methods of this package) are handed over through `...`.
standardGeneric("filterValues"))
#' @rdname hidden_aliases
setGeneric("isReadOnly", function(object, ...)
## S4 generic for `isReadOnly`: method dispatch happens on `object`; any
## further arguments are handed to the selected method through `...`.
standardGeneric("isReadOnly"))
#' @rdname neutralLoss
Expand Down
109 changes: 104 additions & 5 deletions R/MsBackend.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@
#' @param ppm For `filterPrecursorMzValues`: `numeric(1)` with the m/z-relative
#' maximal acceptable difference for a m/z to be considered matching. See
#' [closest()] for details.
#' For `filterValues`: `numeric` with relative, value-specific
#' parts-per-million (PPM) tolerance values that are added to `tolerance`
#' (see below). Default is 0.
#'
#' @param z For `filterPrecursorCharge`: `integer()` with the precursor charges
#' to be used as filter.
Expand All @@ -136,21 +139,33 @@
#' @param polarity For `filterPolarity`: `integer` specifying the polarity to
#' to subset `object`.
#'
#' @param ranges for `filterRanges`: A `numeric` vector of paired values (lower
#' and upper boundary) that define the ranges to filter the `object`.
#' These paired values need to be in the same order as the
#' `spectraVariables` parameter (see below).
#'
#' @param rt for `filterRt`: `numeric(2)` defining the retention time range to
#' be used to subset/filter `object`.
#'
#' @param spectraVariables For `selectSpectraVariables`: `character` with the
#' names of the spectra variables to which the backend should be subsetted.
#' @param spectraVariables For `selectSpectraVariables`, `filterRanges` and
#' `filterValues`: `character` with the names of the spectra variables to
#' which the backend should be subsetted.
#'
#' @param tolerance For `filterPrecursorMzValues`: `numeric(1)` with the
#' maximal absolute acceptable difference for a m/z value to be considered
#' matching. See [closest()] for details.
#' matching. See [closest()] for details. For `filterValues`: `numeric`
#' accepted tolerance between the `values` and the spectra variables.
#' Defaults to `tolerance = Inf`.
#'
#' @param use.names For `lengths`: whether spectrum names should be used.
#'
#' @param value replacement value for `<-` methods. See individual
#' method description or expected data type.
#'
#' @param values for `filterValues`: A `numeric` vector that defines the values
#' to filter the `object`. These values need to be in the same order as the
#' `spectraVariables` parameter.
#'
#' @param x Object extending `MsBackend`.
#'
#' @param ... Additional arguments.
Expand Down Expand Up @@ -357,10 +372,23 @@
#' Implementation of this method is optional since a default implementation
#' for `MsBackend` is available.
#'
#' - `filterRanges`: allows filtering of the `Spectra` object based on
#' specified ranges for *as many* and *any* values of
#' `spectraVariables(object)` wanted whether already existing, future-added
#' or user-specific. Implementation of this method is optional since a
#' default implementation for `MsBackend` is available.
#'
#' - `filterRt`: retains spectra of MS level `msLevel` with retention times
#' within (`>=`) `rt[1]` and (`<=`) `rt[2]`.
#' Implementation of this method is optional since a default implementation
#' for `MsBackend` is available.
#' Implementation of this method is optional since a default implementation
#' for `MsBackend` is available.
#'
#' - `filterValues`: allows filtering of the `Spectra` object based on
#' similarities of *as many* and *any* values of `spectraVariables(object)`
#' to user defined `values` (given `tolerance`/`ppm`). These
#' `spectraVariables` can be already existing, future-added or user-specific.
#' Implementation of this method is optional since a default implementation
#' for `MsBackend` is available.
#'
#' - `intensity`: gets the intensity values from the spectra. Returns
#' a [NumericList()] of `numeric` vectors (intensity values for each
Expand Down Expand Up @@ -1140,6 +1168,37 @@ setMethod("filterPrecursorScan", "MsBackend",
} else object
})

#' @exportMethod filterRanges
#'
#' @importFrom MsCoreUtils between
#'
#' @rdname MsBackend
setMethod("filterRanges", "MsBackend",
          function(object, spectraVariables, ranges){
              ## Default `filterRanges` implementation for `MsBackend`:
              ## keep only the spectra for which the value of *every*
              ## spectra variable in `spectraVariables` lies within its
              ## range.
              ##
              ## - object: the backend to subset.
              ## - spectraVariables: `character` with the names of the
              ##   spectra variables to filter on; all of them have to be
              ##   listed by `spectraVariables(object)`.
              ## - ranges: vector of length `2 * length(spectraVariables)`
              ##   with consecutive (lower, upper) pairs, in the order of
              ##   `spectraVariables`.
              ##
              ## Returns `object` subsetted to the matching spectra. Spectra
              ## for which the combined test is `NA` are dropped.
              if (!is.character(spectraVariables))
                  stop("The 'spectraVariables' parameter needs to be a ",
                       "character")
              ## Note: the *function* `spectraVariables()` is found here even
              ## though a parameter has the same name, because R skips
              ## non-function bindings when resolving a call.
              if (!all(spectraVariables %in% spectraVariables(object)))
                  stop("One or more values passed with parameter ",
                       "'spectraVariables' are not available as spectra ",
                       "variables in object. Use the 'spectraVariables()' ",
                       "function to list possible values.")
              if (length(spectraVariables) != length(ranges) / 2)
                  stop("Length of 'ranges' needs to be twice the length of ",
                       "the parameter 'spectraVariables' and define the lower ",
                       "and upper bound for values of each spectra variable ",
                       "defined with parameter 'spectraVariables'.")
              query <- spectraData(object, columns = spectraVariables)
              ## Combine the per-variable tests with a running `&` instead of
              ## `vapply()` + `rowSums()`: `vapply()` returns a plain vector
              ## (not a matrix) for a backend with a single spectrum, on
              ## which `rowSums()` fails.
              keep <- rep(TRUE, nrow(query))
              for (i in seq_len(ncol(query))) {
                  bounds <- ranges[c(2 * i - 1, 2 * i)]
                  keep <- keep & between(query[[i]], bounds)
              }
              ## `which()` drops both `FALSE` and `NA`. The subset is the
              ## last expression (no assignment) so that the result is
              ## returned visibly.
              object[which(keep)]
          })

#' @exportMethod filterRt
#'
#' @importMethodsFrom ProtGenerics filterRt
Expand All @@ -1155,6 +1214,46 @@ setMethod("filterRt", "MsBackend",
} else object
})

#' @exportMethod filterValues
#'
#' @importFrom MsCoreUtils ppm
#'
#' @rdname MsBackend
setMethod("filterValues", "MsBackend",
          function(object, spectraVariables, values, ppm = 0, tolerance = Inf){
              ## Default `filterValues` implementation for `MsBackend`:
              ## keep the spectra whose spectra variables are similar to the
              ## provided `values`. Each value is expanded to the range
              ## `value -/+ (tolerance + ppm(value, ppm))` and the filtering
              ## itself is delegated to `filterRanges`.
              ##
              ## - object: the backend to subset.
              ## - spectraVariables: `character` with the names of the
              ##   spectra variables to filter on.
              ## - values: `numeric`, one value per spectra variable (same
              ##   order as `spectraVariables`).
              ## - ppm, tolerance: `numeric`, either one value per spectra
              ##   variable or a single value that is used for all of them.
              ##
              ## Returns `object` subsetted to the matching spectra.
              if (!is.character(spectraVariables))
                  stop("'spectraVariables' needs to be a character")
              ## Note: the *function* `spectraVariables()` is found here even
              ## though a parameter has the same name, because R skips
              ## non-function bindings when resolving a call.
              if (!all(spectraVariables %in% spectraVariables(object)))
                  stop("One or more values passed with parameter ",
                       "'spectraVariables' are not available as spectra ",
                       "variables in object. Use the 'spectraVariables()' ",
                       "function to list possible values.")
              nsv <- length(spectraVariables)
              if (nsv != length(values))
                  stop("Length of 'values' needs to be same length as the ",
                       "parameter 'spectraVariables' and define the value ",
                       "to match for each spectra variable.")
              ## A single `ppm`/`tolerance` (including the defaults) is
              ## silently used for all spectra variables; only a length that
              ## is neither 1 nor `nsv` is suspicious enough to warn about.
              if (length(ppm) != nsv) {
                  if (length(ppm) != 1L)
                      warning("Length of 'ppm' does not match the amount of ",
                              "'spectraVariables', the first value of the ",
                              "vector will be recycled")
                  ppm <- rep(ppm[1], nsv)
              }
              if (length(tolerance) != nsv) {
                  if (length(tolerance) != 1L)
                      warning("Length of 'tolerance' does not match the ",
                              "amount of 'spectraVariables', the first value ",
                              "of the vector will be recycled")
                  tolerance <- rep(tolerance[1], nsv)
              }

              ## Half-width of the accepted window around each value. The
              ## ppm part is computed on `abs(values)` so that the window
              ## never gets inverted (lower > upper) for negative values.
              ## `ppm()` below is the function imported from MsCoreUtils; the
              ## parameter of the same name is its second argument.
              delta <- tolerance + ppm(abs(values), ppm)
              ## Interleave to (lower1, upper1, lower2, upper2, ...), the
              ## layout expected by `filterRanges`.
              ranges <- c(rbind(values - delta, values + delta))

              ## Last expression without assignment: visible return value.
              filterRanges(object, spectraVariables, ranges)
          })

#' @exportMethod intensity
#'
#' @importMethodsFrom ProtGenerics intensity
Expand Down
99 changes: 66 additions & 33 deletions R/Spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,12 @@ NULL
#' `spectraVariables(object)` wanted whether already existing, future-added
#' or user-specific. See the example below for more details.
#'
#' - `filterValues`: allows filtering of the `Spectra` object based on
#' similarities of *as many* and *any* values of `spectraVariables(object)`
#' to user defined `values` (given `tolerance`/`ppm`). These
#' `spectraVariables` can be already existing, future-added or user-specific.
#' See the example below for more details.
#'
#' - `reduceSpectra`: for groups of peaks within highly similar m/z values
#' within each spectrum (given `ppm` and `tolerance`), this function keeps
#' only the peak with the highest intensity removing all other peaks hence
Expand Down Expand Up @@ -957,13 +963,17 @@ NULL
#' maximal accepted difference of precursor m/z values of spectra for
#' grouping them into *precursor groups*. For `filterPrecursorIsotopes`:
#' passed directly to the [isotopologues()] function.
#' For `filterValues`: `numeric` of any length allowing to define
#' a maximal accepted difference between user input `values` and the
#' `spectraVariables` values. If it is not the length of `spectraVariables`,
#' `ppm[1]` will be recycled.
#'
#' @param processingQueue For `Spectra`: optional `list` of
#' [ProcessingStep-class] objects.
#'
#' @param ranges for `filterRanges`: A `numeric` vector of paired values (upper
#' and lower boundary) that that define the ranges to filter the spectra
#' data. These paired values need to be in the same order as the
#' and lower boundary) that define the ranges to filter the Spectra data.
#' These paired values need to be in the same order as the
#' `spectraVariables` parameter.
#'
#' @param rt for `filterRt`: `numeric(2)` defining the retention time range to
Expand All @@ -987,9 +997,10 @@ NULL
#' - For `addProcessing`: `character` with additional spectra variables that
#' should be passed along to the function defined with `FUN`. See function
#' description for details.
#' - For `filterRanges`: A `character` vector specifying the column from
#' `spectraData(object)` that correspond to the ranges provided. The order
#' must match the order of the parameter `ranges`.
#' - For `filterRanges` and `filterValues`: A `character` vector specifying the
#' column from `spectraData(object)` that correspond to the
#' `ranges`/`values` provided. The order must match the order of the
#' parameter `ranges`/`values`.
#'
#' @param substDefinition For `deisotopeSpectra` and `filterPrecursorIsotopes`:
#' `matrix` or `data.frame` with definitions of isotopic substitutions.
Expand All @@ -1011,6 +1022,10 @@ NULL
#' maximal accepted difference of precursor m/z values of spectra for
#' grouping them into *precursor groups*. For `filterPrecursorIsotopes`:
#' passed directly to the [isotopologues()] function.
#' For `filterValues`: `numeric` of any length allowing to define
#' a maximal accepted difference between user input `values` and the
#' spectraVariables values. If it is not the length of `spectraVariables`,
#' `tolerance[1]` will be recycled.
#'
#' @param threshold
#' - For `pickPeaks`: a `double(1)` defining the proportion of the maximal peak
Expand All @@ -1029,6 +1044,10 @@ NULL
#' @param value replacement value for `<-` methods. See individual
#' method description or expected data type.
#'
#' @param values for `filterValues`: A `numeric` vector that defines the values
#' to filter the Spectra data. These values need to be in the same order as
#' the `spectraVariables` parameter.
#'
#' @param weighted For `combinePeaks`: `logical(1)` whether m/z values of peaks
#' within each peak group should be aggregated into a single m/z value
#' using an intensity-weighted mean. Defaults to `weighted = TRUE`.
Expand Down Expand Up @@ -1272,20 +1291,41 @@ NULL
#'
#' ## Using filterRanges to filter spectra object based on variables available
#' ## in `spectraData`.
#' ## First determine the variable on which to base the filtering:
#' ## First, determine the variable(s) on which to base the filtering:
#' spectraVariables <- c("rtime", "precursorMz", "peaksCount")
#' ## Note that ANY variables can be chosen here, and as many as wanted.
#'
#' ## Defining the ranges (pairs of values with lower and upper boundary) to be
#' ## Define the ranges (pairs of values with lower and upper boundary) to be
#' ## used for the individual spectra variables. The first two values will be
#' ## used for the first spectra variable (e.g. rtime here), the next two for the
#' ## second (e.g. precursorMz here) and so on:
#' ## used for the first spectra variable (e.g., rtime here), the next two for
#' ## the second (e.g. precursorMz here) and so on:
#' ranges <- c(30, 350, 200,500, 350, 600)
#'
#' ## Input the parameters within the filterRanges function:
#' filt_spectra <- filterRanges(sciex, spectraVariables = spectraVariables,
#' ranges = ranges)
#'
#' ## Using filterValues in a similar way to filter a spectra object based on
#' ## variables available in `spectraData`. However, this time not based on
#' ## ranges but similarities to user input single values with given
#' ## tolerance/ppm
#' ## First determine the variable(s) on which to base the filtering:
#' spectraVariables <- c("rtime", "precursorMz")
#' ## Note that ANY variables can be chosen here, and as many as wanted.
#'
#' ## Define the values that will be used to filter the spectra based on their
#' ## similarities to their respective spectraVariables.
#' ## The first values in the parameters values, tolerance and ppm will be
#' ## used for the first spectra variable (e.g. rtime here), the next for the
#' ## second (e.g. precursorMz here) and so on:
#' values <- c(350, 400)
#' tolerance <- c(100, 0)
#' ppm <- c(0,50)
#'
#' ## Input the parameters within the filterValues function:
#' filt_spectra <- filterValues(sciex, spectraVariables = spectraVariables,
#' values = values, tolerance = tolerance, ppm = ppm)
#'
#' ## ---- DATA MANIPULATIONS AND OTHER OPERATIONS ----
#'
#' ## Set the data to be centroided
Expand Down Expand Up @@ -2426,39 +2466,32 @@ setMethod("reset", "Spectra", function(object, ...) {
object
})


#' @rdname Spectra
#' @importFrom MsCoreUtils between
#' @export
setMethod("filterRanges", "Spectra",
function(object, spectraVariables, ranges, ...){
if (is.character(spectraVariables)){
if(!all(spectraVariables %in% spectraVariables(object)))
stop("'spectraVariables' need to correspond to colnames of",
"the 'spectraData' of the object")
} else
stop("'spectraVariables' needs to be a character")
if (length(spectraVariables) != length(ranges) / 2)
stop("Length of 'spectraVariables' must be half the length ",
"of 'ranges'")

query <- spectraData(object, columns = spectraVariables)
nc <- ncol(query)
within_ranges <- vapply(seq_len(nc), function(i) {
pairs <- c(ranges[2*i - 1], ranges[2*i])
between(query[[i]], pairs)
}, logical(nrow(query)))

idc <- which(rowSums(within_ranges, na.rm = FALSE) == nc)
function(object, spectraVariables, ranges,...){
object@backend <- filterRanges(object@backend, spectraVariables,
ranges)
object@processing <- .logging(object@processing,
"Filter: select spectra with a ",
spectraVariables, " within: [",
ranges[seq(ranges)%% 2 == 0], ", ",
ranges[seq(ranges)%% 2 != 0], "]"
)
object <- object[idc]
})
object
})

#' @rdname Spectra
setMethod("filterValues", "Spectra",
          function(object, spectraVariables, values, ppm = 0, tolerance = Inf,
                   ...){
              ## `filterValues` for `Spectra`: the filtering itself is
              ## performed by the `filterValues` method of the object's
              ## backend; here only the backend is replaced and the
              ## operation is added to the processing history.
              ## Arguments passed through `...` are not used.
              filtered_backend <- filterValues(object@backend,
                                               spectraVariables, values,
                                               ppm, tolerance)
              object@backend <- filtered_backend
              ## Log entry built by `.logging` (helper defined elsewhere in
              ## the package) from the variable names and the values.
              history <- .logging(object@processing,
                                  "Filter: select spectra with a ",
                                  spectraVariables, " similar to: ",
                                  values)
              object@processing <- history
              object
          })

#### ---------------------------------------------------------------------------
##
Expand Down
Loading

0 comments on commit 484e929

Please sign in to comment.