diff --git a/DESCRIPTION b/DESCRIPTION index f372bd07..d67643f5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -78,7 +78,8 @@ Imports: limma, statmod, MatrixGenerics, - rlang + rlang, + dplyr RoxygenNote: 7.3.2 Roxygen: list(markdown = TRUE) URL: https://github.com/LieberInstitute/spatialLIBD diff --git a/NAMESPACE b/NAMESPACE index d18236fa..4eba582c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(add10xVisiumAnalysis) export(add_images) export(add_key) +export(add_qc_metrics) export(annotate_registered_clusters) export(check_modeling_results) export(check_sce) @@ -83,8 +84,14 @@ importFrom(SummarizedExperiment,"rowRanges<-") importFrom(SummarizedExperiment,assay) importFrom(SummarizedExperiment,assayNames) importFrom(SummarizedExperiment,assays) +importFrom(SummarizedExperiment,colData) importFrom(benchmarkme,get_ram) importFrom(cowplot,plot_grid) +importFrom(dplyr,group_by) +importFrom(dplyr,left_join) +importFrom(dplyr,mutate) +importFrom(dplyr,select) +importFrom(dplyr,summarize) importFrom(edgeR,calcNormFactors) importFrom(edgeR,filterByExpr) importFrom(fields,image.plot) @@ -124,6 +131,7 @@ importFrom(methods,new) importFrom(png,readPNG) importFrom(rlang,arg_match) importFrom(rtracklayer,import) +importFrom(scater,isOutlier) importFrom(scater,plotReducedDim) importFrom(scuttle,aggregateAcrossCells) importFrom(sessioninfo,session_info) diff --git a/R/add_qc_metrics.R b/R/add_qc_metrics.R new file mode 100644 index 00000000..5008e304 --- /dev/null +++ b/R/add_qc_metrics.R @@ -0,0 +1,144 @@ +#' Quality Control for Spatial Data +#' +#' This function identify spots in a +#' [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class] (SPE) +#' with outlier quality control values: low `sum_umi` or `sum_gene`, or high +#' `expr_chrM_ratio`, utilizing `scran::isOutlier()`. Also identifies in-tissue +#' edge spots and distance to the edge for each spot. +#' +#' @param spe a [SpatialExperiment][SpatialExperiment::SpatialExperiment-class] +#' +#' @return A [SpatialExperiment][SpatialExperiment::SpatialExperiment-class] +#' with added quiality control information added to the colData. +#' +#' @export +#' @importFrom dplyr group_by summarize left_join select mutate +#' @importFrom SummarizedExperiment colData +#' @importFrom scater isOutlier +#' +#' @examples +#' if (enough_ram()) { +#' ## Obtain the necessary data +#' if (!exists("spe")) spe <- fetch_data("spe") +#' +#' ## fake out tissue spots in example data (TODO add pre-qc data) +#' spe_qc <- spe +#' spe_qc$in_tissue[spe_qc$array_col < 10] <- FALSE +#' +#' ## adds QC metrics to colData of the spe +#' spe_qc <- add_qc_metrics(spe_qc) +#' colData(spe_qc) +#' +#' ## visualize edge spots +#' vis_clus(spe_qc, sampleid = "151507", clustervar = "edge_spot") +#' vis_gene(spe_qc, sampleid = "151507", geneid = "edge_distance", minCount = -1) +#' +#' ## visualize scran QC flags +#' +#' vis_clus(spe_qc, sample_id = "151507", clustervar = "scran_low_lib_size") +#' +#' # scater::plotColData(spe_qc[, spe_qc$in_tissue], x = "sample_id", y = "sum_umi", colour_by = "scran_low_lib_size") +#' +#' vis_clus(spe_qc, sampleid = "151507", clustervar = "scran_discard") +#' vis_clus(spe_qc, sampleid = "151507", clustervar = "scran_low_lib_size_edge") +#' } +#' +add_qc_metrics <- function(spe) { + stopifnot("in_tissue" %in% colnames(colData(spe))) + stopifnot("sum_umi" %in% colnames(colData(spe))) + stopifnot("sum_gene" %in% colnames(colData(spe))) + stopifnot("expr_chrM_ratio" %in% colnames(colData(spe))) + + spe$in_tissue <- as.logical(spe$in_tissue) + spe_in <- spe[, spe$in_tissue] + + ## QC in-tissue spots + + # define variables + low_lib_size <- low_n_features <- in_tissue <- sample_id <- NULL + + qc_df <- data.frame( + log2sum = log2(spe_in$sum_umi), + log2detected = log2(spe_in$sum_gene), + subsets_Mito_percent = spe_in$expr_chrM_ratio * 100, + sample_id = spe_in$sample_id + ) + + qcfilter <- data.frame( + low_lib_size = scater::isOutlier(qc_df$log2sum, type = "lower", log = TRUE, batch = qc_df$sample_id), + low_n_features = scater::isOutlier(qc_df$log2detected, type = "lower", log = TRUE, batch = qc_df$sample_id), + high_subsets_Mito_percent = scater::isOutlier(qc_df$subsets_Mito_percent, type = "higher", batch = qc_df$sample_id) + ) |> + dplyr::mutate(discard = (low_lib_size | low_n_features) | high_subsets_Mito_percent) + + ## Add qcfilter cols to colData(spe) after factoring + ## discard + spe$scran_discard <- NA + spe$scran_discard[which(spe$in_tissue)] <- qcfilter$discard + spe$scran_discard <- factor(spe$scran_discard, levels = c("TRUE", "FALSE")) + + ## low_lib_size + spe$scran_low_lib_size <- NA + spe$scran_low_lib_size[which(spe$in_tissue)] <- qcfilter$low_lib_size + spe$scran_low_lib_size <- factor(spe$scran_low_lib_size, + levels = c("TRUE", "FALSE") + ) + ## low_n_features + spe$scran_low_n_features <- NA + spe$scran_low_n_features[which(spe$in_tissue)] <- qcfilter$low_n_features + spe$scran_low_n_features <- factor(spe$scran_low_n_features, + levels = c("TRUE", "FALSE") + ) + + ## high mito percent + spe$scran_high_Mito_percent <- NA + spe$scran_high_Mito_percent[which(spe$in_tissue)] <- + qcfilter$high_subsets_Mito_percent + spe$scran_high_Mito_percent <- + factor(spe$scran_high_Mito_percent, levels = c("TRUE", "FALSE")) + + ## Find edge spots + # define variables + array_row <- array_col <- edge_row <- edge_col <- row_distance <- NULL + col_distance <- high_subsets_Mito_percent <- NULL + + spot_coords <- colData(spe_in) |> + as.data.frame() |> + select(in_tissue, sample_id, array_row, array_col) |> + group_by(sample_id, array_row) |> + mutate( + edge_col = array_col == min(array_col) | array_col == max(array_col), + col_distance = pmin( + abs(array_col - min(array_col)), + abs(array_col - max(array_col)) + ) + ) |> + group_by(sample_id, array_col) |> + mutate( + edge_row = array_row == min(array_row) | array_row == max(array_row), + row_distance = pmin( + abs(array_row - min(array_row)), + abs(array_row - max(array_row)) + ) + ) |> + group_by(sample_id) |> + mutate( + edge_spot = edge_row | edge_col, + edge_distance = pmin(row_distance, col_distance) + ) + + + ## Add Edge info to spe + spe$edge_spot <- NA + spe$edge_spot[which(spe$in_tissue)] <- spot_coords$edge_spot + spe$edge_spot <- factor(spe$edge_spot, levels = c("TRUE", "FALSE")) + + spe$edge_distance <- NA + spe$edge_distance[which(spe$in_tissue)] <- spot_coords$edge_distance + + spe$scran_low_lib_size_edge <- NA + spe$scran_low_lib_size_edge[which(spe$in_tissue)] <- qcfilter$low_lib_size & spot_coords$edge_spot + spe$scran_low_lib_size_edge <- factor(spe$scran_low_lib_size_edge, levels = c("TRUE", "FALSE")) + + return(spe) +} diff --git a/man/add_qc_metrics.Rd b/man/add_qc_metrics.Rd new file mode 100644 index 00000000..b1d5dc5d --- /dev/null +++ b/man/add_qc_metrics.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_qc_metrics.R +\name{add_qc_metrics} +\alias{add_qc_metrics} +\title{Quality Control for Spatial Data} +\usage{ +add_qc_metrics(spe) +} +\arguments{ +\item{spe}{a \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment}} +} +\value{ +A \link[SpatialExperiment:SpatialExperiment]{SpatialExperiment} +with added quiality control information added to the colData. +} +\description{ +This function identify spots in a +\link[SpatialExperiment:SpatialExperiment]{SpatialExperiment-class} (SPE) +with outlier quality control values: low \code{sum_umi} or \code{sum_gene}, or high +\code{expr_chrM_ratio}, utilizing \code{scran::isOutlier()}. Also identifies in-tissue +edge spots and distance to the edge for each spot. +} +\examples{ +if (enough_ram()) { + ## Obtain the necessary data + if (!exists("spe")) spe <- fetch_data("spe") + + ## fake out tissue spots in example data (TODO add pre-qc data) + spe_qc <- spe + spe_qc$in_tissue[spe_qc$array_col < 10] <- FALSE + + ## adds QC metrics to colData of the spe + spe_qc <- add_qc_metrics(spe_qc) + colData(spe_qc) + + ## visualize edge spots + vis_clus(spe_qc, sampleid = "151507", clustervar = "edge_spot") + vis_gene(spe_qc, sampleid = "151507", geneid = "edge_distance", minCount = -1) + + ## visualize scran QC flags + + vis_clus(spe_qc, sample_id = "151507", clustervar = "scran_low_lib_size") + + # scater::plotColData(spe_qc[, spe_qc$in_tissue], x = "sample_id", y = "sum_umi", colour_by = "scran_low_lib_size") + + vis_clus(spe_qc, sampleid = "151507", clustervar = "scran_discard") + vis_clus(spe_qc, sampleid = "151507", clustervar = "scran_low_lib_size_edge") +} + +} diff --git a/tests/testthat/test-add_qc_metrics.R b/tests/testthat/test-add_qc_metrics.R new file mode 100644 index 00000000..39d39a0a --- /dev/null +++ b/tests/testthat/test-add_qc_metrics.R @@ -0,0 +1,14 @@ +test_that( + "add_qc_metrics returns modified spe", + { + if (!exists("spe")) spe <- fetch_data("spe") + + # run metrics spe + spe_qc <- add_qc_metrics(spe) + expect_equal(ncol(spe), ncol(spe_qc)) ## same number of spots + expect_equal(ncol(colData(spe)) + 7, ncol(colData(spe_qc))) ## add 7 QC cols to colData + # [1] "scran_discard" "scran_low_lib_size" "scran_low_n_features" + # [4] "scran_high_subsets_Mito_percent" "edge_spot" "edge_distance" + # [7] "scran_low_lib_size_edge" + } +)