Merge pull request #739 from joshua-d-campbell/master

v2.12.1
compbiomed · Jan 11, 2024 · 111cb1e · 111cb1e
2 parents 7339f76 + ae66d83
commit 111cb1e
Show file tree

Hide file tree

Showing 352 changed files with 2,825 additions and 6,532 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: singleCellTK
 Type: Package
 Title: Comprehensive and Interactive Analysis of Single Cell RNA-Seq Data
-Version: 2.12.0
-Authors@R: c(person(given="Yichen", family="Wang", email="wangych@bu.edu", role=c("aut", "cre"),
+Version: 2.12.1
+Authors@R: c(person(given="Yichen", family="Wang", email="wangych@bu.edu", role=c("aut"),
                     comment = c(ORCID = "0000-0003-4347-5199")),
              person(given="Irzam", family="Sarfraz", email="isarfraz@bu.edu", role=c("aut"),
                     comment = c(ORCID = "0000-0001-8121-792X")),
@@ -22,7 +22,8 @@ Authors@R: c(person(given="Yichen", family="Wang", email="wangych@bu.edu", role=
              person(given=c("W.", "Evan"), family="Johnson", email="wej@bu.edu", role=c("aut"),
                     comment = c(ORCID = "0000-0002-6247-6595")),
              person(given="Ming", family="Liu", email="mingl@bu.edu", role=c("aut")),
-             person(given=c("Joshua", "David"), family="Campbell", email="camp@bu.edu", role=c("aut"))
+             person(given=c("Joshua", "David"), family="Campbell", email="camp@bu.edu", role=c("aut", "cre"),
+             comment = c(ORCID = "0000-0003-0780-8662"))
              )
 Depends:
     R (>= 4.0),
@@ -137,7 +138,8 @@ Suggests:
     BiocGenerics,
     RColorBrewer,
     fastmap (>= 1.1.0),
-    harmony
+    harmony,
+    optparse
 VignetteBuilder: knitr
 URL: https://www.camplab.net/sctk/
 BugReports: https://github.com/compbiomed/singleCellTK/issues

diff --git a/Dockerfile b/Dockerfile
@@ -42,7 +42,7 @@ RUN R -e "BiocManager::install('scRNAseq')"
 RUN R -e "BiocManager::install('celda')"
 #RUN R -e "devtools::install_github('wleepang/shiny-directory-input')"
 RUN R -e "options(timeout=360000)" \
-	&& R -e "devtools::install_github('compbiomed/singleCellTK', ref = 'devel', force = TRUE, dependencies = TRUE)"
+	&& R -e "devtools::install_github('compbiomed/singleCellTK', force = TRUE, dependencies = TRUE)"
 
 RUN R -e "install.packages('reticulate')"
 RUN R -e "Sys.setenv(RETICULATE_PYTHON = '/usr/bin/python3')"

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,12 @@
+Changes in Version 2.12.1 (2024-01-10)
+================================================================================
+* Updates to documentation
+* Fixes to runTSCAN and plotSeuratGenes
+* Added support for flat file import into SCTK-QC
+* Fixed directory issue in importCellRanger
+* Added Bubble plot to Shiny GUI
+* Updated Dockerfile
+
 Changes in Version 2.12.0 (2023-10-24)
 ================================================================================
 * Updated version to match Bioconductor 3.18

diff --git a/R/importCellRanger.R b/R/importCellRanger.R
@@ -192,8 +192,18 @@
                 }
             }
 
-            sampleLength <- length(unlist(lapply(cellRangerDirs,
-                list.dirs, recursive = FALSE)))
+            # check cellRangerDirs sample length
+            sampleLength = 0
+            for (i in cellRangerDirs) {
+                temp = paste0(i, "/outs")
+                if (dir.exists(temp) == TRUE) {
+                    sampleLength = sampleLength + 1
+                }
+                rm(temp)
+            }
+
+            #sampleLength <- length(unlist(lapply(cellRangerDirs,
+            #    list.dirs, recursive = FALSE)))
 
             if (!is.null(sampleNames)) {
                 if (sampleLength != length(sampleNames)) {

diff --git a/R/plotBubble.R b/R/plotBubble.R
@@ -12,6 +12,7 @@
 #' @param ylab The y-axis label
 #' @param colorLow The color to be used for lowest value of mean expression
 #' @param colorHigh The color to be used for highest value of mean expression
+#' @param scale Option to scale the data. Default \code{FALSE}, in which case the selected assay is not scaled.
 #' @return A ggplot of the bubble plot.
 #' @importFrom rlang .data
 #' @importFrom reshape2 melt
@@ -21,14 +22,14 @@
 #' displayName="feature_name", groupNames="type", title="cell type test",
 #' xlab="gene", ylab="cluster", colorLow="white", colorHigh="blue")
 #' @export
-plotBubble <- function(inSCE, useAssay="logcounts", featureNames, displayName=NULL, groupNames="cluster", title="", xlab=NULL, ylab=NULL, colorLow="white", colorHigh="blue"){
+plotBubble <- function(inSCE, useAssay="logcounts", featureNames, displayName=NULL, groupNames="cluster", title="", xlab=NULL, ylab=NULL, colorLow="white", colorHigh="blue", scale = FALSE){
   metrics <- runClusterSummaryMetrics(inSCE, useAssay=useAssay, featureNames=featureNames, 
-                                      displayName=displayName, groupNames=groupNames)
+                                      displayName=displayName, groupNames=groupNames, scale = scale)
   .ggBubble(avgExpr = metrics$avgExpr, percExpr = metrics$percExpr, colorLow = colorLow, 
-            colorHigh = colorHigh, title = title)
+            colorHigh = colorHigh, title = title, xlab=xlab, ylab=ylab)
 }
 
-.ggBubble <- function(avgExpr, percExpr, groupNames=NULL, featureNames=NULL, colorLow="white", colorHigh="blue", title=""){
+.ggBubble <- function(avgExpr, percExpr, groupNames=NULL, featureNames=NULL, colorLow="white", colorHigh="blue", title="", xlab="Features", ylab="Clusters"){
   if(is.null(featureNames)) {
     if(is.null(rownames(avgExpr))) {
       stop("'featureNames' must be supplied or the 'rownames' of the average expression matrix must be set.")
@@ -65,7 +66,12 @@ plotBubble <- function(inSCE, useAssay="logcounts", featureNames, displayName=NU
   gg <- ggplot2::ggplot(df, ggplot2::aes(x = .data[['featureNames']], y = .data[['groupNames']])) +
     ggplot2::geom_point(ggplot2::aes(color=.data[['avgExpr']], size=.data[['percExpr']])) +
     ggplot2::ggtitle(title) +
+    ggplot2::xlab(xlab) + 
+    ggplot2::ylab(ylab) + 
     ggplot2::scale_color_gradient2(low=colorLow, high=colorHigh)
-  .ggSCTKTheme(gg)
+  g <- .ggSCTKTheme(gg)
+
+  g <- g + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust=1))
+  g
 }
 
diff --git a/R/runBatchCorrection.R b/R/runBatchCorrection.R
@@ -274,9 +274,12 @@ runFastMNN <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
 #' conditions.
 #' @param inSCE Input \linkS4class{SingleCellExperiment} object
 #' @param useAssay A single character indicating the name of the assay requiring
-#' batch correction. Default \code{"logcounts"}.
+#' batch correction. Default \code{NULL}. It is recommended to use a reducedDim
+#' such as PCA through the \code{useReducedDim} parameter of this function.
 #' @param useReducedDim A single character indicating the name of the reducedDim
-#' used to be corrected. Specifying this will ignore \code{useAssay}. Default
+#' to be used. It is recommended to use a reducedDim instead of a full assay, as
+#' using an assay might cause the algorithm to not converge and throw an error.
+#' Specifying this will ignore \code{useAssay}. Default
 #' \code{NULL}.
 #' @param batch A single character indicating a field in \code{colData} that
 #' annotates the batches of each cell; or a vector/factor with the same length
@@ -317,7 +320,7 @@ runFastMNN <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
 #' if (require("harmony"))
 #'     sceCorr <- runHarmony(sceBatches)
 #' }
-runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
+runHarmony <- function(inSCE, useAssay = NULL, useReducedDim = NULL,
                        batch = "batch", reducedDimName = "HARMONY",
                        nComponents = 50, lambda = 0.1, theta = 5,
                        sigma = 0.1, nIter = 10, seed = 12345, verbose = TRUE, ...) {
@@ -327,6 +330,17 @@ runHarmony <- function(inSCE, useAssay = "logcounts", useReducedDim = NULL,
          "install.packages('harmony')",
          call. = FALSE)
   }
+
+  # Stop if both useAssay and useReducedDim are NULL (at least one is required)
+  if(is.null(useAssay) && is.null(useReducedDim)){
+    stop("Both 'useAssay' & 'useReducedDim' cannot be NULL. It is recommended to use a reducedDim (PCA) for this algorithm but a full-sized assay can also be used. However, using an assay may cause the algorithm to not converge.")
+  }
+
+  # If using useAssay, send a warning to recommend using PCA
+  if((!is.null(useAssay)) && is.null(useReducedDim)){
+    warning("You are using a full-sized assay with Harmony. It is recommended to use a reducedDim (PCA) for better results, as using a full sized assay may cause the algorithm to not converge. Computation will proceed with selected assay ...")
+  }
+
   ## Input check
   useMat <- .selectSCEMatrix(inSCE, useAssay, useReducedDim,
                              useAltExp = NULL, returnMatrix = TRUE)

diff --git a/R/runClusterSummaryMetrics.R b/R/runClusterSummaryMetrics.R
@@ -7,6 +7,7 @@
 #' @param featureNames A string or vector of strings with each gene to aggregate.
 #' @param displayName A string that is the name of the column used for genes.
 #' @param groupNames The name of a colData entry that can be used as groupNames.
+#' @param scale Option to scale the data. Default \code{FALSE}, in which case the selected assay is not scaled.
 #' @return A dataframe with mean expression and percent of cells in cluster that 
 #' express for each cluster.
 #' @examples
@@ -15,8 +16,11 @@
 #' displayName="feature_name", groupNames="type")
 #' @export
 
-runClusterSummaryMetrics <- function(inSCE, useAssay="logcounts", featureNames, displayName=NULL, groupNames="cluster"){
-
+runClusterSummaryMetrics <- function(inSCE, useAssay="logcounts", featureNames, displayName=NULL, groupNames="cluster", scale = FALSE){
+  if(isTRUE(scale)){
+    runNormalization(inSCE=inSCE, useAssay=useAssay, scale = TRUE, normalizationMethod = NULL, transformation = NULL, 
+                     pseudocountsBeforeNorm = NULL, pseudocountsBeforeTransform = NULL)
+  }
   if (!groupNames %in% names(SingleCellExperiment::colData(inSCE))) {
     stop("Specified variable '", groupNames, "' not found in colData(inSCE)")
   }
@@ -40,16 +44,24 @@ runClusterSummaryMetrics <- function(inSCE, useAssay="logcounts", featureNames,
     warning("Specified genes '", toString(falseGenes), "' not found in ", warning)
   }
 
+  tempSCE <- inSCE[featureNames, ]
+
+
+  if(isTRUE(scale)){
+    tempSCE <- runNormalization(inSCE=tempSCE, outAssayName = "scaled", useAssay=useAssay,scale = TRUE, normalizationMethod = NULL, transformation = NULL,
+                     pseudocountsBeforeNorm = NULL, pseudocountsBeforeTransform = NULL)
+    useAssay <- "scaled"
+  }
 
-  avgExpr <- assay(scuttle::aggregateAcrossCells(inSCE, ids=SingleCellExperiment::colData(inSCE)[,groupNames], 
+  avgExpr <- assay(scuttle::aggregateAcrossCells(tempSCE, ids=SingleCellExperiment::colData(inSCE)[,groupNames], 
                                                             statistics="mean", use.assay.type=useAssay, 
-                                                            subset.row=featureNames))
+                                                 subset.row=NULL))
 
 
 
-  percExpr <- assay(scuttle::aggregateAcrossCells(inSCE, ids=SingleCellExperiment::colData(inSCE)[,groupNames], 
+  percExpr <- assay(scuttle::aggregateAcrossCells(tempSCE, ids=SingleCellExperiment::colData(inSCE)[,groupNames], 
                                                              statistics="prop.detected", use.assay.type=useAssay, 
-                                                             subset.row=featureNames))
+                                                  subset.row=NULL))
 
 
   df <- data.frame(featureNames = featureNames)

diff --git a/R/runDimReduce.R b/R/runDimReduce.R
@@ -58,7 +58,7 @@ runDimReduce <- function(inSCE,
                          useAssay = NULL, useReducedDim = NULL,
                          useAltExp = NULL, reducedDimName = method,
                          nComponents = 20, useFeatureSubset = NULL,
-                         scale = FALSE, seed = NULL, ...)
+                         scale = FALSE, seed = 12345, ...)
 {
 
   method <- match.arg(method)

diff --git a/R/runNormalization.R b/R/runNormalization.R
@@ -48,8 +48,8 @@
 #'  outAssayName = "logcounts")
 runNormalization <- function(inSCE,
                              useAssay = "counts",
-                             outAssayName = "customNormalizedAssay",
-                             normalizationMethod = NULL,
+                             outAssayName = "logcounts",
+                             normalizationMethod = "logNormCounts",
                              scale = FALSE,
                              seuratScaleFactor = 10000,
                              transformation = NULL,