for bioconductor

jyyulab · Aug 13, 2024 · 0685034 · 0685034
1 parent 32bc9b2
commit 0685034
Show file tree

Hide file tree

Showing 40 changed files with 843 additions and 256 deletions.
diff --git a/R/clustering_analysis.R b/R/clustering_analysis.R
@@ -15,7 +15,10 @@
 #' @export
 #'
 #' @examples
-#' generateMICAinput(input_eset = log2cpm.ese, output_file = "./MICA/micaInput.txt")
+#' data(pbmc14k_expression.eset)
+#' generateMICAinput(input_eset = pbmc14k_expression.eset,
+#'                   output_file = "/work-path/PBMC14k/MICA/micaInput.txt",
+#'                   overwrite = FALSE)
 generateMICAinput <- function(input_eset,
                               output_file,
                               overwrite = F,
@@ -125,7 +128,10 @@ generateMICAinput <- function(input_eset,
 #' @export
 #'
 #' @examples
-#' clustered.eset <- addMICAoutput(input_eset = input_eset, mica_output_file = "/path-to-mica-input/clustering_UMAP_euclidean_20_2.22554.txt", visual_method = "umap")
+#' data(pbmc14k_expression.eset)
+#' pbmc14k_log2cpm.eset <- addMICAoutput(pbmc14k_expression.eset,
+#'                                       mica_output_file = system.file("extdata/demo_pbmc14k/MICA/clustering_UMAP_euclidean_20_2.05.txt", package = "scMINER"),
+#'                                       visual_method = "umap")
 addMICAoutput <- function(input_eset,
                           mica_output_file,
                           visual_method = "umap")
@@ -157,23 +163,35 @@ addMICAoutput <- function(input_eset,
 
 #' Draw a scatter plot showing the coordinates and cluster id of each cell
 #'
-#' @description
-#' This function is used to visualize the clustering results generated by MICA.
+#' @description This function is used to visualize the clustering results
+#' generated by MICA.
 #'
 #' @param input_eset The sparse eset object
-#' @param color_by Factor, character or numeric, name of the column of MICA cluster labels. Default: "`clusterID`".
-#' @param colors A character vector or `NULL`, colors of the MICA cluster labels. The length of this vector should be same as the number of groups in color_by column. If `NULL`, the ggplot default colors will be use. Default: `NULL`.
-#' @param do.logTransform Logical, whether to do the log2(value + 1) transformation. Only valid when color_by is numeric Default: `TRUE`.
-#' @param X,Y Character, name of the columns of x-axis and y-axis coordinates. Default: "`UMAP_1`", and "`UMAP_2`".
+#' @param color_by Factor, character or numeric, name of the column of MICA
+#'   cluster labels. Default: "`clusterID`".
+#' @param colors A character vector or `NULL`, colors of the MICA cluster
+#'   labels. The length of this vector should be same as the number of groups in
+#'   color_by column. If `NULL`, the ggplot default colors will be use. Default:
+#'   `NULL`.
+#' @param do.logTransform Logical, whether to do the log2(value + 1)
+#'   transformation. Only valid when color_by is numeric Default: `TRUE`.
+#' @param X,Y Character, name of the columns of x-axis and y-axis coordinates.
+#'   Default: "`UMAP_1`", and "`UMAP_2`".
 #' @param point.size Numeric, size of points. Default: 0.3.
-#' @param point.alpha Numeric, transparency of points, ranging from 0 (more transparent) to 1 (less transparent). Default: 1.
+#' @param point.alpha Numeric, transparency of points, ranging from 0 (more
+#'   transparent) to 1 (less transparent). Default: 1.
 #' @param name.plot_title Character or NULL, title of the plot. Default: `NULL`.
 #' @param fontsize.plot_table Numeric, font size of the title. Default: 20.
-#' @param show.cluster_label Logical, whether to show labels on the plot. Ignored when color_by is numeric. Default: `TRUE`.
-#' @param fontsize.cluster_label Numeric, font size of the labels. Ignored when color_by is numeric. Default: 12.
-#' @param legend.position Character, position of legend: "`right`", "`left`", "`top`", "`bottom`" or "`none`". Default: "`right`".
-#' @param fontsize.legend_title Integer, font size of the legend title. Default: 10.
-#' @param fontsize.legend_text Integer, font size of the legend text. Default: 8.
+#' @param show.cluster_label Logical, whether to show labels on the plot.
+#'   Ignored when color_by is numeric. Default: `TRUE`.
+#' @param fontsize.cluster_label Numeric, font size of the labels. Ignored when
+#'   color_by is numeric. Default: 12.
+#' @param legend.position Character, position of legend: "`right`", "`left`",
+#'   "`top`", "`bottom`" or "`none`". Default: "`right`".
+#' @param fontsize.legend_title Integer, font size of the legend title. Default:
+#'   10.
+#' @param fontsize.legend_text Integer, font size of the legend text. Default:
+#'   8.
 #' @param fontsize.axis_title Integer, font size of the axis title. Default: 10.
 #' @param fontsize.axis_text Integer, font size of the axis text. Default: 8.
 #'
@@ -182,11 +200,20 @@ addMICAoutput <- function(input_eset,
 #' @export
 #'
 #' @examples
+#'
+#' data(pbmc14k_expression.eset)
 #' ## 1. color-coded by factor or character variable
-#' p_umap <- MICAplot(input_eset = pbmc14k_log2cpm.eset, color_by = 'clusterID', point.size = 0.1)
+#' p1 <- MICAplot(input_eset = pbmc14k_expression.eset,
+#'                color_by = "clusterID",
+#'                X = "UMAP_1", Y = "UMAP_2",
+#'                point.size = 0.1,
+#'                fontsize.cluster_label = 6)
 #'
 #' ## 2. color-coded by numeric variable
-#' p_umap <- MICAplot(input_eset = pbmc14k_log2cpm.eset, color_by = 'nUMI', do.logTransform = TRUE)
+#' p2 <- MICAplot(input_eset = pbmc14k_expression.eset,
+#'                color_by = "nUMI",
+#'                do.logTransform = TRUE,
+#'                point.size = 0.1)
 MICAplot <- function(input_eset,
                      color_by = "clusterID", colors = NULL, do.logTransform = TRUE,
                      X = "UMAP_1", Y = "UMAP_2",

diff --git a/R/data.R b/R/data.R
@@ -12,3 +12,16 @@
 #' }
 #' @source A subset of Filtered_DownSampled_SortedPBMC_data.csv from <https://zenodo.org/record/3357167#.YhQNF2RKj6V>
 "pbmc14k_rawCount"
+
+
+#' SparseEset object of PBMC14k dataset
+#'
+#' This dataset contains the SparseEset object of PBMC14k dataset. For demonstration purposes, it has been downsampled to 3.5k cells, with 500 cells per population.
+#'
+#' @format ## `pbmc14k_expression.eset`
+#' A large dgCMatrix with 17,986 rows and 14,000 columns:
+#' \describe{
+#'   This data set provides the SparseEset object of PBMC14k dataset that has been filtered, normalized, clustered and annotated.
+#' }
+#' @source It's generated by scMINER from Filtered_DownSampled_SortedPBMC_data.csv from <https://zenodo.org/record/3357167#.YhQNF2RKj6V>
+"pbmc14k_expression.eset"
diff --git a/R/differential_analysis.R b/R/differential_analysis.R
@@ -99,7 +99,11 @@ combinePvalVector <- function(pvals,
 #'
 #' @examples
 #' ## to call this function
-#' res <- compare2groups(input_eset = input_eset, group_by = group_by, g1 = g1_tmp, g0 = g0_tmp, use_method = use_method)
+#' res <- compare2groups(input_eset = input_eset,
+#'                       group_by = group_by,
+#'                       g1 = g1_tmp,
+#'                       g0 = g0_tmp,
+#'                       use_method = use_method)
 compare2groups <- function(input_eset,
                            group_by = "clusterID",
                            g1 = NULL, g0 = NULL,
@@ -200,16 +204,28 @@ compare2groups <- function(input_eset,
 #'
 #' @examples
 #' ## 1. To perform differential expression analysis in a 1-vs-rest manner for all groups in "clusterID" column
-#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", use_method = "limma")
+#' de_res <- getDE(input_eset = clustered.eset,
+#'                 group_by = "clusterID",
+#'                 use_method = "limma")
 #'
 #' ## 2. To perform differential expression analysis in a 1-vs-rest manner for one specific group in "clusterID" column
-#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g1 = c("1"), use_method = "limma")
+#' de_res <- getDE(input_eset = clustered.eset,
+#'                 group_by = "clusterID",
+#'                 g1 = c("1"),
+#'                 use_method = "limma")
 #'
 #' ## 3. To perform differential expression analysis in a rest-vs-1 manner for one specific group in "clusterID" column
-#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g0 = c("1"), use_method = "limma")
+#' de_res <- getDE(input_eset = clustered.eset,
+#'                 group_by = "clusterID",
+#'                 g0 = c("1"),
+#'                 use_method = "limma")
 #'
 #' ## 4. To perform differential expression analysis in a 1-vs-1 manner for groups in "clusterID" column
-#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g1 = c("1"), g0 = c("3"), use_method = "limma")
+#' de_res <- getDE(input_eset = clustered.eset,
+#'                 group_by = "clusterID",
+#'                 g1 = c("1"),
+#'                 g0 = c("3"),
+#'                 use_method = "limma")
 getDE <- function(input_eset,
                   group_by = "clusterID",
                   g1 = NULL, g0 = NULL,
@@ -291,16 +307,28 @@ getDE <- function(input_eset,
 #'
 #' @examples
 #' ## 1. To perform differential activity analysis in a 1-vs-rest manner for all groups in "clusterID" column
-#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", use_method = "t.test")
+#' da_res <- getDA(input_eset = activity_clustered.eset,
+#'                 group_by = "clusterID",
+#'                 use_method = "t.test")
 #'
 #' ## 2. To perform differential activity analysis in a 1-vs-rest manner for one specific group in "clusterID" column
-#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g1 = c("1"), use_method = "t.test")
+#' da_res <- getDA(input_eset = activity_clustered.eset,
+#'                 group_by = "clusterID",
+#'                 g1 = c("1"),
+#'                 use_method = "t.test")
 #'
 #' ## 3. To perform differential activity analysis in a rest-vs-1 manner for one specific group in "clusterID" column
-#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g0 = c("1"), use_method = "t.test")
+#' da_res <- getDA(input_eset = activity_clustered.eset,
+#'                 group_by = "clusterID",
+#'                 g0 = c("1"),
+#'                 use_method = "t.test")
 #'
 #' ## 4. To perform differential activity analysis in a 1-vs-1 manner for groups in "clusterID" column
-#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g1 = c("1"), g0 = c("3"), use_method = "t.test")
+#' da_res <- getDA(input_eset = activity_clustered.eset,
+#'                 group_by = "clusterID",
+#'                 g1 = c("1"),
+#'                 g0 = c("3"),
+#'                 use_method = "t.test")
 getDA <- function(input_eset,
                   group_by = "clusterID",
                   g1 = NULL, g0 = NULL,

diff --git a/R/manipulate_sparseEset.R b/R/manipulate_sparseEset.R
@@ -34,7 +34,10 @@ methods::setClass(Class = "SparseExpressionSet",
 #' @export
 #'
 #' @examples
-#' expression_raw.eset <- createSparseEset(input_matrix = sparseMatrix, projectID = "demoSample", addMetaData = T)
+#' data("pbmc14k_rawCount")
+#' pbmc14k_raw.eset <- createSparseEset(input_matrix = pbmc14k_rawCount,
+#'                                      projectID = "PBMC14k",
+#'                                      addMetaData = TRUE)
 createSparseEset <- function(input_matrix,
                              do.sparseConversion = TRUE,
                              cellData = NULL,
@@ -140,7 +143,18 @@ createSparseEset <- function(input_matrix,
 #' @export
 #'
 #' @examples
-#' combined.eset <- combineSparseEset(c(sample_1.eset, sample_2.eset, sample_3.eset), projectID = c("sample1", "sample2", "sample3"), addPrefix = c("tag1", "tag2", "tag3"), addMetaData = TRUE)
+#' demo1_mtx <- readInput_10x.dir(input_dir = system.file("extdata/demo_inputs/cell_matrix_10x", package = "scMINER"),
+#'                                featureType = "gene_symbol", removeSuffix = TRUE)
+#' demo1.eset <- createSparseEset(input_matrix = demo1_mtx, projectID = "demo1", addMetaData = TRUE)
+#' demo2_mtx <- readInput_table(table_file = system.file("extdata/demo_inputs/table_file/demoData2.txt.gz", package = "scMINER"),
+#'                              is.geneBYcell = TRUE, removeSuffix = TRUE)
+#' demo2.eset <- createSparseEset(input_matrix = demo2_mtx, projectID = "demo2", addMetaData = TRUE)
+#' combined.eset <- combineSparseEset(eset_list = c(demo1.eset, demo2.eset),
+#'                                    projectID = c("sample1", "sample2"),
+#'                                    addPrefix = c("demo1", "demo2"),
+#'                                    addSurfix = NULL,
+#'                                    addMetaData = TRUE,
+#'                                    imputeNA = TRUE)
 combineSparseEset <- function(eset_list,
                               projectID = NULL,
                               addPrefix = NULL,
@@ -271,7 +285,13 @@ combineSparseEset <- function(eset_list,
 #' @export
 #'
 #' @examples
-#' updated.eset <- updateSparseEset(input_eset = input.eset, cellData = data.frame(pData(input.eset), cellType = "B_cells"), addMetaData = TRUE)
+#' true_label <- read.table(system.file("extdata/demo_pbmc14k/PBMC14k_trueLabel.txt.gz", package = "scMINER"),
+#'                          header = T, row.names = 1, sep = "\t", quote = "", stringsAsFactors = FALSE)
+#' pbmc14k_raw.eset <- createSparseEset(input_matrix = pbmc14k_rawCount,
+#'                                      cellData = true_label,
+#'                                      featureData = NULL,
+#'                                      projectID = "PBMC14k",
+#'                                      addMetaData = TRUE)
 updateSparseEset <- function(input_eset,
                              dataMatrix = NULL,
                              cellData = NULL,
@@ -381,8 +401,17 @@ updateSparseEset <- function(input_eset,
 #' @export
 #'
 #' @examples
-#' filtered.eset <- filterSparseEset(raw.eset) ## filter the input eset using the cutoffs calculated by scMINER.
-#' filtered.eset <- filterSparseEset(raw.eset, gene.nCell_min = 10, cell.nUMI_min = 500, cell.nFeature_min = 100, cell.nFeature_max = 5000, cell.pctMito_max = 0.15)
+#' ## 1. using the cutoffs automatically calculated by scMINER
+#' pbmc14k_filtered.eset <- filterSparseEset(pbmc14k_raw.eset, filter_mode = "auto", filter_type = "both")
+#'
+#' ## 2. using the cutoffs manually specified
+#' pbmc14k_filtered_manual.eset <- filterSparseEset(pbmc14k_raw.eset, filter_mode = "manual", filter_type = "both",
+#'                                                  gene.nCell_min = 10,
+#'                                                  cell.nUMI_min = 500,
+#'                                                  cell.nUMI_max = 6500,
+#'                                                  cell.nFeature_min = 200,
+#'                                                  cell.nFeature_max = 2500,
+#'                                                  cell.pctMito_max = 0.1)
 filterSparseEset <- function(input_eset,
                              filter_mode = "auto",
                              filter_type = "both",
@@ -508,7 +537,11 @@ filterSparseEset <- function(input_eset,
 #' @return A sparse eset object that has been normalized and log-transformed
 #' @export
 #'
-#' @examples normalized.eset <- normalizeSparseEset(input_eset = filtered.eset, scale_factor = 1000000, do.logTransform = TRUE)
+#' @examples
+#' pbmc14k_log2cpm.eset <- normalizeSparseEset(pbmc14k_filtered.eset,
+#'                                             scale_factor = 1000000,
+#'                                             log_base = 2,
+#'                                             log_pseudoCount = 1)
 normalizeSparseEset <- function(input_eset,
                                 scale_factor = 1000000,
                                 do.logTransform = TRUE,
@@ -527,6 +560,7 @@ normalizeSparseEset <- function(input_eset,
     cat("Done! The data matrix of eset has been normalized but NOT log-transformed!\n")
   }
 
+  exp_mat.normalized <- Matrix::Matrix(exp_mat.normalized, sparse = TRUE)
   eset <- new( "SparseExpressionSet",
                assayData = assayDataNew("environment", exprs = exp_mat.normalized),
                phenoData = new("AnnotatedDataFrame", data = Biobase::pData(input_eset)),
@@ -551,7 +585,17 @@ normalizeSparseEset <- function(input_eset,
 #' @export
 #'
 #' @examples
-#' drawSparseEsetQC(input_eset, output_html_file = "./QC/esetQCreport.html", overwrite = FALSE, group = "projectID")
+#' ## 1. To generate the QC report in a group-specific manner, recommended whenever group information is avaiable.
+#' drawSparseEsetQC(input_eset = pbmc14k_raw.eset,
+#'                  output_html_file = "/your-path/PBMC14k/PLOT/pbmc14k_rawCount.html",
+#'                  overwrite = FALSE,
+#'                  group_by = "trueLabel")
+#'
+#' ## 2. To generate the QC report from a whole view
+#' drawSparseEsetQC(input_eset = pbmc14k_raw.eset,
+#'                  output_html_file = "/your-path/PBMC14k/PLOT/pbmc14k_rawCount.html",
+#'                  overwrite = FALSE,
+#'                  group_by = NULL)
 drawSparseEsetQC <- function(input_eset,
                              output_html_file,
                              overwrite = FALSE,