diff --git a/R/clustering_analysis.R b/R/clustering_analysis.R index 9574fed..6edf8a7 100644 --- a/R/clustering_analysis.R +++ b/R/clustering_analysis.R @@ -15,7 +15,10 @@ #' @export #' #' @examples -#' generateMICAinput(input_eset = log2cpm.ese, output_file = "./MICA/micaInput.txt") +#' data(pbmc14k_expression.eset) +#' generateMICAinput(input_eset = pbmc14k_expression.eset, +#' output_file = "/work-path/PBMC14k/MICA/micaInput.txt", +#' overwrite = FALSE) generateMICAinput <- function(input_eset, output_file, overwrite = F, @@ -125,7 +128,10 @@ generateMICAinput <- function(input_eset, #' @export #' #' @examples -#' clustered.eset <- addMICAoutput(input_eset = input_eset, mica_output_file = "/path-to-mica-input/clustering_UMAP_euclidean_20_2.22554.txt", visual_method = "umap") +#' data(pbmc14k_expression.eset) +#' pbmc14k_log2cpm.eset <- addMICAoutput(pbmc14k_expression.eset, +#' mica_output_file = system.file("extdata/demo_pbmc14k/MICA/clustering_UMAP_euclidean_20_2.05.txt", package = "scMINER"), +#' visual_method = "umap") addMICAoutput <- function(input_eset, mica_output_file, visual_method = "umap") @@ -157,23 +163,35 @@ addMICAoutput <- function(input_eset, #' Draw a scatter plot showing the coordinates and cluster id of each cell #' -#' @description -#' This function is used to visualize the clustering results generated by MICA. +#' @description This function is used to visualize the clustering results +#' generated by MICA. #' #' @param input_eset The sparse eset object -#' @param color_by Factor, character or numeric, name of the column of MICA cluster labels. Default: "`clusterID`". -#' @param colors A character vector or `NULL`, colors of the MICA cluster labels. The length of this vector should be same as the number of groups in color_by column. If `NULL`, the ggplot default colors will be use. Default: `NULL`. -#' @param do.logTransform Logical, whether to do the log2(value + 1) transformation. Only valid when color_by is numeric Default: `TRUE`. 
-#' @param X,Y Character, name of the columns of x-axis and y-axis coordinates. Default: "`UMAP_1`", and "`UMAP_2`". +#' @param color_by Factor, character or numeric, name of the column of MICA +#' cluster labels. Default: "`clusterID`". +#' @param colors A character vector or `NULL`, colors of the MICA cluster +#' labels. The length of this vector should be the same as the number of groups in +#' color_by column. If `NULL`, the ggplot default colors will be used. Default: +#' `NULL`. +#' @param do.logTransform Logical, whether to do the log2(value + 1) +#' transformation. Only valid when color_by is numeric. Default: `TRUE`. +#' @param X,Y Character, name of the columns of x-axis and y-axis coordinates. +#' Default: "`UMAP_1`", and "`UMAP_2`". #' @param point.size Numeric, size of points. Default: 0.3. -#' @param point.alpha Numeric, transparency of points, ranging from 0 (more transparent) to 1 (less transparent). Default: 1. +#' @param point.alpha Numeric, transparency of points, ranging from 0 (more +#' transparent) to 1 (less transparent). Default: 1. #' @param name.plot_title Character or NULL, title of the plot. Default: `NULL`. #' @param fontsize.plot_table Numeric, font size of the title. Default: 20. -#' @param show.cluster_label Logical, whether to show labels on the plot. Ignored when color_by is numeric. Default: `TRUE`. -#' @param fontsize.cluster_label Numeric, font size of the labels. Ignored when color_by is numeric. Default: 12. -#' @param legend.position Character, position of legend: "`right`", "`left`", "`top`", "`bottom`" or "`none`". Default: "`right`". -#' @param fontsize.legend_title Integer, font size of the legend title. Default: 10. -#' @param fontsize.legend_text Integer, font size of the legend text. Default: 8. +#' @param show.cluster_label Logical, whether to show labels on the plot. +#' Ignored when color_by is numeric. Default: `TRUE`. +#' @param fontsize.cluster_label Numeric, font size of the labels. 
Ignored when +#' color_by is numeric. Default: 12. +#' @param legend.position Character, position of legend: "`right`", "`left`", +#' "`top`", "`bottom`" or "`none`". Default: "`right`". +#' @param fontsize.legend_title Integer, font size of the legend title. Default: +#' 10. +#' @param fontsize.legend_text Integer, font size of the legend text. Default: +#' 8. #' @param fontsize.axis_title Integer, font size of the axis title. Default: 10. #' @param fontsize.axis_text Integer, font size of the axis text. Default: 8. #' @@ -182,11 +200,20 @@ addMICAoutput <- function(input_eset, #' @export #' #' @examples +#' +#' data(pbmc14k_expression.eset) #' ## 1. color-coded by factor or character variable -#' p_umap <- MICAplot(input_eset = pbmc14k_log2cpm.eset, color_by = 'clusterID', point.size = 0.1) +#' p1 <- MICAplot(input_eset = pbmc14k_expression.eset, +#' color_by = "clusterID", +#' X = "UMAP_1", Y = "UMAP_2", +#' point.size = 0.1, +#' fontsize.cluster_label = 6) #' #' ## 2. color-coded by numeric variable -#' p_umap <- MICAplot(input_eset = pbmc14k_log2cpm.eset, color_by = 'nUMI', do.logTransform = TRUE) +#' p2 <- MICAplot(input_eset = pbmc14k_expression.eset, +#' color_by = "nUMI", +#' do.logTransform = TRUE, +#' point.size = 0.1) MICAplot <- function(input_eset, color_by = "clusterID", colors = NULL, do.logTransform = TRUE, X = "UMAP_1", Y = "UMAP_2", diff --git a/R/data.R b/R/data.R index fbacc4b..2c85662 100644 --- a/R/data.R +++ b/R/data.R @@ -12,3 +12,16 @@ #' } #' @source A subset of Filtered_DownSampled_SortedPBMC_data.csv from "pbmc14k_rawCount" + + +#' SparseEset object of PBMC14k dataset +#' +#' This dataset contains the SparseEset object of PBMC14k dataset. For demonstration purposes, it has been downsampled to 3.5k cells, with 500 cells per population. 
+#' +#' @format ## `pbmc14k_expression.eset` +#' A large dgCMatrix with 17,986 rows and 14,000 columns: +#' \describe{ +#' This data set provides the SparseEset object of PBMC14k dataset that has been filtered, normalized, clustered and annotated. +#' } +#' @source It's generated by scMINER from Filtered_DownSampled_SortedPBMC_data.csv from +"pbmc14k_expression.eset" diff --git a/R/differential_analysis.R b/R/differential_analysis.R index e210af1..b3eb654 100644 --- a/R/differential_analysis.R +++ b/R/differential_analysis.R @@ -99,7 +99,11 @@ combinePvalVector <- function(pvals, #' #' @examples #' ## to call this function -#' res <- compare2groups(input_eset = input_eset, group_by = group_by, g1 = g1_tmp, g0 = g0_tmp, use_method = use_method) +#' res <- compare2groups(input_eset = input_eset, +#' group_by = group_by, +#' g1 = g1_tmp, +#' g0 = g0_tmp, +#' use_method = use_method) compare2groups <- function(input_eset, group_by = "clusterID", g1 = NULL, g0 = NULL, @@ -200,16 +204,28 @@ compare2groups <- function(input_eset, #' #' @examples #' ## 1. To perform differential expression analysis in a 1-vs-rest manner for all groups in "clusterID" column -#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", use_method = "limma") +#' de_res <- getDE(input_eset = clustered.eset, +#' group_by = "clusterID", +#' use_method = "limma") #' #' ## 2. To perform differential expression analysis in a 1-vs-rest manner for one specific group in "clusterID" column -#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g1 = c("1"), use_method = "limma") +#' de_res <- getDE(input_eset = clustered.eset, +#' group_by = "clusterID", +#' g1 = c("1"), +#' use_method = "limma") #' #' ## 3. 
To perform differential expression analysis in a rest-vs-1 manner for one specific group in "clusterID" column -#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g0 = c("1"), use_method = "limma") +#' de_res <- getDE(input_eset = clustered.eset, +#' group_by = "clusterID", +#' g0 = c("1"), +#' use_method = "limma") #' #' ## 4. To perform differential expression analysis in a 1-vs-1 manner for groups in "clusterID" column -#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g1 = c("1"), g0 = c("3"), use_method = "limma") +#' de_res <- getDE(input_eset = clustered.eset, +#' group_by = "clusterID", +#' g1 = c("1"), +#' g0 = c("3"), +#' use_method = "limma") getDE <- function(input_eset, group_by = "clusterID", g1 = NULL, g0 = NULL, @@ -291,16 +307,28 @@ getDE <- function(input_eset, #' #' @examples #' ## 1. To perform differential activity analysis in a 1-vs-rest manner for all groups in "clusterID" column -#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", use_method = "t.test") +#' da_res <- getDA(input_eset = activity_clustered.eset, +#' group_by = "clusterID", +#' use_method = "t.test") #' #' ## 2. To perform differential activity analysis in a 1-vs-rest manner for one specific group in "clusterID" column -#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g1 = c("1"), use_method = "t.test") +#' da_res <- getDA(input_eset = activity_clustered.eset, +#' group_by = "clusterID", +#' g1 = c("1"), +#' use_method = "t.test") #' #' ## 3. To perform differential activity analysis in a rest-vs-1 manner for one specific group in "clusterID" column -#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g0 = c("1"), use_method = "t.test") +#' da_res <- getDA(input_eset = activity_clustered.eset, +#' group_by = "clusterID", +#' g0 = c("1"), +#' use_method = "t.test") #' #' ## 4. 
To perform differential activity analysis in a 1-vs-1 manner for groups in "clusterID" column -#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g1 = c("1"), g0 = c("3"), use_method = "t.test") +#' da_res <- getDA(input_eset = activity_clustered.eset, +#' group_by = "clusterID", +#' g1 = c("1"), +#' g0 = c("3"), +#' use_method = "t.test") getDA <- function(input_eset, group_by = "clusterID", g1 = NULL, g0 = NULL, diff --git a/R/manipulate_sparseEset.R b/R/manipulate_sparseEset.R index 40a9e01..0a01600 100644 --- a/R/manipulate_sparseEset.R +++ b/R/manipulate_sparseEset.R @@ -34,7 +34,10 @@ methods::setClass(Class = "SparseExpressionSet", #' @export #' #' @examples -#' expression_raw.eset <- createSparseEset(input_matrix = sparseMatrix, projectID = "demoSample", addMetaData = T) +#' data("pbmc14k_rawCount") +#' pbmc14k_raw.eset <- createSparseEset(input_matrix = pbmc14k_rawCount, +#' projectID = "PBMC14k", +#' addMetaData = TRUE) createSparseEset <- function(input_matrix, do.sparseConversion = TRUE, cellData = NULL, @@ -140,7 +143,18 @@ createSparseEset <- function(input_matrix, #' @export #' #' @examples -#' combined.eset <- combineSparseEset(c(sample_1.eset, sample_2.eset, sample_3.eset), projectID = c("sample1", "sample2", "sample3"), addPrefix = c("tag1", "tag2", "tag3"), addMetaData = TRUE) +#' demo1_mtx <- readInput_10x.dir(input_dir = system.file("extdata/demo_inputs/cell_matrix_10x", package = "scMINER"), +#' featureType = "gene_symbol", removeSuffix = TRUE) +#' demo1.eset <- createSparseEset(input_matrix = demo1_mtx, projectID = "demo1", addMetaData = TRUE) +#' demo2_mtx <- readInput_table(table_file = system.file("extdata/demo_inputs/table_file/demoData2.txt.gz", package = "scMINER"), +#' is.geneBYcell = TRUE, removeSuffix = TRUE) +#' demo2.eset <- createSparseEset(input_matrix = demo2_mtx, projectID = "demo2", addMetaData = TRUE) +#' combined.eset <- combineSparseEset(eset_list = c(demo1.eset, demo2.eset), +#' projectID = 
c("sample1", "sample2"), +#' addPrefix = c("demo1", "demo2"), +#' addSurfix = NULL, +#' addMetaData = TRUE, +#' imputeNA = TRUE) combineSparseEset <- function(eset_list, projectID = NULL, addPrefix = NULL, @@ -271,7 +285,13 @@ combineSparseEset <- function(eset_list, #' @export #' #' @examples -#' updated.eset <- updateSparseEset(input_eset = input.eset, cellData = data.frame(pData(input.eset), cellType = "B_cells"), addMetaData = TRUE) +#' true_label <- read.table(system.file("extdata/demo_pbmc14k/PBMC14k_trueLabel.txt.gz", package = "scMINER"), +#' header = T, row.names = 1, sep = "\t", quote = "", stringsAsFactors = FALSE) +#' pbmc14k_raw.eset <- createSparseEset(input_matrix = pbmc14k_rawCount, +#' cellData = true_label, +#' featureData = NULL, +#' projectID = "PBMC14k", +#' addMetaData = TRUE) updateSparseEset <- function(input_eset, dataMatrix = NULL, cellData = NULL, @@ -381,8 +401,17 @@ updateSparseEset <- function(input_eset, #' @export #' #' @examples -#' filtered.eset <- filterSparseEset(raw.eset) ## filter the input eset using the cutoffs calculated by scMINER. -#' filtered.eset <- filterSparseEset(raw.eset, gene.nCell_min = 10, cell.nUMI_min = 500, cell.nFeature_min = 100, cell.nFeature_max = 5000, cell.pctMito_max = 0.15) +#' ## 1. using the cutoffs automatically calculated by scMINER +#' pbmc14k_filtered.eset <- filterSparseEset(pbmc14k_raw.eset, filter_mode = "auto", filter_type = "both") +#' +#' ## 2. 
using the cutoffs manually specified +#' pbmc14k_filtered_manual.eset <- filterSparseEset(pbmc14k_raw.eset, filter_mode = "manual", filter_type = "both", +#' gene.nCell_min = 10, +#' cell.nUMI_min = 500, +#' cell.nUMI_max = 6500, +#' cell.nFeature_min = 200, +#' cell.nFeature_max = 2500, +#' cell.pctMito_max = 0.1) filterSparseEset <- function(input_eset, filter_mode = "auto", filter_type = "both", @@ -508,7 +537,11 @@ filterSparseEset <- function(input_eset, #' @return A sparse eset object that has been normalized and log-transformed #' @export #' -#' @examples normalized.eset <- normalizeSparseEset(input_eset = filtered.eset, scale_factor = 1000000, do.logTransform = TRUE) +#' @examples +#' pbmc14k_log2cpm.eset <- normalizeSparseEset(pbmc14k_filtered.eset, +#' scale_factor = 1000000, +#' log_base = 2, +#' log_pseudoCount = 1) normalizeSparseEset <- function(input_eset, scale_factor = 1000000, do.logTransform = TRUE, @@ -527,6 +560,7 @@ normalizeSparseEset <- function(input_eset, cat("Done! The data matrix of eset has been normalized but NOT log-transformed!\n") } + exp_mat.normalized <- Matrix::Matrix(exp_mat.normalized, sparse = TRUE) eset <- new( "SparseExpressionSet", assayData = assayDataNew("environment", exprs = exp_mat.normalized), phenoData = new("AnnotatedDataFrame", data = Biobase::pData(input_eset)), @@ -551,7 +585,17 @@ normalizeSparseEset <- function(input_eset, #' @export #' #' @examples -#' drawSparseEsetQC(input_eset, output_html_file = "./QC/esetQCreport.html", overwrite = FALSE, group = "projectID") +#' ## 1. To generate the QC report in a group-specific manner, recommended whenever group information is available. +#' drawSparseEsetQC(input_eset = pbmc14k_raw.eset, +#' output_html_file = "/your-path/PBMC14k/PLOT/pbmc14k_rawCount.html", +#' overwrite = FALSE, +#' group_by = "trueLabel") +#' +#' ## 2. 
To generate the QC report from a whole view +#' drawSparseEsetQC(input_eset = pbmc14k_raw.eset, +#' output_html_file = "/your-path/PBMC14k/PLOT/pbmc14k_rawCount.html", +#' overwrite = FALSE, +#' group_by = NULL) drawSparseEsetQC <- function(input_eset, output_html_file, overwrite = FALSE, diff --git a/R/network_analysis.R b/R/network_analysis.R index 2640596..08c755a 100644 --- a/R/network_analysis.R +++ b/R/network_analysis.R @@ -12,8 +12,11 @@ #' @export #' #' @examples -#' hg_tf <- getDriverList(species_type = "hg", driver_type = "TF") # get the TF driver list of human -#' mm_driver <- getDriverList(species_type = "mm", driver_type = "TF_SIG") # get the total driver list, including both TF and SIG, of mouse +#' ## 1. Get the TF driver list of human +#' hg_tf <- getDriverList(species_type = "hg", driver_type = "TF") +#' +#' ## 2. Get the total driver list, including both TF and SIG, of mouse +#' mm_driver <- getDriverList(species_type = "mm", driver_type = "TF_SIG") getDriverList <- function(species_type = "hg", driver_type = "TF") { @@ -78,20 +81,38 @@ getDriverList <- function(species_type = "hg", #' #' @examples #' ## 1. The most commonly used command: pre-defined driver lists, automatic down-sampling, no metacell method -#' generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG") +#' generateSJARACNeInput(input_eset = normalized.eset, +#' group_name = "cell_type", +#' sjaracne_dir = "./SJARACNe", +#' species_type = "hg", +#' driver_type = "TF_SIG") #' #' ## 2. 
to disable the downsampling -#' generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", downSample_N = NULL) +#' generateSJARACNeInput(input_eset = normalized.eset, +#' group_name = "cell_type", +#' sjaracne_dir = "./SJARACNe", +#' species_type = "hg", +#' driver_type = "TF_SIG", +#' downSample_N = NULL) +#' +#' ## 3. Use the customized driver list: (add TUBB4A is the gene of interest but currently not in the pre-defined driver list) +#' +#' # when the driver-to-add is known as a transcription factor +#' generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", +#' customDriver_TF = c(getDriverList(species_type = "hg", driver_type = "TF"), "TUBB4A")) #' -#' ## 3. Use the customized driver list: TUBB4A is the gene of interest but currently not in the pre-defined driver list. -#' hg_driver <- getDriverList(species_type = "hg", driver_type = "TF_SIG") -#' "TUBB4A" %in% hg_driver # It would returen FALSE if TUBB4A is not in the pre-defined driver lists -#' generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", customDriver_TF = c(getDriverList(species_type = "hg", driver_type = "TF"), "TUBB4A")) # when the driver-to-add is known as a transcription factor -#' generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", customDriver_SIG = c(getDriverList(species_type = "hg", driver_type = "SIG"), "TUBB4A")) # when the driver-to-add is known as a non-transcription factor -#' generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", customDriver_TF = c(getDriverList(species_type = "hg", driver_type = "TF"), 
"TUBB4A"), customDriver_SIG = c(getDriverList(species_type = "hg", driver_type = "SIG"), "TUBB4A")) # when it's ambiguous to tell if the driver-to-add is a transcriptional factor +#' # when the driver-to-add is known as a non-transcription factor +#' generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", +#' customDriver_SIG = c(getDriverList(species_type = "hg", driver_type = "SIG"), "TUBB4A")) +#' +#' # when it's ambiguous to tell if the driver-to-add is a transcriptional factor +#' generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", +#' customDriver_TF = c(getDriverList(species_type = "hg", driver_type = "TF"), "TUBB4A"), +#' customDriver_SIG = c(getDriverList(species_type = "hg", driver_type = "SIG"), "TUBB4A")) #' #' ## 4. Use the metacell method -#' generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", superCell_N = 1000, superCell_count = 100, seed = 123) +#' generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", +#' superCell_N = 1000, superCell_count = 100, seed = 123) generateSJARACNeInput <- function(input_eset, group_name = "clusterID", group_name.refine = FALSE, @@ -360,13 +381,15 @@ generateSJARACNeInput <- function(input_eset, #' @export #' #' @examples -#' ## 1. 
assess the quality of network from a network file -#' drawNetworkQC(network_file = ./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt, generate_htmal = TRUE) # the html file will be saved to the same folder as the network file -#' drawNetworkQC(network_file = ./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt, generate_htmal = TRUE, outdir = "/path-to-cutomized-folder") # the html file will be saved to the customized folder -#' drawNetworkQC(network_file = ./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt, generate_htmal = TRUE, prefix = "PBMC14") # add the "PBMC14" in front of the default name of html report +#' ## 1. assess the quality of network from network files +#' drawNetworkQC(network_file = "./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt", generate_html = TRUE) +#' drawNetworkQC(network_file = "./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt", generate_html = TRUE, +#' outdir = "/path-to-customized-folder") +#' drawNetworkQC(network_file = "./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt", generate_html = TRUE, +#' prefix = "PBMC14") #' -#' ## 2. assess the qulity of network from a directory -#' drawNetworkQC(sjaracne_dir = ./SJARACNE, generate_html = TRUE) # the html file will be saved to the same folder as the network file +#' ## 2. 
assess the quality of network from the directory of network files +#' drawNetworkQC(sjaracne_dir = "./SJARACNE", generate_html = TRUE) drawNetworkQC <- function(network_file = NULL, sjaracne_dir = NULL, directed = TRUE, weighted = TRUE, @@ -595,7 +618,8 @@ z_normalization <- function(x) { #' #' @return A list of source genes, for each source gene, the value is a data frame with "`target`", "`MI`" and "`spearman`" as the columns #' -#' @examples net_data <- read.table("./consensus_network_ncol_.txt", header = T, sep = "\t", stringsAsFactors = F, quote = "") +#' @examples +#' net_data <- read.table("./consensus_network_ncol_.txt", header = T, sep = "\t", stringsAsFactors = F, quote = "") #' target_list <- get_net2target_list(net_data) get_net2target_list <- function(net_dat = NULL) { all_source <- base::unique(net_dat$source) @@ -660,7 +684,8 @@ get_target_list2matrix <- function(target_list = NULL, activity_method = 'mean') #' #' @return a matrix of activities, drivers by cells #' -#' @examples act_mat <- cal_Activity(target_list = target_list, cal_mat = exprs(normalized.eset), activity_method = 'mean', do.std = TRUE) +#' @examples +#' act_mat <- cal_Activity(target_list = target_list, cal_mat = exprs(normalized.eset), activity_method = 'mean', do.std = TRUE) cal_Activity <- function(target_list = NULL, cal_mat = NULL, activity_method = 'mean', do.std = TRUE) { ## check parameters if (is.null(target_list) == TRUE) { @@ -675,7 +700,7 @@ cal_Activity <- function(target_list = NULL, cal_mat = NULL, activity_method = ' message('The cal_mat is not specified.'); return(FALSE) } - ## prepare matrixes + ## prepare matrices mat_weight <- get_target_list2matrix(target_list, activity_method = activity_method) # matrix contains 0 (no edge) and signed MI mat_binary <- get_target_list2matrix(target_list, activity_method = 'mean') # matrix contains 0 (no edge) and 1 (with edge) only @@ -737,7 +762,10 @@ cal_Activity <- function(target_list = NULL, cal_mat = NULL, activity_method = ' 
#' @export #' #' @examples -#' activity_group.eset <- getActivity_individual(input_eset = group_specific.est, network_file.tf = "consensus_network_ncol_.txt", network_file.sig = "consensus_network_ncol_.txt", driver_type = "TF_SIG") +#' activity_group.eset <- getActivity_individual(input_eset = group_specific.est, +#' network_file.tf = "consensus_network_ncol_.txt", +#' network_file.sig = "consensus_network_ncol_.txt", +#' driver_type = "TF_SIG") getActivity_individual <- function(input_eset, network_file.tf = NULL, network_file.sig = NULL, @@ -833,19 +861,53 @@ getActivity_individual <- function(input_eset, #' #' @examples #' ## 1. when no tag was used in runing SJARACNE: the network file folder ("sjaracne_workflow-*") is directly under TF/SIG folder of each group. -#' activity.eset <- getActivity_inBatch(input_eset = normalized.eset, sjaracne_dir = "./SJARACNe", group_name = "cell_type", driver_type = "TF_SIG", activity_method = "mean", do.z_normalization = TRUE) +#' activity.eset <- getActivity_inBatch(input_eset = normalized.eset, +#' sjaracne_dir = "./SJARACNe", +#' group_name = "cell_type", +#' driver_type = "TF_SIG", +#' activity_method = "mean", +#' do.z_normalization = TRUE) #' #' ## 2. when tag (e.g. "bs_100" ) was used: the nework file folder ("sjaracne_workflow-*") is directly under a subfolder "bs_100" of the TF/SIG folder of each group. -#' activity.eset <- getActivity_inBatch(input_eset = normalized.eset, sjaracne_dir = "./SJARACNe", group_name = "cell_type", network_tag.tf = "bs_100", network_tag.sig = "bs_100", driver_type = "TF_SIG", activity_method = "mean", do.z_normalization = TRUE) +#' activity.eset <- getActivity_inBatch(input_eset = normalized.eset, +#' sjaracne_dir = "./SJARACNe", +#' group_name = "cell_type", +#' network_tag.tf = "bs_100", +#' network_tag.sig = "bs_100", +#' driver_type = "TF_SIG", +#' activity_method = "mean", +#' do.z_normalization = TRUE) #' #' ## 3. 
to calculate the activities of TF only -#' activity.eset <- getActivity_inBatch(input_eset = normalized.eset, sjaracne_dir = "./SJARACNe", group_name = "cell_type", network_tag.tf = "bs_100", network_tag.sig = "bs_100", driver_type = "TF", activity_method = "mean", do.z_normalization = TRUE) +#' activity.eset <- getActivity_inBatch(input_eset = normalized.eset, +#' sjaracne_dir = "./SJARACNe", +#' group_name = "cell_type", +#' network_tag.tf = "bs_100", +#' network_tag.sig = "bs_100", +#' driver_type = "TF", +#' activity_method = "mean", +#' do.z_normalization = TRUE) #' #' ## 4. to exclude some groups in the activity calculation (e.g. "NK" and "Monocyte") -#' activity.eset <- getActivity_inBatch(input_eset = normalized.eset, sjaracne_dir = "./SJARACNe", group_name = "cell_type", group_exclude = c("NK", "Monocyte"), network_tag.tf = "bs_100", network_tag.sig = "bs_100", driver_type = "TF", activity_method = "mean", do.z_normalization = TRUE) +#' activity.eset <- getActivity_inBatch(input_eset = normalized.eset, +#' sjaracne_dir = "./SJARACNe", +#' group_name = "cell_type", +#' group_exclude = c("NK", "Monocyte"), +#' network_tag.tf = "bs_100", +#' network_tag.sig = "bs_100", +#' driver_type = "TF", +#' activity_method = "mean", +#' do.z_normalization = TRUE) #' #' ## 5. when calculate the activities from the gene expression values scaled by other methods (e.g. 
ScaleData() from Seurat package) -#' activity.eset <- getActivity_inBatch(input_eset = normalized.eset, sjaracne_dir = "./SJARACNe", group_name = "cell_type", network_tag.tf = "bs_100", network_tag.sig = "bs_100", driver_type = "TF_SIG", activity_method = "mean", do.z_normalization = FALSE) +#' activity.eset <- getActivity_inBatch(input_eset = normalized.eset, +#' sjaracne_dir = "./SJARACNe", +#' group_name = "cell_type", +#' network_tag.tf = "bs_100", +#' network_tag.sig = "bs_100", +#' driver_type = "TF_SIG", +#' activity_method = "mean", +#' do.z_normalization = FALSE) getActivity_inBatch <- function(input_eset, sjaracne_dir, group_name, group_exclude = NULL, @@ -1031,7 +1093,7 @@ getActivity_inBatch <- function(input_eset, } ## prepare activity eset - acs_mtx <- data.frame(acs_master) + acs_mtx <- data.frame(acs_master, check.names = FALSE) acs_mtx <- acs_mtx[!is.na(acs_mtx$driver_id),] row.names(acs_mtx) <- acs_mtx$driver_id acs_mtx <- acs_mtx[,-1] diff --git a/R/read_input.R b/R/read_input.R index 496f5fa..6903563 100644 --- a/R/read_input.R +++ b/R/read_input.R @@ -1,26 +1,35 @@ #' Read the input data generated by 10x Genomics from a directory #' -#' @description -#' This function is used to read the gene expression data from a directory containing three files generated by 10x Genomics: **matrix.mtx**, **barcodes.tsv** and +#' @description This function is used to read the gene expression data from a +#' directory containing three files generated by 10x Genomics: **matrix.mtx**, +#' **barcodes.tsv** and #' **features.tsv** (or **genes.tsv**). This function can handle these conditions well: #' - Alternative file names for feature data: **features.tsv** by CellRanger > 3.0, and **genes.tsv** by CellRanger < 3.0; #' - One or more input files are compressed, usually in "**.gz**" format; #' - Data with multiple modalities: like the single cell multiome data. In this case, it only retains the data of "Gene Expression". 
#' -#' @param input_dir Path to the directory containing the 3 files generated by 10x Genomics: **matrix.mtx**, **barcodes.tsv** and **features.tsv** (or **genes.tsv**) -#' @param featureType Character, feature type to use as the gene name of expression matrix: `"gene_symbol"` (the default) or `"gene_id"`. -#' @param removeSuffix Logical, whether to remove the suffix "**-1**" when present in all cell barcodes. Default: `TRUE`. -#' @param addPrefix Character or `NULL`, add a prefix to the cell barcodes, like Sample ID. It is highly recommended to use a prefix containing letters and/or numbers only, -#' and not starting with numbers. Default: `NULL`. +#' @param input_dir Path to the directory containing the 3 files generated by +#' 10x Genomics: **matrix.mtx**, **barcodes.tsv** and **features.tsv** (or +#' **genes.tsv**) +#' @param featureType Character, feature type to use as the gene name of +#' expression matrix: `"gene_symbol"` (the default) or `"gene_id"`. +#' @param removeSuffix Logical, whether to remove the suffix "**-1**" when +#' present in all cell barcodes. Default: `TRUE`. +#' @param addPrefix Character or `NULL`, add a prefix to the cell barcodes, like +#' Sample ID. It is highly recommended to use a prefix containing letters +#' and/or numbers only, and not starting with numbers. Default: `NULL`. 
#' #' @return A sparse gene expression matrix of raw UMI counts, genes by cells #' @export #' #' @examples -#' input_dir <- 'path-to-directory' +#' input_dir <- system.file("extdata/demo_inputs/cell_matrix_10x", package = "scMINER") # path to input data #' list.files(input_dir, full.names = FALSE) # you should see three files: matrix.mtx, barcodes.tsv and features.tsv (or genes.tsv) -#' sparseMatrix <- readInput_10x.dir(input_dir, featureType = "gene_symbol", removeSuffix = TRUE, addPrefix = "demoSample") +#' sparseMatrix <- readInput_10x.dir(input_dir, +#' featureType = "gene_symbol", +#' removeSuffix = TRUE, +#' addPrefix = "demoSample") readInput_10x.dir <- function(input_dir, featureType = "gene_symbol", removeSuffix = TRUE, addPrefix = NULL) @@ -112,22 +121,30 @@ readInput_10x.dir <- function(input_dir, #' Read the input data generated by 10x Genomics from the HDF5 file #' -#' @description -#' This function is used to read the gene expression data from the HDF5 file generated by CellRanger pipeline of 10x Genomics. This function can automatically distinguish -#' the data of different modalities (e.g. expression data, ATAC data) and retains the gene expression data only. The `**hdf5r**` package is needed to use this function. +#' @description This function is used to read the gene expression data from the +#' HDF5 file generated by CellRanger pipeline of 10x Genomics. This function can +#' automatically distinguish the data of different modalities (e.g. expression +#' data, ATAC data) and retains the gene expression data only. The `**hdf5r**` +#' package is needed to use this function. #' #' @param h5_file H5 file generated by CellRanger pipeline of 10x Genomics -#' @param featureType Character, feature type to use as the gene name of expression matrix: `"gene_symbol"` (the default) or `"gene_id"`. -#' @param removeSuffix Logical, whether to remove the suffix "**-1**" when present in all cell barcodes. Default: `TRUE`. 
-#' @param addPrefix Character or `NULL`, add a prefix to the cell barcodes, like Sample ID. It is highly recommended to use a prefix containing letters and/or numbers only, -#' and not starting with numbers. Default: `NULL`. +#' @param featureType Character, feature type to use as the gene name of +#' expression matrix: `"gene_symbol"` (the default) or `"gene_id"`. +#' @param removeSuffix Logical, whether to remove the suffix "**-1**" when +#' present in all cell barcodes. Default: `TRUE`. +#' @param addPrefix Character or `NULL`, add a prefix to the cell barcodes, like +#' Sample ID. It is highly recommended to use a prefix containing letters +#' and/or numbers only, and not starting with numbers. Default: `NULL`. #' #' @return A sparse gene expression matrix of raw UMI counts, genes by cells #' @export #' #' @examples -#' h5_file <- 'path-to-h5_file' -#' sparseMatrix <- readInput_10x.h5(h5_file, featureType = "gene_symbol", removeSuffix = TRUE, addPrefix = "demoSample") +#' h5_file <- system.file("extdata/demo_inputs/hdf5_10x/demoData3.h5", package = "scMINER") # path to hdf5 file +#' sparseMatrix <- readInput_10x.h5(h5_file, +#' featureType = "gene_symbol", +#' removeSuffix = TRUE, +#' addPrefix = "demoSample") readInput_10x.h5 <- function(h5_file, featureType = "gene_symbol", removeSuffix = TRUE, @@ -213,20 +230,28 @@ readInput_10x.h5 <- function(h5_file, #' Read the h5ad file #' -#' @description -#' This function is used to read the h5ad file, a popular file format for storing and sharing single-cell RNA sequencing data. The `**anndata**` package is needed to use this function. +#' @description This function is used to read the h5ad file, a popular file +#' format for storing and sharing single-cell RNA sequencing data. The +#' `**anndata**` package is needed to use this function. #' #' @param h5ad_file H5ad file of sc/snRNA-seq data -#' @param removeSuffix Logical, whether to remove the suffix "**-1**" when present in all cell barcodes. Default: `TRUE`. 
-#' @param addPrefix Character or `NULL`, add a prefix to the cell barcodes, like Sample ID. It is highly recommended to use a prefix containing letters and/or numbers only, -#' and not starting with numbers. Default: `NULL`. +#' @param removeSuffix Logical, whether to remove the suffix "**-1**" when +#' present in all cell barcodes. Default: `TRUE`. +#' @param addPrefix Character or `NULL`, add a prefix to the cell barcodes, like +#' Sample ID. It is highly recommended to use a prefix containing letters +#' and/or numbers only, and not starting with numbers. Default: `NULL`. #' -#' @return A AnnData object containing `"X"` (a observations x variables data matrix), `"obs"` (data frame of observations), `"var"` (data frame of variables) and more. For more details, please check out . +#' @return A AnnData object containing `"X"` (a observations x variables data +#' matrix), `"obs"` (data frame of observations), `"var"` (data frame of +#' variables) and more. For more details, please check out +#' . #' @export #' #' @examples -#' h5ad_file <- 'path-to-h5ad_file' -#' sparseMatrix <- readInput_h5ad(h5ad_file, removeSuffix = FALSE, addPrefix = "demoSample") +#' h5ad_file <- system.file("extdata/demo_inputs/h5ad_file/demoData4.h5ad", package = "scMINER") # path to h5ad file +#' sparseMatrix <- readInput_h5ad(h5ad_file, +#' removeSuffix = FALSE, +#' addPrefix = "demoSample") readInput_h5ad <- function(h5ad_file, removeSuffix = FALSE, addPrefix = NULL) { @@ -269,24 +294,34 @@ readInput_h5ad <- function(h5ad_file, #' Read the table format file #' -#' @description -#' This function is used to read data from a table-format file. The user needs to specify the format of the table using the parameter **`is.geneBYcell`**: +#' @description This function is used to read data from a table-format file. 
The +#' user needs to specify the format of the table using the parameter +#' **`is.geneBYcell`**: #' - `TRUE` (the default): the rows will be treated as genes, while the columns will be treated as cells; #' - `FALSE`: the rows will be treated as cells, while the columns will be treated as genes. #' -#' @param table_file The table format file (e.g. **txt**, **tsv**, **csv**, and others) which the data are to be read from. +#' @param table_file The table format file (e.g. **txt**, **tsv**, **csv**, and +#' others) which the data are to be read from. #' @param sep String, The field separator character. Default: `"\t"`. -#' @param is.geneBYcell Logical, whether the table is organized in **gene (row) by cell (column)** format. If `FALSE`, the rows will be treated as cells. Default: `TRUE`. -#' @param removeSuffix Logical, whether to remove the suffix "**-1**" when present in all cell barcodes. Default: `FALSE`. -#' @param addPrefix Character or `NULL`, add a prefix to the cell barcodes, like Sample ID. It is highly recommended to use a prefix containing letters and/or numbers only, -#' and not starting with numbers. Default: `NULL`. +#' @param is.geneBYcell Logical, whether the table is organized in **gene (row) +#' by cell (column)** format. If `FALSE`, the rows will be treated as cells. +#' Default: `TRUE`. +#' @param removeSuffix Logical, whether to remove the suffix "**-1**" when +#' present in all cell barcodes. Default: `FALSE`. +#' @param addPrefix Character or `NULL`, add a prefix to the cell barcodes, like +#' Sample ID. It is highly recommended to use a prefix containing letters +#' and/or numbers only, and not starting with numbers. Default: `NULL`. 
#' #' @return A sparse gene expression matrix, genes by cells #' @export #' #' @examples -#' table_file <- 'path-to-table_file' -#' sparseMatrix <- readInput_table(table_file, sep = "\t", is.geneBYcell = TRUE, removeSuffix = FALSE, addPrefix = "demoSample") +#' table_file <- system.file("extdata/demo_inputs/table_file/demoData2.txt.gz", package = "scMINER") # path to text-table file +#' sparseMatrix <- readInput_table(table_file, +#' sep = "\t", +#' is.geneBYcell = TRUE, +#' removeSuffix = FALSE, +#' addPrefix = "demoSample") readInput_table <- function(table_file, sep = "\t", is.geneBYcell = TRUE, removeSuffix = FALSE, addPrefix = NULL) { @@ -327,19 +362,24 @@ readInput_table <- function(table_file, sep = "\t", is.geneBYcell = TRUE, #' Create a project space for scMINER analysis #' -#' @description -#' This function is used to create a folder of the specified project names in the specified project directory as the project space to run scMINER analysis. -#' It also creastes 4 subfolers inside of it: "`DATA`", "`MICA`", "`SJARACNe`" and "`PLOT`". +#' @description This function is used to create a folder of the specified +#' project names in the specified project directory as the project space to run +#' scMINER analysis. It also creates 4 subfolders inside of it: "`DATA`", +#' "`MICA`", "`SJARACNe`" and "`PLOT`". #' #' @param project_dir The directory to create the project space in -#' @param project_name The name of the project, will be used as the name of the folder -#' @param do.unlink Logical, whether to remove the files and/or folders inside of pre-existing project space. Default: `FALSE`. +#' @param project_name The name of the project, will be used as the name of the +#' folder +#' @param do.unlink Logical, whether to remove the files and/or folders inside +#' of pre-existing project space. Default: `FALSE`. #' -#' @return It creates a folder of project name and 4 subfolders in the project directory, and returns the path of project space.
+#' @return It creates a folder of project name and 4 subfolders in the project +#' directory, and returns the path of project space. #' @export #' #' @examples -#' createProjectSpace(project_dir = "path-to-a-folder", project_name = "PBMC14k") +#' scminer_dir <- createProjectSpace(project_dir = "path-to-a-folder", +#' project_name = "PBMC14k") createProjectSpace <- function(project_dir, project_name, do.unlink = FALSE) diff --git a/R/visualization.R b/R/visualization.R index 0912665..7b329ee 100644 --- a/R/visualization.R +++ b/R/visualization.R @@ -32,20 +32,34 @@ #' @export #' #' @examples +#' data(pbmc14k_expression.eset) +#' #' ## 1. violin plots grouped by clusters (say the column name is 'clusterID') -#' p_vln <- feature_vlnplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID") +#' p_vln <- feature_vlnplot(input_eset = pbmc14k_expression.eset, +#' features = c("CD14", "CD19", "CD8A"), +#' group_by = "clusterID") #' #' ## 2. violin plots grouped by cell types (say the column name is 'cellType') -#' p_vln <- feature_vlnplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "cellType") +#' p_vln <- feature_vlnplot(input_eset = pbmc14k_expression.eset, +#' features = c("CD14", "CD19", "CD8A"), +#' group_by = "cellType") #' #' ## 3. customize the colors to fill the violin plots -#' p_vln <- feature_vlnplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID", colors = c("blue", "red", "green")) +#' p_vln <- feature_vlnplot(input_eset = pbmc14k_expression.eset, +#' features = c("CD14", "CD19", "CD8A"), +#' group_by = "clusterID", +#' colors = c("blue", "red", "green")) #' #' ## 4. 
add jittered points -#' p_vln <- feature_vlnplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID", add_jitter = TRUE, jitter.width = 0.5, jitter.size = 0.5) +#' p_vln <- feature_vlnplot(input_eset = pbmc14k_expression.eset, +#' features = c("CD14", "CD19", "CD8A"), +#' group_by = "clusterID", +#' add_jitter = TRUE, jitter.width = 0.5, jitter.size = 0.5) #' #' ## 5. using activity data -#' p_vln <- feature_vlnplot(input_eset = activity_clustered.eset, features = c("CD14_SIG", "CD19_SIG", "CD8A_SIG"), group_by = "clusterID", ylabel_text = "Activity") +#' p_vln <- feature_vlnplot(input_eset = activity_clustered.eset, +#' features = c("CD14_SIG", "CD19_SIG", "CD8A_SIG"), +#' group_by = "clusterID", ylabel_text = "Activity") feature_vlnplot <- function(input_eset, features = NULL, group_by = "clusterID", @@ -136,19 +150,32 @@ feature_vlnplot <- function(input_eset, #' #' @examples #' ## 1. violin plots grouped by clusters (say the column name is 'clusterID') -#' p_box <- feature_boxplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID") +#' p_box <- feature_boxplot(input_eset = clustered.eset, +#' features = c("CD14", "CD19", "CD8A"), +#' group_by = "clusterID") #' #' ## 2. violin plots grouped by cell types (say the column name is 'cellType') -#' p_box <- feature_boxplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "cellType") +#' p_box <- feature_boxplot(input_eset = clustered.eset, +#' features = c("CD14", "CD19", "CD8A"), +#' group_by = "cellType") #' #' ## 3. customize the colors to fill the violin plots -#' p_box <- feature_boxplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID", colors = c("blue", "red", "green")) +#' p_box <- feature_boxplot(input_eset = clustered.eset, +#' features = c("CD14", "CD19", "CD8A"), +#' group_by = "clusterID", +#' colors = c("blue", "red", "green")) #' #' ## 4. 
add jittered points -#' p_box <- feature_boxplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID", add_jitter = TRUE, jitter.width = 0.5, jitter.size = 0.5) +#' p_box <- feature_boxplot(input_eset = clustered.eset, +#' features = c("CD14", "CD19", "CD8A"), +#' group_by = "clusterID", +#' add_jitter = TRUE, +#' jitter.width = 0.5, jitter.size = 0.5) #' #' ## 5. using activity data -#' p_box <- feature_boxplot(input_eset = activity_clustered.eset, features = c("CD14_SIG", "CD19_SIG", "CD8A_SIG"), group_by = "clusterID", ylabel_text = "Activity") +#' p_box <- feature_boxplot(input_eset = activity_clustered.eset, +#' features = c("CD14_SIG", "CD19_SIG", "CD8A_SIG"), +#' group_by = "clusterID", ylabel_text = "Activity") feature_boxplot <- function(input_eset, features = NULL, group_by = "clusterID", @@ -234,13 +261,21 @@ feature_boxplot <- function(input_eset, #' #' @examples #' ## 1. scatter plots with UMAP projections -#' p_scatter <- feature_scatterplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), location_x = "UMAP_1", location_y = "UMAP_2") +#' feature_scatterplot(input_eset = clustered.eset, +#' features = c("CD14", "CD19", "CD8A"), +#' location_x = "UMAP_1", location_y = "UMAP_2") #' #' ## 2. scatter plots with t-SNE projections -#' p_scatter <- feature_scatterplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), location_x = "tSNE_1", location_y = "tSNE_2") +#' feature_scatterplot(input_eset = clustered.eset, +#' features = c("CD14", "CD19", "CD8A"), +#' location_x = "tSNE_1", location_y = "tSNE_2") #' #' ## 3. 
change the point size and font size -#' p_scatter <- feature_scatterplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), location_x = "UMAP_1", location_y = "UMAP_2", point.size = 1, fontsize.strip = 12, fontsize.axis = 10) +#' feature_scatterplot(input_eset = clustered.eset, +#' features = c("CD14", "CD19", "CD8A"), +#' location_x = "UMAP_1", location_y = "UMAP_2", +#' point.size = 1, +#' fontsize.strip = 12, fontsize.axis = 10) feature_scatterplot <- function(input_eset, features = NULL, location_x = "UMAP_1", location_y = "UMAP_2", @@ -323,10 +358,15 @@ feature_scatterplot <- function(input_eset, #' @examples #' features_of_interest <- c("CD3D","CD27","IL7R","SELL","CCR7","IL32","GZMA","GZMK","DUSP2","CD8A","GZMH","GZMB","CD79A","CD79B","CD86","CD14") #' ## 1. the most commonly used command -#' p_bubble <- feature_bubbleplot(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID") +#' feature_bubbleplot(input_eset = clustered.eset, +#' features = features_of_interest, +#' group_by = "clusterID") #' #' ## 2. customize the colors -#' p_bubble <- feature_bubbleplot(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID", colors = c("lightgrey", "red")) +#' feature_bubbleplot(input_eset = clustered.eset, +#' features = features_of_interest, +#' group_by = "clusterID", +#' colors = c("lightgrey", "red")) feature_bubbleplot <- function(input_eset, features = NULL, group_by = "clusterID", @@ -407,19 +447,34 @@ feature_bubbleplot <- function(input_eset, #' @examples #' features_of_interest <- c("CD3D","CD27","IL7R","SELL","CCR7","IL32","GZMA","GZMK","DUSP2","CD8A","GZMH","GZMB","CD79A","CD79B","CD86","CD14") #' ## 1. the most commonly used command -#' feature_heatmap(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID") +#' feature_heatmap(input_eset = clustered.eset, +#' features = features_of_interest, +#' group_by = "clusterID") #' #' ## 2. 
add one more column ('true_label') for cell annotation -#' feature_heatmap(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID", annotation_columns = c("true_label")) +#' feature_heatmap(input_eset = clustered.eset, +#' features = features_of_interest, +#' group_by = "clusterID", +#' annotation_columns = c("true_label")) #' #' ## 3. scale the data by row -#' feature_heatmap(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID", scale_method = "row") +#' feature_heatmap(input_eset = clustered.eset, +#' features = features_of_interest, +#' group_by = "clusterID", +#' scale_method = "row") #' #' ## 4. cluster the rows -#' feature_heatmap(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID", cluster_rows = TRUE) +#' feature_heatmap(input_eset = clustered.eset, +#' features = features_of_interest, +#' group_by = "clusterID", +#' cluster_rows = TRUE) #' #' ## 5. add gaps -#' feature_heatmap(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID", use_gaps.column = TRUE, use_gaps.row = TRUE) +#' feature_heatmap(input_eset = clustered.eset, +#' features = features_of_interest, +#' group_by = "clusterID", +#' use_gaps.column = TRUE, +#' use_gaps.row = TRUE) feature_heatmap <- function(input_eset, features = NULL, group_by = "clusterID", @@ -500,10 +555,15 @@ feature_heatmap <- function(input_eset, #' #' @examples #' ## 1. bar plot grouped by clusters ("clusterID") and colored by true labels ("true_label) -#' p_bar <- draw_barplot(input_eset = clustered.eset, group_by = "clusterID", color_by = "true_label") +#' draw_barplot(input_eset = clustered.eset, +#' group_by = "clusterID", +#' color_by = "true_label") #' #' ## 2. 
customize the colors -#' p_bar <- draw_barplot(input_eset = clustered.eset, group_by = "clusterID", color_by = "true_label", colors = c("green", "red", "blue", "grey", "orange", "purple", "yellow")) +#' draw_barplot(input_eset = clustered.eset, +#' group_by = "clusterID", +#' color_by = "true_label", +#' colors = c("green", "red", "blue", "grey", "orange", "purple", "yellow")) draw_barplot <- function(input_eset, group_by = "clusterID", color_by = "cell_type", @@ -562,10 +622,15 @@ draw_barplot <- function(input_eset, #' signature_table <- openxlsx::read.xlsx(marker_file) #' head(signature_table) #' ## 1. the most commonly used command -#' p_bubbleplot <- draw_bubbleplot(input_eset = clustered.eset, signature_table = signature_table, group_by = "clusterID") +#' draw_bubbleplot(input_eset = clustered.eset, +#' signature_table = signature_table, +#' group_by = "clusterID") #' #' ## 2. customize the colors -#' p_bubbleplot <- draw_bubbleplot(input_eset = clustered.eset, signature_table = signature_table, group_by = "clusterID", colors = c("lightgrey", "red")) +#' draw_bubbleplot(input_eset = clustered.eset, +#' signature_table = signature_table, +#' group_by = "clusterID", +#' colors = c("lightgrey", "red")) draw_bubbleplot <- function(input_eset, signature_table = NULL, group_by = "clusterID", @@ -666,18 +731,27 @@ draw_bubbleplot <- function(input_eset, #' #' @examples #' ## 1. the most commonly used command -#' generatePortalInputs(input_expression.eset = expression_clustered.eset, group_by = "cellType", input_activity.eset = activity_clustered.eset, input_network.dir = "./SJARACNe", output_dir = "./scMINERportal") +#' generatePortalInputs(input_expression.eset = expression_clustered.eset, +#' group_by = "cellType", +#' input_activity.eset = activity_clustered.eset, +#' input_network.dir = "./SJARACNe", +#' output_dir = "./scMINERportal") #' #' ## 2. 
prepare expression data from Seurat object ("pbmc14.obj") -#' generatePortalInputs(input_expression.seuratObj = pbmc14.obj, output_dir = "./path-to-output_dir") +#' generatePortalInputs(input_expression.seuratObj = pbmc14.obj, +#' output_dir = "./path-to-output_dir") #' #' ## 3. prepare network data from a table -#' network.table <- data.frame(CellGroup = c("CD4__CD25_T_Reg", "CD4__CD25_T_Reg", "CD19__B", "CD19__B"),NetworkType = c("SIG", "TF", "SIG", "TF"), -#' NetworkFile = c("./sjaracne/CD4__CD25_T_Reg/SIG/b100_pce-3/sjaracne_workflow-1474c41b-067b-4f86-ab99-09f73dadb16g/consensus_network_ncol_.txt", -#' "./sjaracne/CD4__CD25_T_Reg/TF/b100_pce-3/sjaracne_workflow-a93cd6db-7253-4ffb-ae4e-633b9dedf11d/consensus_network_ncol_.txt", -#' "./sjaracne/CD19__B/SIG/sjaracne_workflow-da0c3c72-7afb-44fa-973b-e4d767e20b6f/consensus_network_ncol_.txt", -#' "./sjaracne/CD19__B/TF/sjaracne_workflow-0426ea12-10bf-428c-b199-d5bd1a7aab5f/consensus_network_ncol_.txt")) -#' generatePortalInputs(input_expression.eset = expression_clustered.eset, group_by = "cellType", input_network.table = network.table, output_dir = "./path-to-output_dir") +#' network.table <- data.frame(CellGroup = c("CD4Treg", "CD4Treg", "B", "B"), +#' NetworkType = c("SIG", "TF", "SIG", "TF"), +#' NetworkFile = c("./sjaracne/CD4Treg/SIG/consensus_network_ncol_.txt", +#' "./sjaracne/CD4Treg/TF/consensus_network_ncol_.txt", +#' "./sjaracne/B/SIG/consensus_network_ncol_.txt", +#' "./sjaracne/B/TF/consensus_network_ncol_.txt")) +#' generatePortalInputs(input_expression.eset = expression_clustered.eset, +#' group_by = "cellType", +#' input_network.table = network.table, +#' output_dir = "./path-to-output_dir") generatePortalInputs <- function(input_expression.eset = NULL, input_expression.seuratObj = NULL, group_by = NULL, diff --git a/data/pbmc14k_expression.eset.rda b/data/pbmc14k_expression.eset.rda new file mode 100644 index 0000000..447001d Binary files /dev/null and b/data/pbmc14k_expression.eset.rda differ 
diff --git a/man/MICAplot.Rd b/man/MICAplot.Rd index 6b4eac5..f5cd84e 100644 --- a/man/MICAplot.Rd +++ b/man/MICAplot.Rd @@ -27,31 +27,43 @@ MICAplot( \arguments{ \item{input_eset}{The sparse eset object} -\item{color_by}{Factor, character or numeric, name of the column of MICA cluster labels. Default: "\code{clusterID}".} +\item{color_by}{Factor, character or numeric, name of the column of MICA +cluster labels. Default: "\code{clusterID}".} -\item{colors}{A character vector or \code{NULL}, colors of the MICA cluster labels. The length of this vector should be same as the number of groups in color_by column. If \code{NULL}, the ggplot default colors will be use. Default: \code{NULL}.} +\item{colors}{A character vector or \code{NULL}, colors of the MICA cluster +labels. The length of this vector should be same as the number of groups in +color_by column. If \code{NULL}, the ggplot default colors will be use. Default: +\code{NULL}.} -\item{do.logTransform}{Logical, whether to do the log2(value + 1) transformation. Only valid when color_by is numeric Default: \code{TRUE}.} +\item{do.logTransform}{Logical, whether to do the log2(value + 1) +transformation. Only valid when color_by is numeric Default: \code{TRUE}.} -\item{X, Y}{Character, name of the columns of x-axis and y-axis coordinates. Default: "\code{UMAP_1}", and "\code{UMAP_2}".} +\item{X, Y}{Character, name of the columns of x-axis and y-axis coordinates. +Default: "\code{UMAP_1}", and "\code{UMAP_2}".} \item{point.size}{Numeric, size of points. Default: 0.3.} -\item{point.alpha}{Numeric, transparency of points, ranging from 0 (more transparent) to 1 (less transparent). Default: 1.} +\item{point.alpha}{Numeric, transparency of points, ranging from 0 (more +transparent) to 1 (less transparent). Default: 1.} \item{name.plot_title}{Character or NULL, title of the plot. Default: \code{NULL}.} \item{fontsize.plot_table}{Numeric, font size of the title. 
Default: 20.} -\item{show.cluster_label}{Logical, whether to show labels on the plot. Ignored when color_by is numeric. Default: \code{TRUE}.} +\item{show.cluster_label}{Logical, whether to show labels on the plot. +Ignored when color_by is numeric. Default: \code{TRUE}.} -\item{fontsize.cluster_label}{Numeric, font size of the labels. Ignored when color_by is numeric. Default: 12.} +\item{fontsize.cluster_label}{Numeric, font size of the labels. Ignored when +color_by is numeric. Default: 12.} -\item{legend.position}{Character, position of legend: "\code{right}", "\code{left}", "\code{top}", "\code{bottom}" or "\code{none}". Default: "\code{right}".} +\item{legend.position}{Character, position of legend: "\code{right}", "\code{left}", +"\code{top}", "\code{bottom}" or "\code{none}". Default: "\code{right}".} -\item{fontsize.legend_title}{Integer, font size of the legend title. Default: 10.} +\item{fontsize.legend_title}{Integer, font size of the legend title. Default: +10.} -\item{fontsize.legend_text}{Integer, font size of the legend text. Default: 8.} +\item{fontsize.legend_text}{Integer, font size of the legend text. Default: +8.} \item{fontsize.axis_title}{Integer, font size of the axis title. Default: 10.} @@ -61,12 +73,22 @@ MICAplot( A UMAP or T-SNE plot. It also print the plot to screen. } \description{ -This function is used to visualize the clustering results generated by MICA. +This function is used to visualize the clustering results +generated by MICA. } \examples{ + +data(pbmc14k_expression.eset) ## 1. color-coded by factor or character variable -p_umap <- MICAplot(input_eset = pbmc14k_log2cpm.eset, color_by = 'clusterID', point.size = 0.1) +p1 <- MICAplot(input_eset = pbmc14k_expression.eset, + color_by = "clusterID", + X = "UMAP_1", Y = "UMAP_2", + point.size = 0.1, + fontsize.cluster_label = 6) ## 2. 
color-coded by numeric variable -p_umap <- MICAplot(input_eset = pbmc14k_log2cpm.eset, color_by = 'nUMI', do.logTransform = TRUE) +p2 <- MICAplot(input_eset = pbmc14k_expression.eset, + color_by = "nUMI", + do.logTransform = TRUE, + point.size = 0.1) } diff --git a/man/addMICAoutput.Rd b/man/addMICAoutput.Rd index 2f6b23c..0ae25bf 100644 --- a/man/addMICAoutput.Rd +++ b/man/addMICAoutput.Rd @@ -24,5 +24,8 @@ This function is used to add the clustering results by MICA into the sparse eset } } \examples{ -clustered.eset <- addMICAoutput(input_eset = input_eset, mica_output_file = "/path-to-mica-input/clustering_UMAP_euclidean_20_2.22554.txt", visual_method = "umap") +data(pbmc14k_expression.eset) +pbmc14k_log2cpm.eset <- addMICAoutput(pbmc14k_expression.eset, + mica_output_file = system.file("extdata/demo_pbmc14k/MICA/clustering_UMAP_euclidean_20_2.05.txt", package = "scMINER"), + visual_method = "umap") } diff --git a/man/combineSparseEset.Rd b/man/combineSparseEset.Rd index a4c58c1..d67fef0 100644 --- a/man/combineSparseEset.Rd +++ b/man/combineSparseEset.Rd @@ -34,5 +34,16 @@ This function is used to combine the sparse expression set objects. The combined different features, \code{NA} values will be generated and automatically imputed by the minimum value of the combined gene expression matrix. 
} \examples{ -combined.eset <- combineSparseEset(c(sample_1.eset, sample_2.eset, sample_3.eset), projectID = c("sample1", "sample2", "sample3"), addPrefix = c("tag1", "tag2", "tag3"), addMetaData = TRUE) +demo1_mtx <- readInput_10x.dir(input_dir = system.file("extdata/demo_inputs/cell_matrix_10x", package = "scMINER"), + featureType = "gene_symbol", removeSuffix = TRUE) +demo1.eset <- createSparseEset(input_matrix = demo1_mtx, projectID = "demo1", addMetaData = TRUE) +demo2_mtx <- readInput_table(table_file = system.file("extdata/demo_inputs/table_file/demoData2.txt.gz", package = "scMINER"), + is.geneBYcell = TRUE, removeSuffix = TRUE) +demo2.eset <- createSparseEset(input_matrix = demo2_mtx, projectID = "demo2", addMetaData = TRUE) +combined.eset <- combineSparseEset(eset_list = c(demo1.eset, demo2.eset), + projectID = c("sample1", "sample2"), + addPrefix = c("demo1", "demo2"), + addSurfix = NULL, + addMetaData = TRUE, + imputeNA = TRUE) } diff --git a/man/compare2groups.Rd b/man/compare2groups.Rd index 36c032a..5a33123 100644 --- a/man/compare2groups.Rd +++ b/man/compare2groups.Rd @@ -31,5 +31,9 @@ This function is used to perform the differential analysis between two groups. 
I } \examples{ ## to call this function -res <- compare2groups(input_eset = input_eset, group_by = group_by, g1 = g1_tmp, g0 = g0_tmp, use_method = use_method) +res <- compare2groups(input_eset = input_eset, + group_by = group_by, + g1 = g1_tmp, + g0 = g0_tmp, + use_method = use_method) } diff --git a/man/createProjectSpace.Rd b/man/createProjectSpace.Rd index 4a16a2c..380fb68 100644 --- a/man/createProjectSpace.Rd +++ b/man/createProjectSpace.Rd @@ -9,17 +9,23 @@ createProjectSpace(project_dir, project_name, do.unlink = FALSE) \arguments{ \item{project_dir}{The directory to create the project space in} -\item{project_name}{The name of the project, will be used as the name of the folder} +\item{project_name}{The name of the project, will be used as the name of the +folder} -\item{do.unlink}{Logical, whether to remove the files and/or folders inside of pre-existing project space. Default: \code{FALSE}.} +\item{do.unlink}{Logical, whether to remove the files and/or folders inside +of pre-existing project space. Default: \code{FALSE}.} } \value{ -It creates a folder of project name and 4 subfolders in the project directory, and returns the path of project space. +It creates a folder of project name and 4 subfolders in the project +directory, and returns the path of project space. } \description{ -This function is used to create a folder of the specified project names in the specified project directory as the project space to run scMINER analysis. -It also creastes 4 subfolers inside of it: "\code{DATA}", "\code{MICA}", "\code{SJARACNe}" and "\code{PLOT}". +This function is used to create a folder of the specified +project names in the specified project directory as the project space to run +scMINER analysis. It also creates 4 subfolders inside of it: "\code{DATA}", +"\code{MICA}", "\code{SJARACNe}" and "\code{PLOT}".
} \examples{ -createProjectSpace(project_dir = "path-to-a-folder", project_name = "PBMC14k") +scminer_dir <- createProjectSpace(project_dir = "path-to-a-folder", + project_name = "PBMC14k") } diff --git a/man/createSparseEset.Rd b/man/createSparseEset.Rd index 4a88f32..1ad497e 100644 --- a/man/createSparseEset.Rd +++ b/man/createSparseEset.Rd @@ -45,5 +45,8 @@ It can also generate the meta data for both automatically, if \strong{\code{addM } } \examples{ -expression_raw.eset <- createSparseEset(input_matrix = sparseMatrix, projectID = "demoSample", addMetaData = T) +data("pbmc14k_rawCount") +pbmc14k_raw.eset <- createSparseEset(input_matrix = pbmc14k_rawCount, + projectID = "PBMC14k", + addMetaData = TRUE) } diff --git a/man/drawNetworkQC.Rd b/man/drawNetworkQC.Rd index 9cd7a9c..22712a3 100644 --- a/man/drawNetworkQC.Rd +++ b/man/drawNetworkQC.Rd @@ -36,11 +36,13 @@ This function will print the statistics of several key quality metrics of networ This function is used to assess the quality of networks generated by SJARACNe. It returns a summary table of key statistics of networks. The users can also generate the html quality control report by turning \code{generate_html} = \code{TRUE}. } \examples{ -## 1. assess the quality of network from a network file -drawNetworkQC(network_file = ./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt, generate_htmal = TRUE) # the html file will be saved to the same folder as the network file -drawNetworkQC(network_file = ./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt, generate_htmal = TRUE, outdir = "/path-to-cutomized-folder") # the html file will be saved to the customized folder -drawNetworkQC(network_file = ./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt, generate_htmal = TRUE, prefix = "PBMC14") # add the "PBMC14" in front of the default name of html report +## 1. 
assess the quality of network from network files +drawNetworkQC(network_file = "./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt", generate_html = TRUE) +drawNetworkQC(network_file = "./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt", generate_html = TRUE, + outdir = "/path-to-customized-folder") +drawNetworkQC(network_file = "./SJARACNE/B/SIG/b100/consensus_network_ncol_.txt", generate_html = TRUE, + prefix = "PBMC14") -## 2. assess the qulity of network from a directory -drawNetworkQC(sjaracne_dir = ./SJARACNE, generate_html = TRUE) # the html file will be saved to the same folder as the network file +## 2. assess the quality of network from the directory of network files +drawNetworkQC(sjaracne_dir = "./SJARACNE", generate_html = TRUE) } diff --git a/man/drawSparseEsetQC.Rd b/man/drawSparseEsetQC.Rd index 1863f8f..6b012d2 100644 --- a/man/drawSparseEsetQC.Rd +++ b/man/drawSparseEsetQC.Rd @@ -28,5 +28,15 @@ This function is used to generate a html quality control report from a sparse es contains more comprehensive and detailed QC results and can be used to estimate the cutoffs to filter the eset object. It also contains some plots for presentation purpose. } \examples{ -drawSparseEsetQC(input_eset, output_html_file = "./QC/esetQCreport.html", overwrite = FALSE, group = "projectID") +## 1. To generate the QC report in a group-specific manner, recommended whenever group information is available. +drawSparseEsetQC(input_eset = pbmc14k_raw.eset, + output_html_file = "/your-path/PBMC14k/PLOT/pbmc14k_rawCount.html", + overwrite = FALSE, + group_by = "trueLabel") + +## 2.
To generate the QC report from a whole view +drawSparseEsetQC(input_eset = pbmc14k_raw.eset, + output_html_file = "/your-path/PBMC14k/PLOT/pbmc14k_rawCount.html", + overwrite = FALSE, + group_by = NULL) } diff --git a/man/draw_barplot.Rd b/man/draw_barplot.Rd index e23ca1c..a57bb4a 100644 --- a/man/draw_barplot.Rd +++ b/man/draw_barplot.Rd @@ -46,8 +46,13 @@ This function is used to draw a bar plot showing the cell composition of self-de } \examples{ ## 1. bar plot grouped by clusters ("clusterID") and colored by true labels ("true_label) -p_bar <- draw_barplot(input_eset = clustered.eset, group_by = "clusterID", color_by = "true_label") +draw_barplot(input_eset = clustered.eset, + group_by = "clusterID", + color_by = "true_label") ## 2. customize the colors -p_bar <- draw_barplot(input_eset = clustered.eset, group_by = "clusterID", color_by = "true_label", colors = c("green", "red", "blue", "grey", "orange", "purple", "yellow")) +draw_barplot(input_eset = clustered.eset, + group_by = "clusterID", + color_by = "true_label", + colors = c("green", "red", "blue", "grey", "orange", "purple", "yellow")) } diff --git a/man/draw_bubbleplot.Rd b/man/draw_bubbleplot.Rd index 9574e24..06539b5 100644 --- a/man/draw_bubbleplot.Rd +++ b/man/draw_bubbleplot.Rd @@ -49,8 +49,13 @@ marker_file <- system.file('PBMC14KDS_DemoDataSet/DATA/', 'Immune_signatures.xls signature_table <- openxlsx::read.xlsx(marker_file) head(signature_table) ## 1. the most commonly used command -p_bubbleplot <- draw_bubbleplot(input_eset = clustered.eset, signature_table = signature_table, group_by = "clusterID") +draw_bubbleplot(input_eset = clustered.eset, + signature_table = signature_table, + group_by = "clusterID") ## 2. 
customize the colors -p_bubbleplot <- draw_bubbleplot(input_eset = clustered.eset, signature_table = signature_table, group_by = "clusterID", colors = c("lightgrey", "red")) +draw_bubbleplot(input_eset = clustered.eset, + signature_table = signature_table, + group_by = "clusterID", + colors = c("lightgrey", "red")) } diff --git a/man/feature_boxplot.Rd b/man/feature_boxplot.Rd index 5506d4a..a1ba4eb 100644 --- a/man/feature_boxplot.Rd +++ b/man/feature_boxplot.Rd @@ -70,17 +70,30 @@ This function is used to draw a box plot of selected features among self-defined } \examples{ ## 1. violin plots grouped by clusters (say the column name is 'clusterID') -p_box <- feature_boxplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID") +p_box <- feature_boxplot(input_eset = clustered.eset, + features = c("CD14", "CD19", "CD8A"), + group_by = "clusterID") ## 2. violin plots grouped by cell types (say the column name is 'cellType') -p_box <- feature_boxplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "cellType") +p_box <- feature_boxplot(input_eset = clustered.eset, + features = c("CD14", "CD19", "CD8A"), + group_by = "cellType") ## 3. customize the colors to fill the violin plots -p_box <- feature_boxplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID", colors = c("blue", "red", "green")) +p_box <- feature_boxplot(input_eset = clustered.eset, + features = c("CD14", "CD19", "CD8A"), + group_by = "clusterID", + colors = c("blue", "red", "green")) ## 4. add jittered points -p_box <- feature_boxplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID", add_jitter = TRUE, jitter.width = 0.5, jitter.size = 0.5) +p_box <- feature_boxplot(input_eset = clustered.eset, + features = c("CD14", "CD19", "CD8A"), + group_by = "clusterID", + add_jitter = TRUE, + jitter.width = 0.5, jitter.size = 0.5) ## 5. 
using activity data -p_box <- feature_boxplot(input_eset = activity_clustered.eset, features = c("CD14_SIG", "CD19_SIG", "CD8A_SIG"), group_by = "clusterID", ylabel_text = "Activity") +p_box <- feature_boxplot(input_eset = activity_clustered.eset, + features = c("CD14_SIG", "CD19_SIG", "CD8A_SIG"), + group_by = "clusterID", ylabel_text = "Activity") } diff --git a/man/feature_bubbleplot.Rd b/man/feature_bubbleplot.Rd index 0c8a11e..d799bd5 100644 --- a/man/feature_bubbleplot.Rd +++ b/man/feature_bubbleplot.Rd @@ -47,8 +47,13 @@ This function is used to draw a bubble plot of selected features among self-defi \examples{ features_of_interest <- c("CD3D","CD27","IL7R","SELL","CCR7","IL32","GZMA","GZMK","DUSP2","CD8A","GZMH","GZMB","CD79A","CD79B","CD86","CD14") ## 1. the most commonly used command -p_bubble <- feature_bubbleplot(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID") +feature_bubbleplot(input_eset = clustered.eset, + features = features_of_interest, + group_by = "clusterID") ## 2. customize the colors -p_bubble <- feature_bubbleplot(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID", colors = c("lightgrey", "red")) +feature_bubbleplot(input_eset = clustered.eset, + features = features_of_interest, + group_by = "clusterID", + colors = c("lightgrey", "red")) } diff --git a/man/feature_heatmap.Rd b/man/feature_heatmap.Rd index 0f2926e..e5d78f1 100644 --- a/man/feature_heatmap.Rd +++ b/man/feature_heatmap.Rd @@ -47,17 +47,32 @@ This function is used to draw a heatmap of selected features among self-defined \examples{ features_of_interest <- c("CD3D","CD27","IL7R","SELL","CCR7","IL32","GZMA","GZMK","DUSP2","CD8A","GZMH","GZMB","CD79A","CD79B","CD86","CD14") ## 1. 
the most commonly used command -feature_heatmap(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID") +feature_heatmap(input_eset = clustered.eset, + features = features_of_interest, + group_by = "clusterID") ## 2. add one more column ('true_label') for cell annotation -feature_heatmap(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID", annotation_columns = c("true_label")) +feature_heatmap(input_eset = clustered.eset, + features = features_of_interest, + group_by = "clusterID", + annotation_columns = c("true_label")) ## 3. scale the data by row -feature_heatmap(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID", scale_method = "row") +feature_heatmap(input_eset = clustered.eset, + features = features_of_interest, + group_by = "clusterID", + scale_method = "row") ## 4. cluster the rows -feature_heatmap(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID", cluster_rows = TRUE) +feature_heatmap(input_eset = clustered.eset, + features = features_of_interest, + group_by = "clusterID", + cluster_rows = TRUE) ## 5. add gaps -feature_heatmap(input_eset = clustered.eset, features = features_of_interest, group_by = "clusterID", use_gaps.column = TRUE, use_gaps.row = TRUE) +feature_heatmap(input_eset = clustered.eset, + features = features_of_interest, + group_by = "clusterID", + use_gaps.column = TRUE, + use_gaps.row = TRUE) } diff --git a/man/feature_scatterplot.Rd b/man/feature_scatterplot.Rd index a86415f..5b301f9 100644 --- a/man/feature_scatterplot.Rd +++ b/man/feature_scatterplot.Rd @@ -61,11 +61,19 @@ This function is used to draw a scatter plot of selected features on UMAP or t-S } \examples{ ## 1. 
scatter plots with UMAP projections -p_scatter <- feature_scatterplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), location_x = "UMAP_1", location_y = "UMAP_2") +feature_scatterplot(input_eset = clustered.eset, + features = c("CD14", "CD19", "CD8A"), + location_x = "UMAP_1", location_y = "UMAP_2") ## 2. scatter plots with t-SNE projections -p_scatter <- feature_scatterplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), location_x = "tSNE_1", location_y = "tSNE_2") +feature_scatterplot(input_eset = clustered.eset, + features = c("CD14", "CD19", "CD8A"), + location_x = "tSNE_1", location_y = "tSNE_2") ## 3. change the point size and font size -p_scatter <- feature_scatterplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), location_x = "UMAP_1", location_y = "UMAP_2", point.size = 1, fontsize.strip = 12, fontsize.axis = 10) +feature_scatterplot(input_eset = clustered.eset, + features = c("CD14", "CD19", "CD8A"), + location_x = "UMAP_1", location_y = "UMAP_2", + point.size = 1, + fontsize.strip = 12, fontsize.axis = 10) } diff --git a/man/feature_vlnplot.Rd b/man/feature_vlnplot.Rd index 3163bae..baf573f 100644 --- a/man/feature_vlnplot.Rd +++ b/man/feature_vlnplot.Rd @@ -82,17 +82,29 @@ This function is used to draw a violin plot of selected features among self-defi } \examples{ ## 1. violin plots grouped by clusters (say the column name is 'clusterID') -p_vln <- feature_vlnplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID") +p_vln <- feature_vlnplot(input_eset = pbmc14k_log2cpm.eset, + features = c("CD14", "CD19", "CD8A"), + group_by = "clusterID") ## 2. 
violin plots grouped by cell types (say the column name is 'cellType') -p_vln <- feature_vlnplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "cellType") +p_vln <- feature_vlnplot(input_eset = pbmc14k_log2cpm.eset, + features = c("CD14", "CD19", "CD8A"), + group_by = "cellType") ## 3. customize the colors to fill the violin plots -p_vln <- feature_vlnplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID", colors = c("blue", "red", "green")) +p_vln <- feature_vlnplot(input_eset = pbmc14k_log2cpm.eset, + features = c("CD14", "CD19", "CD8A"), + group_by = "clusterID", + colors = c("blue", "red", "green")) ## 4. add jittered points -p_vln <- feature_vlnplot(input_eset = clustered.eset, features = c("CD14", "CD19", "CD8A"), group_by = "clusterID", add_jitter = TRUE, jitter.width = 0.5, jitter.size = 0.5) +p_vln <- feature_vlnplot(input_eset = pbmc14k_log2cpm.eset, + features = c("CD14", "CD19", "CD8A"), + group_by = "clusterID", + add_jitter = TRUE, jitter.width = 0.5, jitter.size = 0.5) ## 5. using activity data -p_vln <- feature_vlnplot(input_eset = activity_clustered.eset, features = c("CD14_SIG", "CD19_SIG", "CD8A_SIG"), group_by = "clusterID", ylabel_text = "Activity") +p_vln <- feature_vlnplot(input_eset = activity_clustered.eset, + features = c("CD14_SIG", "CD19_SIG", "CD8A_SIG"), + group_by = "clusterID", ylabel_text = "Activity") } diff --git a/man/filterSparseEset.Rd b/man/filterSparseEset.Rd index 4051391..6730d13 100644 --- a/man/filterSparseEset.Rd +++ b/man/filterSparseEset.Rd @@ -59,6 +59,15 @@ features would be removed under the default cutoffs of each metrics. } } \examples{ -filtered.eset <- filterSparseEset(raw.eset) ## filter the input eset using the cutoffs calculated by scMINER. -filtered.eset <- filterSparseEset(raw.eset, gene.nCell_min = 10, cell.nUMI_min = 500, cell.nFeature_min = 100, cell.nFeature_max = 5000, cell.pctMito_max = 0.15) +## 1. 
using the cutoffs automatically calculated by scMINER +pbmc14k_filtered.eset <- filterSparseEset(pbmc14k_raw.eset, filter_mode = "auto", filter_type = "both") + +## 2. using the cutoffs manually specified +pbmc14k_filtered_manual.eset <- filterSparseEset(pbmc14k_raw.eset, filter_mode = "manual", filter_type = "both", + gene.nCell_min = 10, + cell.nUMI_min = 500, + cell.nUMI_max = 6500, + cell.nFeature_min = 200, + cell.nFeature_max = 2500, + cell.pctMito_max = 0.1) } diff --git a/man/generateMICAinput.Rd b/man/generateMICAinput.Rd index 01d53a3..36aae92 100644 --- a/man/generateMICAinput.Rd +++ b/man/generateMICAinput.Rd @@ -31,5 +31,7 @@ This function is used to generate the standard input files for MICA (Mutual Info supports two file formats, "\strong{.txt}" or "\strong{.h5ad}". To generate a "\strong{.h5ad}" file, the "\strong{anndata}" package is required. } \examples{ -generateMICAinput(input_eset = log2cpm.ese, output_file = "./MICA/micaInput.txt") +generateMICAinput(input_eset = pbmc14k_log2cpm.eset, + output_file = "/work-path/PBMC14k/MICA/micaInput.txt", + overwrite = FALSE) } diff --git a/man/generatePortalInputs.Rd b/man/generatePortalInputs.Rd index 697cef9..5d5c2b3 100644 --- a/man/generatePortalInputs.Rd +++ b/man/generatePortalInputs.Rd @@ -37,16 +37,25 @@ This function is used to generated the standard input files that can be directly } \examples{ ## 1. the most commonly used command -generatePortalInputs(input_expression.eset = expression_clustered.eset, group_by = "cellType", input_activity.eset = activity_clustered.eset, input_network.dir = "./SJARACNe", output_dir = "./scMINERportal") +generatePortalInputs(input_expression.eset = expression_clustered.eset, + group_by = "cellType", + input_activity.eset = activity_clustered.eset, + input_network.dir = "./SJARACNe", + output_dir = "./scMINERportal") ## 2. 
prepare expression data from Seurat object ("pbmc14.obj") -generatePortalInputs(input_expression.seuratObj = pbmc14.obj, output_dir = "./path-to-output_dir") +generatePortalInputs(input_expression.seuratObj = pbmc14.obj, + output_dir = "./path-to-output_dir") ## 3. prepare network data from a table -network.table <- data.frame(CellGroup = c("CD4__CD25_T_Reg", "CD4__CD25_T_Reg", "CD19__B", "CD19__B"),NetworkType = c("SIG", "TF", "SIG", "TF"), - NetworkFile = c("./sjaracne/CD4__CD25_T_Reg/SIG/b100_pce-3/sjaracne_workflow-1474c41b-067b-4f86-ab99-09f73dadb16g/consensus_network_ncol_.txt", - "./sjaracne/CD4__CD25_T_Reg/TF/b100_pce-3/sjaracne_workflow-a93cd6db-7253-4ffb-ae4e-633b9dedf11d/consensus_network_ncol_.txt", - "./sjaracne/CD19__B/SIG/sjaracne_workflow-da0c3c72-7afb-44fa-973b-e4d767e20b6f/consensus_network_ncol_.txt", - "./sjaracne/CD19__B/TF/sjaracne_workflow-0426ea12-10bf-428c-b199-d5bd1a7aab5f/consensus_network_ncol_.txt")) -generatePortalInputs(input_expression.eset = expression_clustered.eset, group_by = "cellType", input_network.table = network.table, output_dir = "./path-to-output_dir") +network.table <- data.frame(CellGroup = c("CD4Treg", "CD4Treg", "B", "B"), + NetworkType = c("SIG", "TF", "SIG", "TF"), + NetworkFile = c("./sjaracne/CD4Treg/SIG/consensus_network_ncol_.txt", + "./sjaracne/CD4Treg/TF/consensus_network_ncol_.txt", + "./sjaracne/B/SIG/consensus_network_ncol_.txt", + "./sjaracne/B/TF/consensus_network_ncol_.txt")) +generatePortalInputs(input_expression.eset = expression_clustered.eset, + group_by = "cellType", + input_network.table = network.table, + output_dir = "./path-to-output_dir") } diff --git a/man/generateSJARACNeInput.Rd b/man/generateSJARACNeInput.Rd index 352c437..3255e3b 100644 --- a/man/generateSJARACNeInput.Rd +++ b/man/generateSJARACNeInput.Rd @@ -86,18 +86,36 @@ This function is used to generate the standard input files for SJARACNe, a scala } \examples{ ## 1. 
The most commonly used command: pre-defined driver lists, automatic down-sampling, no metacell method -generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG") +generateSJARACNeInput(input_eset = normalized.eset, + group_name = "cell_type", + sjaracne_dir = "./SJARACNe", + species_type = "hg", + driver_type = "TF_SIG") ## 2. to disable the downsampling -generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", downSample_N = NULL) +generateSJARACNeInput(input_eset = normalized.eset, + group_name = "cell_type", + sjaracne_dir = "./SJARACNe", + species_type = "hg", + driver_type = "TF_SIG", + downSample_N = NULL) -## 3. Use the customized driver list: TUBB4A is the gene of interest but currently not in the pre-defined driver list. -hg_driver <- getDriverList(species_type = "hg", driver_type = "TF_SIG") -"TUBB4A" \%in\% hg_driver # It would returen FALSE if TUBB4A is not in the pre-defined driver lists -generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", customDriver_TF = c(getDriverList(species_type = "hg", driver_type = "TF"), "TUBB4A")) # when the driver-to-add is known as a transcription factor -generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", customDriver_SIG = c(getDriverList(species_type = "hg", driver_type = "SIG"), "TUBB4A")) # when the driver-to-add is known as a non-transcription factor -generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", customDriver_TF = c(getDriverList(species_type = "hg", driver_type = "TF"), "TUBB4A"), customDriver_SIG = c(getDriverList(species_type = 
"hg", driver_type = "SIG"), "TUBB4A")) # when it's ambiguous to tell if the driver-to-add is a transcriptional factor +## 3. Use the customized driver list: (add TUBB4A is the gene of interest but currently not in the pre-defined driver list) + +# when the driver-to-add is known as a transcription factor +generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", + customDriver_TF = c(getDriverList(species_type = "hg", driver_type = "TF"), "TUBB4A")) + +# when the driver-to-add is known as a non-transcription factor +generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", + customDriver_SIG = c(getDriverList(species_type = "hg", driver_type = "SIG"), "TUBB4A")) + +# when it's ambiguous to tell if the driver-to-add is a transcriptional factor +generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", + customDriver_TF = c(getDriverList(species_type = "hg", driver_type = "TF"), "TUBB4A"), + customDriver_SIG = c(getDriverList(species_type = "hg", driver_type = "SIG"), "TUBB4A")) ## 4. 
Use the metacell method -generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", superCell_N = 1000, superCell_count = 100, seed = 123) +generateSJARACNeInput(input_eset = normalized.eset, group_name = "cell_type", sjaracne_dir = "./SJARACNe", species_type = "hg", driver_type = "TF_SIG", + superCell_N = 1000, superCell_count = 100, seed = 123) } diff --git a/man/getActivity_inBatch.Rd b/man/getActivity_inBatch.Rd index 8d05871..0431d9e 100644 --- a/man/getActivity_inBatch.Rd +++ b/man/getActivity_inBatch.Rd @@ -43,17 +43,51 @@ This function is used to calculate the driver activities of multiple groups from } \examples{ ## 1. when no tag was used in runing SJARACNE: the network file folder ("sjaracne_workflow-*") is directly under TF/SIG folder of each group. -activity.eset <- getActivity_inBatch(input_eset = normalized.eset, sjaracne_dir = "./SJARACNe", group_name = "cell_type", driver_type = "TF_SIG", activity_method = "mean", do.z_normalization = TRUE) +activity.eset <- getActivity_inBatch(input_eset = normalized.eset, + sjaracne_dir = "./SJARACNe", + group_name = "cell_type", + driver_type = "TF_SIG", + activity_method = "mean", + do.z_normalization = TRUE) ## 2. when tag (e.g. "bs_100" ) was used: the nework file folder ("sjaracne_workflow-*") is directly under a subfolder "bs_100" of the TF/SIG folder of each group. -activity.eset <- getActivity_inBatch(input_eset = normalized.eset, sjaracne_dir = "./SJARACNe", group_name = "cell_type", network_tag.tf = "bs_100", network_tag.sig = "bs_100", driver_type = "TF_SIG", activity_method = "mean", do.z_normalization = TRUE) +activity.eset <- getActivity_inBatch(input_eset = normalized.eset, + sjaracne_dir = "./SJARACNe", + group_name = "cell_type", + network_tag.tf = "bs_100", + network_tag.sig = "bs_100", + driver_type = "TF_SIG", + activity_method = "mean", + do.z_normalization = TRUE) ## 3. 
to calculate the activities of TF only -activity.eset <- getActivity_inBatch(input_eset = normalized.eset, sjaracne_dir = "./SJARACNe", group_name = "cell_type", network_tag.tf = "bs_100", network_tag.sig = "bs_100", driver_type = "TF", activity_method = "mean", do.z_normalization = TRUE) +activity.eset <- getActivity_inBatch(input_eset = normalized.eset, + sjaracne_dir = "./SJARACNe", + group_name = "cell_type", + network_tag.tf = "bs_100", + network_tag.sig = "bs_100", + driver_type = "TF", + activity_method = "mean", + do.z_normalization = TRUE) ## 4. to exclude some groups in the activity calculation (e.g. "NK" and "Monocyte") -activity.eset <- getActivity_inBatch(input_eset = normalized.eset, sjaracne_dir = "./SJARACNe", group_name = "cell_type", group_exclude = c("NK", "Monocyte"), network_tag.tf = "bs_100", network_tag.sig = "bs_100", driver_type = "TF", activity_method = "mean", do.z_normalization = TRUE) +activity.eset <- getActivity_inBatch(input_eset = normalized.eset, + sjaracne_dir = "./SJARACNe", + group_name = "cell_type", + group_exclude = c("NK", "Monocyte"), + network_tag.tf = "bs_100", + network_tag.sig = "bs_100", + driver_type = "TF", + activity_method = "mean", + do.z_normalization = TRUE) ## 5. when calculate the activities from the gene expression values scaled by other methods (e.g. 
ScaleData() from Seurat package) -activity.eset <- getActivity_inBatch(input_eset = normalized.eset, sjaracne_dir = "./SJARACNe", group_name = "cell_type", network_tag.tf = "bs_100", network_tag.sig = "bs_100", driver_type = "TF_SIG", activity_method = "mean", do.z_normalization = FALSE) +activity.eset <- getActivity_inBatch(input_eset = normalized.eset, + sjaracne_dir = "./SJARACNe", + group_name = "cell_type", + network_tag.tf = "bs_100", + network_tag.sig = "bs_100", + driver_type = "TF_SIG", + activity_method = "mean", + do.z_normalization = FALSE) } diff --git a/man/getActivity_individual.Rd b/man/getActivity_individual.Rd index 450d6cf..d761ddf 100644 --- a/man/getActivity_individual.Rd +++ b/man/getActivity_individual.Rd @@ -33,5 +33,8 @@ A expression set object of the group-of-interest. The \code{assayData} is the ac This function is used to calculate the driver activities of one single group from the sparse eset obj and networks generated by SJARACNe. To calculate driver activities of multiple groups from a scMINER directory, please use \code{getActivity_inBatch()}. } \examples{ -activity_group.eset <- getActivity_individual(input_eset = group_specific.est, network_file.tf = "consensus_network_ncol_.txt", network_file.sig = "consensus_network_ncol_.txt", driver_type = "TF_SIG") +activity_group.eset <- getActivity_individual(input_eset = group_specific.est, + network_file.tf = "consensus_network_ncol_.txt", + network_file.sig = "consensus_network_ncol_.txt", + driver_type = "TF_SIG") } diff --git a/man/getDA.Rd b/man/getDA.Rd index 0373032..0a733a0 100644 --- a/man/getDA.Rd +++ b/man/getDA.Rd @@ -31,14 +31,26 @@ Perform differential activity analysis on expression set } \examples{ ## 1. 
To perform differential activity analysis in a 1-vs-rest manner for all groups in "clusterID" column -da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", use_method = "t.test") +da_res <- getDA(input_eset = activity_clustered.eset, + group_by = "clusterID", + use_method = "t.test") ## 2. To perform differential activity analysis in a 1-vs-rest manner for one specific group in "clusterID" column -da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g1 = c("1"), use_method = "t.test") +da_res <- getDA(input_eset = activity_clustered.eset, + group_by = "clusterID", + g1 = c("1"), + use_method = "t.test") ## 3. To perform differential activity analysis in a rest-vs-1 manner for one specific group in "clusterID" column -da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g0 = c("1"), use_method = "t.test") +da_res <- getDA(input_eset = activity_clustered.eset, + group_by = "clusterID", + g0 = c("1"), + use_method = "t.test") ## 4. To perform differential activity analysis in a 1-vs-1 manner for groups in "clusterID" column -da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g1 = c("1"), g0 = c("3"), use_method = "t.test") +da_res <- getDA(input_eset = activity_clustered.eset, + group_by = "clusterID", + g1 = c("1"), + g0 = c("3"), + use_method = "t.test") } diff --git a/man/getDE.Rd b/man/getDE.Rd index 8da7824..2b1cef6 100644 --- a/man/getDE.Rd +++ b/man/getDE.Rd @@ -31,14 +31,26 @@ This function is used to perform the differential expression analysis on sparse } \examples{ ## 1. To perform differential expression analysis in a 1-vs-rest manner for all groups in "clusterID" column -de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", use_method = "limma") +de_res <- getDE(input_eset = clustered.eset, + group_by = "clusterID", + use_method = "limma") ## 2. 
To perform differential expression analysis in a 1-vs-rest manner for one specific group in "clusterID" column -de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g1 = c("1"), use_method = "limma") +de_res <- getDE(input_eset = clustered.eset, + group_by = "clusterID", + g1 = c("1"), + use_method = "limma") ## 3. To perform differential expression analysis in a rest-vs-1 manner for one specific group in "clusterID" column -de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g0 = c("1"), use_method = "limma") +de_res <- getDE(input_eset = clustered.eset, + group_by = "clusterID", + g0 = c("1"), + use_method = "limma") ## 4. To perform differential expression analysis in a 1-vs-1 manner for groups in "clusterID" column -de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g1 = c("1"), g0 = c("3"), use_method = "limma") +de_res <- getDE(input_eset = clustered.eset, + group_by = "clusterID", + g1 = c("1"), + g0 = c("3"), + use_method = "limma") } diff --git a/man/getDriverList.Rd b/man/getDriverList.Rd index 834b2e2..34f0552 100644 --- a/man/getDriverList.Rd +++ b/man/getDriverList.Rd @@ -18,6 +18,9 @@ A vector of pre-defined driver genes This function is used to extract the pre-defined driver lists of human or mouse. } \examples{ -hg_tf <- getDriverList(species_type = "hg", driver_type = "TF") # get the TF driver list of human -mm_driver <- getDriverList(species_type = "mm", driver_type = "TF_SIG") # get the total driver list, including both TF and SIG, of mouse +## 1. Get the TF driver list of human +hg_tf <- getDriverList(species_type = "hg", driver_type = "TF") + +## 2. 
Get the total driver list, including both TF and SIG, of mouse +mm_driver <- getDriverList(species_type = "mm", driver_type = "TF_SIG") } diff --git a/man/normalizeSparseEset.Rd b/man/normalizeSparseEset.Rd index af5d1a1..595dafd 100644 --- a/man/normalizeSparseEset.Rd +++ b/man/normalizeSparseEset.Rd @@ -30,5 +30,8 @@ A sparse eset object that has been normalized and log-transformed This function is used to normalize and log-transform the sparse eset object. The default method is "log21p". } \examples{ -normalized.eset <- normalizeSparseEset(input_eset = filtered.eset, scale_factor = 1000000, do.logTransform = TRUE) +pbmc14k_log2cpm.eset <- normalizeSparseEset(pbmc14k_filtered.eset, + scale_factor = 1000000, + log_base = 2, + log_pseudoCount = 1) } diff --git a/man/pbmc14k_expression.eset.Rd b/man/pbmc14k_expression.eset.Rd new file mode 100644 index 0000000..5d71511 --- /dev/null +++ b/man/pbmc14k_expression.eset.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{pbmc14k_expression.eset} +\alias{pbmc14k_expression.eset} +\title{SparseEset object of PBMC14k dataset} +\format{ +\subsection{\code{pbmc14k_expression.eset}}{ + +A large dgCMatrix with 17,986 rows and 14,000 columns: +\describe{ +This data set provides the SparseEset object of PBMC14k dataset that has been filtered, normalized, clustered and annotated. +} +} +} +\source{ +It's generated by scMINER from Filtered_DownSampled_SortedPBMC_data.csv from \url{https://zenodo.org/record/3357167#.YhQNF2RKj6V} +} +\usage{ +pbmc14k_expression.eset +} +\description{ +This dataset contains the SparseEset object of PBMC14k dataset. For demonstration purposes, it has been downsampled to 3.5k cells, with 500 cells per population. 
+} +\keyword{datasets} diff --git a/man/readInput_10x.dir.Rd b/man/readInput_10x.dir.Rd index 1bed1e5..36c1cd8 100644 --- a/man/readInput_10x.dir.Rd +++ b/man/readInput_10x.dir.Rd @@ -12,20 +12,27 @@ readInput_10x.dir( ) } \arguments{ -\item{input_dir}{Path to the directory containing the 3 files generated by 10x Genomics: \strong{matrix.mtx}, \strong{barcodes.tsv} and \strong{features.tsv} (or \strong{genes.tsv})} +\item{input_dir}{Path to the directory containing the 3 files generated by +10x Genomics: \strong{matrix.mtx}, \strong{barcodes.tsv} and \strong{features.tsv} (or +\strong{genes.tsv})} -\item{featureType}{Character, feature type to use as the gene name of expression matrix: \code{"gene_symbol"} (the default) or \code{"gene_id"}.} +\item{featureType}{Character, feature type to use as the gene name of +expression matrix: \code{"gene_symbol"} (the default) or \code{"gene_id"}.} -\item{removeSuffix}{Logical, whether to remove the suffix "\strong{-1}" when present in all cell barcodes. Default: \code{TRUE}.} +\item{removeSuffix}{Logical, whether to remove the suffix "\strong{-1}" when +present in all cell barcodes. Default: \code{TRUE}.} -\item{addPrefix}{Character or \code{NULL}, add a prefix to the cell barcodes, like Sample ID. It is highly recommended to use a prefix containing letters and/or numbers only, -and not starting with numbers. Default: \code{NULL}.} +\item{addPrefix}{Character or \code{NULL}, add a prefix to the cell barcodes, like +Sample ID. It is highly recommended to use a prefix containing letters +and/or numbers only, and not starting with numbers. 
Default: \code{NULL}.} } \value{ A sparse gene expression matrix of raw UMI counts, genes by cells } \description{ -This function is used to read the gene expression data from a directory containing three files generated by 10x Genomics: \strong{matrix.mtx}, \strong{barcodes.tsv} and +This function is used to read the gene expression data from a +directory containing three files generated by 10x Genomics: \strong{matrix.mtx}, +\strong{barcodes.tsv} and \strong{features.tsv} (or \strong{genes.tsv}). This function can handle these conditions well: \itemize{ \item Alternative file names for feature data: \strong{features.tsv} by CellRanger > 3.0, and \strong{genes.tsv} by CellRanger < 3.0; @@ -34,7 +41,10 @@ This function is used to read the gene expression data from a directory containi } } \examples{ -input_dir <- 'path-to-directory' +input_dir <- system.file("extdata/demo_inputs/cell_matrix_10x", package = "scMINER") # path to input data list.files(input_dir, full.names = FALSE) # you should see three files: matrix.mtx, barcodes.tsv and features.tsv (or genes.tsv) -sparseMatrix <- readInput_10x.dir(input_dir, featureType = "gene_symbol", removeSuffix = TRUE, addPrefix = "demoSample") +sparseMatrix <- readInput_10x.dir(input_dir, + featureType = "gene_symbol", + removeSuffix = TRUE, + addPrefix = "demoSample") } diff --git a/man/readInput_10x.h5.Rd b/man/readInput_10x.h5.Rd index c3cedc8..e9e1dac 100644 --- a/man/readInput_10x.h5.Rd +++ b/man/readInput_10x.h5.Rd @@ -14,21 +14,30 @@ readInput_10x.h5( \arguments{ \item{h5_file}{H5 file generated by CellRanger pipeline of 10x Genomics} -\item{featureType}{Character, feature type to use as the gene name of expression matrix: \code{"gene_symbol"} (the default) or \code{"gene_id"}.} +\item{featureType}{Character, feature type to use as the gene name of +expression matrix: \code{"gene_symbol"} (the default) or \code{"gene_id"}.} -\item{removeSuffix}{Logical, whether to remove the suffix "\strong{-1}" when present in all 
cell barcodes. Default: \code{TRUE}.} +\item{removeSuffix}{Logical, whether to remove the suffix "\strong{-1}" when +present in all cell barcodes. Default: \code{TRUE}.} -\item{addPrefix}{Character or \code{NULL}, add a prefix to the cell barcodes, like Sample ID. It is highly recommended to use a prefix containing letters and/or numbers only, -and not starting with numbers. Default: \code{NULL}.} +\item{addPrefix}{Character or \code{NULL}, add a prefix to the cell barcodes, like +Sample ID. It is highly recommended to use a prefix containing letters +and/or numbers only, and not starting with numbers. Default: \code{NULL}.} } \value{ A sparse gene expression matrix of raw UMI counts, genes by cells } \description{ -This function is used to read the gene expression data from the HDF5 file generated by CellRanger pipeline of 10x Genomics. This function can automatically distinguish -the data of different modalities (e.g. expression data, ATAC data) and retains the gene expression data only. The \verb{**hdf5r**} package is needed to use this function. +This function is used to read the gene expression data from the +HDF5 file generated by CellRanger pipeline of 10x Genomics. This function can +automatically distinguish the data of different modalities (e.g. expression +data, ATAC data) and retains the gene expression data only. The \verb{**hdf5r**} +package is needed to use this function. 
} \examples{ -h5_file <- 'path-to-h5_file' -sparseMatrix <- readInput_10x.h5(h5_file, featureType = "gene_symbol", removeSuffix = TRUE, addPrefix = "demoSample") +h5_file <- system.file("extdata/demo_inputs/hdf5_10x/demoData3.h5", package = "scMINER") # path to hdf5 file +sparseMatrix <- readInput_10x.h5(h5_file, + featureType = "gene_symbol", + removeSuffix = TRUE, + addPrefix = "demoSample") } diff --git a/man/readInput_h5ad.Rd b/man/readInput_h5ad.Rd index bd3fa0c..55dc105 100644 --- a/man/readInput_h5ad.Rd +++ b/man/readInput_h5ad.Rd @@ -9,18 +9,27 @@ readInput_h5ad(h5ad_file, removeSuffix = FALSE, addPrefix = NULL) \arguments{ \item{h5ad_file}{H5ad file of sc/snRNA-seq data} -\item{removeSuffix}{Logical, whether to remove the suffix "\strong{-1}" when present in all cell barcodes. Default: \code{TRUE}.} +\item{removeSuffix}{Logical, whether to remove the suffix "\strong{-1}" when +present in all cell barcodes. Default: \code{FALSE}.} -\item{addPrefix}{Character or \code{NULL}, add a prefix to the cell barcodes, like Sample ID. It is highly recommended to use a prefix containing letters and/or numbers only, -and not starting with numbers. Default: \code{NULL}.} +\item{addPrefix}{Character or \code{NULL}, add a prefix to the cell barcodes, like +Sample ID. It is highly recommended to use a prefix containing letters +and/or numbers only, and not starting with numbers. Default: \code{NULL}.} } \value{ -A AnnData object containing \code{"X"} (a observations x variables data matrix), \code{"obs"} (data frame of observations), \code{"var"} (data frame of variables) and more. For more details, please check out \url{https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.html}. +An AnnData object containing \code{"X"} (an observations x variables data +matrix), \code{"obs"} (data frame of observations), \code{"var"} (data frame of +variables) and more.
For more details, please check out +\url{https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.html}. } \description{ -This function is used to read the h5ad file, a popular file format for storing and sharing single-cell RNA sequencing data. The \verb{**anndata**} package is needed to use this function. +This function is used to read the h5ad file, a popular file +format for storing and sharing single-cell RNA sequencing data. The +\verb{**anndata**} package is needed to use this function. } \examples{ -h5ad_file <- 'path-to-h5ad_file' -sparseMatrix <- readInput_h5ad(h5ad_file, removeSuffix = FALSE, addPrefix = "demoSample") +h5ad_file <- system.file("extdata/demo_inputs/h5ad_file/demoData4.h5ad", package = "scMINER") # path to h5ad file +sparseMatrix <- readInput_h5ad(h5ad_file, + removeSuffix = FALSE, + addPrefix = "demoSample") } diff --git a/man/readInput_table.Rd b/man/readInput_table.Rd index 95a1dcb..d7f68d7 100644 --- a/man/readInput_table.Rd +++ b/man/readInput_table.Rd @@ -13,28 +13,39 @@ readInput_table( ) } \arguments{ -\item{table_file}{The table format file (e.g. \strong{txt}, \strong{tsv}, \strong{csv}, and others) which the data are to be read from.} +\item{table_file}{The table format file (e.g. \strong{txt}, \strong{tsv}, \strong{csv}, and +others) which the data are to be read from.} \item{sep}{String, The field separator character. Default: \code{"\\t"}.} -\item{is.geneBYcell}{Logical, whether the table is organized in \strong{gene (row) by cell (column)} format. If \code{FALSE}, the rows will be treated as cells. Default: \code{TRUE}.} +\item{is.geneBYcell}{Logical, whether the table is organized in \strong{gene (row) +by cell (column)} format. If \code{FALSE}, the rows will be treated as cells. +Default: \code{TRUE}.} -\item{removeSuffix}{Logical, whether to remove the suffix "\strong{-1}" when present in all cell barcodes. 
Default: \code{FALSE}.} +\item{removeSuffix}{Logical, whether to remove the suffix "\strong{-1}" when +present in all cell barcodes. Default: \code{FALSE}.} -\item{addPrefix}{Character or \code{NULL}, add a prefix to the cell barcodes, like Sample ID. It is highly recommended to use a prefix containing letters and/or numbers only, -and not starting with numbers. Default: \code{NULL}.} +\item{addPrefix}{Character or \code{NULL}, add a prefix to the cell barcodes, like +Sample ID. It is highly recommended to use a prefix containing letters +and/or numbers only, and not starting with numbers. Default: \code{NULL}.} } \value{ A sparse gene expression matrix, genes by cells } \description{ -This function is used to read data from a table-format file. The user needs to specify the format of the table using the parameter \strong{\code{is.geneBYcell}}: +This function is used to read data from a table-format file. The +user needs to specify the format of the table using the parameter +\strong{\code{is.geneBYcell}}: \itemize{ \item \code{TRUE} (the default): the rows will be treated as genes, while the columns will be treated as cells; \item \code{FALSE}: the rows will be treated as cells, while the columns will be treated as genes. 
} } \examples{ -table_file <- 'path-to-table_file' -sparseMatrix <- readInput_table(table_file, sep = "\t", is.geneBYcell = TRUE, removeSuffix = FALSE, addPrefix = "demoSample") +table_file <- system.file("extdata/demo_inputs/table_file/demoData2.txt.gz", package = "scMINER") # path to text-table file +sparseMatrix <- readInput_table(table_file, + sep = "\t", + is.geneBYcell = TRUE, + removeSuffix = FALSE, + addPrefix = "demoSample") } diff --git a/man/updateSparseEset.Rd b/man/updateSparseEset.Rd index cdc0df0..545c27e 100644 --- a/man/updateSparseEset.Rd +++ b/man/updateSparseEset.Rd @@ -30,5 +30,11 @@ A sparse eset object with updated information This function is used to update the three slots ('\strong{assayData}', '\strong{phenoData}', '\strong{featureData}') and/or '\strong{meta data}' of sparse eset object. } \examples{ -updated.eset <- updateSparseEset(input_eset = input.eset, cellData = data.frame(pData(input.eset), cellType = "B_cells"), addMetaData = TRUE) +true_label <- read.table(system.file("extdata/demo_pbmc14k/PBMC14k_trueLabel.txt.gz", package = "scMINER"), + header = T, row.names = 1, sep = "\t", quote = "", stringsAsFactors = FALSE) +pbmc14k_raw.eset <- createSparseEset(input_matrix = pbmc14k_rawCount, + cellData = true_label, + featureData = NULL, + projectID = "PBMC14k", + addMetaData = TRUE) } diff --git a/pbmc14k.txt.gz b/pbmc14k.txt.gz new file mode 100644 index 0000000..b99ddd9 Binary files /dev/null and b/pbmc14k.txt.gz differ