Skip to content

Commit

Permalink
for bioconductor
Browse files Browse the repository at this point in the history
  • Loading branch information
QingfeiPan committed Aug 13, 2024
1 parent 32bc9b2 commit 0685034
Show file tree
Hide file tree
Showing 40 changed files with 843 additions and 256 deletions.
59 changes: 43 additions & 16 deletions R/clustering_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
#' @export
#'
#' @examples
#' generateMICAinput(input_eset = log2cpm.ese, output_file = "./MICA/micaInput.txt")
#' data(pbmc14k_expression.eset)
#' generateMICAinput(input_eset = pbmc14k_expression.eset,
#' output_file = "/work-path/PBMC14k/MICA/micaInput.txt",
#' overwrite = FALSE)
generateMICAinput <- function(input_eset,
output_file,
overwrite = F,
Expand Down Expand Up @@ -125,7 +128,10 @@ generateMICAinput <- function(input_eset,
#' @export
#'
#' @examples
#' clustered.eset <- addMICAoutput(input_eset = input_eset, mica_output_file = "/path-to-mica-input/clustering_UMAP_euclidean_20_2.22554.txt", visual_method = "umap")
#' data(pbmc14k_expression.eset)
#' pbmc14k_log2cpm.eset <- addMICAoutput(pbmc14k_expression.eset,
#' mica_output_file = system.file("extdata/demo_pbmc14k/MICA/clustering_UMAP_euclidean_20_2.05.txt", package = "scMINER"),
#' visual_method = "umap")
addMICAoutput <- function(input_eset,
mica_output_file,
visual_method = "umap")
Expand Down Expand Up @@ -157,23 +163,35 @@ addMICAoutput <- function(input_eset,

#' Draw a scatter plot showing the coordinates and cluster id of each cell
#'
#' @description
#' This function is used to visualize the clustering results generated by MICA.
#' @description This function is used to visualize the clustering results
#' generated by MICA.
#'
#' @param input_eset The sparse eset object
#' @param color_by Factor, character or numeric, name of the column of MICA cluster labels. Default: "`clusterID`".
#' @param colors A character vector or `NULL`, colors of the MICA cluster labels. The length of this vector should be same as the number of groups in color_by column. If `NULL`, the ggplot default colors will be use. Default: `NULL`.
#' @param do.logTransform Logical, whether to do the log2(value + 1) transformation. Only valid when color_by is numeric Default: `TRUE`.
#' @param X,Y Character, name of the columns of x-axis and y-axis coordinates. Default: "`UMAP_1`", and "`UMAP_2`".
#' @param color_by Factor, character or numeric, name of the column of MICA
#' cluster labels. Default: "`clusterID`".
#' @param colors A character vector or `NULL`, colors of the MICA cluster
#' labels. The length of this vector should be same as the number of groups in
#' color_by column. If `NULL`, the ggplot default colors will be use. Default:
#' `NULL`.
#' @param do.logTransform Logical, whether to do the log2(value + 1)
#' transformation. Only valid when color_by is numeric Default: `TRUE`.
#' @param X,Y Character, name of the columns of x-axis and y-axis coordinates.
#' Default: "`UMAP_1`", and "`UMAP_2`".
#' @param point.size Numeric, size of points. Default: 0.3.
#' @param point.alpha Numeric, transparency of points, ranging from 0 (more transparent) to 1 (less transparent). Default: 1.
#' @param point.alpha Numeric, transparency of points, ranging from 0 (more
#' transparent) to 1 (less transparent). Default: 1.
#' @param name.plot_title Character or NULL, title of the plot. Default: `NULL`.
#' @param fontsize.plot_table Numeric, font size of the title. Default: 20.
#' @param show.cluster_label Logical, whether to show labels on the plot. Ignored when color_by is numeric. Default: `TRUE`.
#' @param fontsize.cluster_label Numeric, font size of the labels. Ignored when color_by is numeric. Default: 12.
#' @param legend.position Character, position of legend: "`right`", "`left`", "`top`", "`bottom`" or "`none`". Default: "`right`".
#' @param fontsize.legend_title Integer, font size of the legend title. Default: 10.
#' @param fontsize.legend_text Integer, font size of the legend text. Default: 8.
#' @param show.cluster_label Logical, whether to show labels on the plot.
#' Ignored when color_by is numeric. Default: `TRUE`.
#' @param fontsize.cluster_label Numeric, font size of the labels. Ignored when
#' color_by is numeric. Default: 12.
#' @param legend.position Character, position of legend: "`right`", "`left`",
#' "`top`", "`bottom`" or "`none`". Default: "`right`".
#' @param fontsize.legend_title Integer, font size of the legend title. Default:
#' 10.
#' @param fontsize.legend_text Integer, font size of the legend text. Default:
#' 8.
#' @param fontsize.axis_title Integer, font size of the axis title. Default: 10.
#' @param fontsize.axis_text Integer, font size of the axis text. Default: 8.
#'
Expand All @@ -182,11 +200,20 @@ addMICAoutput <- function(input_eset,
#' @export
#'
#' @examples
#'
#' data(pbmc14k_expression.eset)
#' ## 1. color-coded by factor or character variable
#' p_umap <- MICAplot(input_eset = pbmc14k_log2cpm.eset, color_by = 'clusterID', point.size = 0.1)
#' p1 <- MICAplot(input_eset = pbmc14k_expression.eset,
#' color_by = "clusterID",
#' X = "UMAP_1", Y = "UMAP_2",
#' point.size = 0.1,
#' fontsize.cluster_label = 6)
#'
#' ## 2. color-coded by numeric variable
#' p_umap <- MICAplot(input_eset = pbmc14k_log2cpm.eset, color_by = 'nUMI', do.logTransform = TRUE)
#' p2 <- MICAplot(input_eset = pbmc14k_expression.eset,
#' color_by = "nUMI",
#' do.logTransform = TRUE,
#' point.size = 0.1)
MICAplot <- function(input_eset,
color_by = "clusterID", colors = NULL, do.logTransform = TRUE,
X = "UMAP_1", Y = "UMAP_2",
Expand Down
13 changes: 13 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,16 @@
#' }
#' @source A subset of Filtered_DownSampled_SortedPBMC_data.csv from <https://zenodo.org/record/3357167#.YhQNF2RKj6V>
"pbmc14k_rawCount"


#' SparseEset object of PBMC14k dataset
#'
#' This dataset contains the SparseEset object of PBMC14k dataset. For demonstration purposes, it has been downsampled to 3.5k cells, with 500 cells per population.
#'
#' @format ## `pbmc14k_expression.eset`
#' A large dgCMatrix with 17,986 rows and 14,000 columns:
#' \describe{
#' This data set provides the SparseEset object of PBMC14k dataset that has been filtered, normalized, clustered and annotated.
#' }
#' @source It's generated by scMINER from Filtered_DownSampled_SortedPBMC_data.csv from <https://zenodo.org/record/3357167#.YhQNF2RKj6V>
"pbmc14k_expression.eset"
46 changes: 37 additions & 9 deletions R/differential_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ combinePvalVector <- function(pvals,
#'
#' @examples
#' ## to call this function
#' res <- compare2groups(input_eset = input_eset, group_by = group_by, g1 = g1_tmp, g0 = g0_tmp, use_method = use_method)
#' res <- compare2groups(input_eset = input_eset,
#' group_by = group_by,
#' g1 = g1_tmp,
#' g0 = g0_tmp,
#' use_method = use_method)
compare2groups <- function(input_eset,
group_by = "clusterID",
g1 = NULL, g0 = NULL,
Expand Down Expand Up @@ -200,16 +204,28 @@ compare2groups <- function(input_eset,
#'
#' @examples
#' ## 1. To perform differential expression analysis in a 1-vs-rest manner for all groups in "clusterID" column
#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", use_method = "limma")
#' de_res <- getDE(input_eset = clustered.eset,
#' group_by = "clusterID",
#' use_method = "limma")
#'
#' ## 2. To perform differential expression analysis in a 1-vs-rest manner for one specific group in "clusterID" column
#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g1 = c("1"), use_method = "limma")
#' de_res <- getDE(input_eset = clustered.eset,
#' group_by = "clusterID",
#' g1 = c("1"),
#' use_method = "limma")
#'
#' ## 3. To perform differential expression analysis in a rest-vs-1 manner for one specific group in "clusterID" column
#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g0 = c("1"), use_method = "limma")
#' de_res <- getDE(input_eset = clustered.eset,
#' group_by = "clusterID",
#' g0 = c("1"),
#' use_method = "limma")
#'
#' ## 4. To perform differential expression analysis in a 1-vs-1 manner for groups in "clusterID" column
#' de_res <- getDE(input_eset = clustered.eset, group_by = "clusterID", g1 = c("1"), g0 = c("3"), use_method = "limma")
#' de_res <- getDE(input_eset = clustered.eset,
#' group_by = "clusterID",
#' g1 = c("1"),
#' g0 = c("3"),
#' use_method = "limma")
getDE <- function(input_eset,
group_by = "clusterID",
g1 = NULL, g0 = NULL,
Expand Down Expand Up @@ -291,16 +307,28 @@ getDE <- function(input_eset,
#'
#' @examples
#' ## 1. To perform differential activity analysis in a 1-vs-rest manner for all groups in "clusterID" column
#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", use_method = "t.test")
#' da_res <- getDA(input_eset = activity_clustered.eset,
#' group_by = "clusterID",
#' use_method = "t.test")
#'
#' ## 2. To perform differential activity analysis in a 1-vs-rest manner for one specific group in "clusterID" column
#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g1 = c("1"), use_method = "t.test")
#' da_res <- getDA(input_eset = activity_clustered.eset,
#' group_by = "clusterID",
#' g1 = c("1"),
#' use_method = "t.test")
#'
#' ## 3. To perform differential activity analysis in a rest-vs-1 manner for one specific group in "clusterID" column
#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g0 = c("1"), use_method = "t.test")
#' da_res <- getDA(input_eset = activity_clustered.eset,
#' group_by = "clusterID",
#' g0 = c("1"),
#' use_method = "t.test")
#'
#' ## 4. To perform differential activity analysis in a 1-vs-1 manner for groups in "clusterID" column
#' da_res <- getDA(input_eset = activity_clustered.eset, group_by = "clusterID", g1 = c("1"), g0 = c("3"), use_method = "t.test")
#' da_res <- getDA(input_eset = activity_clustered.eset,
#' group_by = "clusterID",
#' g1 = c("1"),
#' g0 = c("3"),
#' use_method = "t.test")
getDA <- function(input_eset,
group_by = "clusterID",
g1 = NULL, g0 = NULL,
Expand Down
58 changes: 51 additions & 7 deletions R/manipulate_sparseEset.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ methods::setClass(Class = "SparseExpressionSet",
#' @export
#'
#' @examples
#' expression_raw.eset <- createSparseEset(input_matrix = sparseMatrix, projectID = "demoSample", addMetaData = T)
#' data("pbmc14k_rawCount")
#' pbmc14k_raw.eset <- createSparseEset(input_matrix = pbmc14k_rawCount,
#' projectID = "PBMC14k",
#' addMetaData = TRUE)
createSparseEset <- function(input_matrix,
do.sparseConversion = TRUE,
cellData = NULL,
Expand Down Expand Up @@ -140,7 +143,18 @@ createSparseEset <- function(input_matrix,
#' @export
#'
#' @examples
#' combined.eset <- combineSparseEset(c(sample_1.eset, sample_2.eset, sample_3.eset), projectID = c("sample1", "sample2", "sample3"), addPrefix = c("tag1", "tag2", "tag3"), addMetaData = TRUE)
#' demo1_mtx <- readInput_10x.dir(input_dir = system.file("extdata/demo_inputs/cell_matrix_10x", package = "scMINER"),
#' featureType = "gene_symbol", removeSuffix = TRUE)
#' demo1.eset <- createSparseEset(input_matrix = demo1_mtx, projectID = "demo1", addMetaData = TRUE)
#' demo2_mtx <- readInput_table(table_file = system.file("extdata/demo_inputs/table_file/demoData2.txt.gz", package = "scMINER"),
#' is.geneBYcell = TRUE, removeSuffix = TRUE)
#' demo2.eset <- createSparseEset(input_matrix = demo2_mtx, projectID = "demo2", addMetaData = TRUE)
#' combined.eset <- combineSparseEset(eset_list = c(demo1.eset, demo2.eset),
#' projectID = c("sample1", "sample2"),
#' addPrefix = c("demo1", "demo2"),
#' addSurfix = NULL,
#' addMetaData = TRUE,
#' imputeNA = TRUE)
combineSparseEset <- function(eset_list,
projectID = NULL,
addPrefix = NULL,
Expand Down Expand Up @@ -271,7 +285,13 @@ combineSparseEset <- function(eset_list,
#' @export
#'
#' @examples
#' updated.eset <- updateSparseEset(input_eset = input.eset, cellData = data.frame(pData(input.eset), cellType = "B_cells"), addMetaData = TRUE)
#' true_label <- read.table(system.file("extdata/demo_pbmc14k/PBMC14k_trueLabel.txt.gz", package = "scMINER"),
#' header = T, row.names = 1, sep = "\t", quote = "", stringsAsFactors = FALSE)
#' pbmc14k_raw.eset <- createSparseEset(input_matrix = pbmc14k_rawCount,
#' cellData = true_label,
#' featureData = NULL,
#' projectID = "PBMC14k",
#' addMetaData = TRUE)
updateSparseEset <- function(input_eset,
dataMatrix = NULL,
cellData = NULL,
Expand Down Expand Up @@ -381,8 +401,17 @@ updateSparseEset <- function(input_eset,
#' @export
#'
#' @examples
#' filtered.eset <- filterSparseEset(raw.eset) ## filter the input eset using the cutoffs calculated by scMINER.
#' filtered.eset <- filterSparseEset(raw.eset, gene.nCell_min = 10, cell.nUMI_min = 500, cell.nFeature_min = 100, cell.nFeature_max = 5000, cell.pctMito_max = 0.15)
#' ## 1. using the cutoffs automatically calculated by scMINER
#' pbmc14k_filtered.eset <- filterSparseEset(pbmc14k_raw.eset, filter_mode = "auto", filter_type = "both")
#'
#' ## 2. using the cutoffs manually specified
#' pbmc14k_filtered_manual.eset <- filterSparseEset(pbmc14k_raw.eset, filter_mode = "manual", filter_type = "both",
#' gene.nCell_min = 10,
#' cell.nUMI_min = 500,
#' cell.nUMI_max = 6500,
#' cell.nFeature_min = 200,
#' cell.nFeature_max = 2500,
#' cell.pctMito_max = 0.1)
filterSparseEset <- function(input_eset,
filter_mode = "auto",
filter_type = "both",
Expand Down Expand Up @@ -508,7 +537,11 @@ filterSparseEset <- function(input_eset,
#' @return A sparse eset object that has been normalized and log-transformed
#' @export
#'
#' @examples normalized.eset <- normalizeSparseEset(input_eset = filtered.eset, scale_factor = 1000000, do.logTransform = TRUE)
#' @examples
#' pbmc14k_log2cpm.eset <- normalizeSparseEset(pbmc14k_filtered.eset,
#' scale_factor = 1000000,
#' log_base = 2,
#' log_pseudoCount = 1)
normalizeSparseEset <- function(input_eset,
scale_factor = 1000000,
do.logTransform = TRUE,
Expand All @@ -527,6 +560,7 @@ normalizeSparseEset <- function(input_eset,
cat("Done! The data matrix of eset has been normalized but NOT log-transformed!\n")
}

exp_mat.normalized <- Matrix::Matrix(exp_mat.normalized, sparse = TRUE)
eset <- new( "SparseExpressionSet",
assayData = assayDataNew("environment", exprs = exp_mat.normalized),
phenoData = new("AnnotatedDataFrame", data = Biobase::pData(input_eset)),
Expand All @@ -551,7 +585,17 @@ normalizeSparseEset <- function(input_eset,
#' @export
#'
#' @examples
#' drawSparseEsetQC(input_eset, output_html_file = "./QC/esetQCreport.html", overwrite = FALSE, group = "projectID")
#' ## 1. To generate the QC report in a group-specific manner, recommended whenever group information is avaiable.
#' drawSparseEsetQC(input_eset = pbmc14k_raw.eset,
#' output_html_file = "/your-path/PBMC14k/PLOT/pbmc14k_rawCount.html",
#' overwrite = FALSE,
#' group_by = "trueLabel")
#'
#' ## 2. To generate the QC report from a whole view
#' drawSparseEsetQC(input_eset = pbmc14k_raw.eset,
#' output_html_file = "/your-path/PBMC14k/PLOT/pbmc14k_rawCount.html",
#' overwrite = FALSE,
#' group_by = NULL)
drawSparseEsetQC <- function(input_eset,
output_html_file,
overwrite = FALSE,
Expand Down
Loading

0 comments on commit 0685034

Please sign in to comment.