diff --git a/NAMESPACE b/NAMESPACE index 7fa691c..850857b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(addGeneAbundance) export(alleleStat) export(anno_PATRIC_keywords) export(anno_eggNOG_keywords) @@ -31,6 +32,7 @@ export(getFasta) export(getID) export(getSlot) export(getTree) +export(getTreePlot) export(loadInStrain) export(loadMIDAS) export(loadMIDAS2) @@ -82,6 +84,8 @@ importFrom(grDevices,colorRampPalette) importFrom(phangorn,NJ) importFrom(phangorn,dist.ml) importFrom(phangorn,read.phyDat) +importFrom(scico,scale_color_scico) +importFrom(scico,scale_color_scico_d) importFrom(scico,scale_fill_scico) importFrom(scico,scale_fill_scico_d) importFrom(stats,as.formula) diff --git a/R/checkEGGNOG.R b/R/checkEGGNOG.R index 25981e9..543c3f6 100644 --- a/R/checkEGGNOG.R +++ b/R/checkEGGNOG.R @@ -210,7 +210,7 @@ checkEGGNOG <- function(annot_file, ret="all", checkIDs=NULL, fill=TRUE) { #' @return data.frame #' @export #' -summariseAbundance <- function(stana, sp, anno, how=mean, verbose=FALSE) { +summariseAbundance <- function(stana, sp, anno, how=sum, verbose=FALSE) { geneDf <- stana@genes[[sp]] annoval <- anno$value |> unique() merged <- lapply(annoval, function(i) { diff --git a/R/consensusSeqFast.R b/R/consensusSeqFast.R index ab933bc..48a6671 100644 --- a/R/consensusSeqFast.R +++ b/R/consensusSeqFast.R @@ -127,7 +127,7 @@ consensusSeqMIDAS2 <- function( site_depth_filter <- depths >= site_depth depth_ratio_filter <- (depths / SAMPLES[[sample]]$mean_depth) <= site_ratio ## freqs == -1 in allele_support - allele_support <- apply(cbind(freqs, 1-freqs), 1, max) >= allele_support + allele_support <- apply(cbind(freqs, 1-freqs), 1, max) >= allele_support ## BOTTLENECK [1] allele_support[which(freqs==-1)] <- FALSE summed <- site_depth_filter + depth_ratio_filter + allele_support list(freqs, depths, site_depth_filter, depth_ratio_filter, allele_support, summed) |> @@ -141,7 +141,7 @@ consensusSeqMIDAS2 <- function( sp_major_allele <- SPECIES[["info"]]$major_allele keep_site_filter_list <- lapply(seq_len(nrow(SPECIES[["freqs"]])), function(i) { - keep_samples <- lapply(names(site_filters), function(sample) { + keep_samples <- lapply(names(site_filters), function(sample) { ## BOTTLENECK [2] if (site_filters[[sample]][["flag"]][i]==3) { list(sample, site_filters[[sample]][["freqs"]][i]) } else { @@ -194,19 +194,21 @@ consensusSeqMIDAS2 <- function( positions <- which(keep_site_filter_list==4) if (!is.infinite(max_sites)) {positions <- positions[1:max_sites]} allele_list <- lapply(positions, function(i) { - per_sample_allele <- lapply(names(site_filters), function(sample) { + per_sample_allele <- lapply(names(site_filters), function(sample) { ## BOTTLENECK [3] if (site_filters[[sample]][["flag"]][i]!=3) { - ap <- "-" - } else if (site_filters[[sample]][["depths"]][i] == 0) { - ap <- "-" - } else if (site_filters[[sample]][["freqs"]][i] == -1) { - ap <- "-" - } else if (site_filters[[sample]][["freqs"]][i] >= 0.5) { - ap <- sp_minor_allele[i] + return("-") + } + if (site_filters[[sample]][["depths"]][i] == 0) { + return("-") + } + if (site_filters[[sample]][["freqs"]][i] == -1) { + return("-") + } + if (site_filters[[sample]][["freqs"]][i] >= 0.5) { + return(sp_minor_allele[i]) } else { - ap <- sp_major_allele[i] + return(sp_major_allele[i]) } - return(ap) }) unlist(per_sample_allele) }) diff --git a/R/doGSEA.R b/R/doGSEA.R index f9ab133..e50afa3 100644 --- a/R/doGSEA.R +++ b/R/doGSEA.R @@ -1,4 +1,7 @@ #' doGSEA +#' +#' By default this uses t-statistics for ranking of the genes. +#' #' @return GSEA results from clusterProfiler #' @export doGSEA <- function(stana, candSp=NULL, cl=NULL, eps=1e-2, how=mean, @@ -44,7 +47,52 @@ doGSEA <- function(stana, candSp=NULL, cl=NULL, eps=1e-2, how=mean, return(enr) } + +#' addGeneAbundance +#' +#' Add the specified gene copy number to metadata +#' +#' @param disc discretize the abundance by the threshold. function for calculating +#' threshold, like {median} +#' @export +addGeneAbundance <- function(stana, candSp, IDs, + target="KO", how=sum, newCol="gene", + disc=NULL, discNumeric=TRUE) { + if (target=="KO") { + subMat <- stana@kos[[candSp]][IDs, ] + } else { + subMat <- stana@genes[[candSp]][IDs, ] + } + if (length(IDs)>1) { + adda <- apply(subMat, 2, how) + } else { + adda <- subMat + } + nm <- names(adda) + if (!is.null(disc)) { + thresh <- do.call(disc, list("x"=adda)) + if (discNumeric) { + adda <- as.numeric(adda > thresh) + } else { + adda <- adda > thresh + } + names(adda) <- nm + } + meta <- stana@meta + meta[[newCol]] <- adda[row.names(stana@meta)] + stana@meta <- meta + return(stana) +} + +#' L2FC +#' +#' Report various statistics for use in GSEA or visualization +#' #' @param mat row corresponds to gene, and column samples +#' @param l1 level1 +#' @param l2 level2 +#' @param method gmean, amean, or t +#' @param eps pseudocount added when calculating log #' @noRd L2FC <- function(mat, l1, l2, method="t", eps=0) { if (method == "gmean") { @@ -68,8 +116,11 @@ L2FC <- function(mat, l1, l2, method="t", eps=0) { #' calcKO +#' @param stana stana object +#' @param candSp candidate species ID +#' @param how how to summarize multiple gene CN assigned to the same KO #' @export -calcKO <- function(stana, candSp=NULL, how=mean) { +calcKO <- function(stana, candSp=NULL, how=sum) { if (is.null(candSp)) {candSp <- stana@ids[1]} if (is.null(stana@eggNOG[[candSp]])) {stop("Please provide list of path to annotation file by `setAnnotation` function.")} ko_df_filt <- summariseAbundance(stana, sp = candSp, diff --git a/R/plotTree.R b/R/plotTree.R index 4e99160..84466cc 100644 --- a/R/plotTree.R +++ b/R/plotTree.R @@ -21,16 +21,18 @@ #' subset the matrix to this ID. #' @param use_point use geom_point for plotting discrete metadata, #' instead of geom_star. +#' @param deleteZeroDepth delete zero depth position #' @importFrom ggtreeExtra geom_fruit #' @importFrom ggnewscale new_scale_fill #' @importFrom ggstar geom_star -#' @importFrom scico scale_fill_scico_d scale_fill_scico +#' @importFrom scico scale_fill_scico_d scale_fill_scico scale_color_scico scale_color_scico_d #' #' @export plotTree <- function(stana, species=NULL, cl=NULL, dist_method="dist.ml", meta=NULL, layout="circular", - target="fasta", IDs=NULL, use_point=FALSE, - tree_args=list(), branch.length="none", point_size=2) { + target="fasta", IDs=NULL, use_point=FALSE, branch_col="black", + tree_args=list(), branch.length="none", point_size=2, + deleteZeroDepth=TRUE) { if (is.null(cl)) {cl <- stana@cl} if (!is.null(meta)) { meta <- checkMeta(stana, meta) @@ -53,6 +55,10 @@ plotTree <- function(stana, species=NULL, cl=NULL, dm <- do.call(dist, tree_args) } else { mat <- stana@snps[[sp]] + if (deleteZeroDepth) { + mat <- mat[rowSums(mat==-1)==0, ] + cat("Position number:", dim(mat)[1], "\n") + } if (!is.null(IDs)) { mat <- mat[intersect(row.names(mat),IDs), ] } @@ -108,8 +114,23 @@ plotTree <- function(stana, species=NULL, cl=NULL, ## Select random shape from ggstar starsh <- sample(1:30, length(colnames(meta)), replace=FALSE) names(starsh) <- colnames(meta) - - p <- ggtree(tre, layout=flyt, branch.length=branch.length) + + + if (branch_col %in% colnames(stana@meta)) { + ## NA value is fixed + tmp_class <- class(stana@meta[[branch_col]]) + stana@meta$label <- stana@meta$id + addNow <- stana@meta[,c(branch_col, "label")] + tre <- full_join(tre, addNow, by="label") + p <- ggtree(tre, layout=flyt, branch.length=branch.length, mapping=aes(color=.data[[branch_col]])) + if (tmp_class %in% c("integer","numeric","logical")) { ## T/F is treated as numeric + p <- p + scico::scale_color_scico(na.value="grey", palette=sample(scico::scico_palette_names(), 1)) + } else { + p <- p + scico::scale_color_scico_d(na.value="grey", palette=sample(scico::scico_palette_names(), 1)) + } + } else { + p <- ggtree(tre, layout=flyt, branch.length=branch.length, color=branch_col) + } for (tmp_show_cv in show_meta) { if (is.numeric(meta[[tmp_show_cv]])) { ## If is numeric diff --git a/R/stana.R b/R/stana.R index c4ba4df..daf9802 100644 --- a/R/stana.R +++ b/R/stana.R @@ -93,6 +93,13 @@ setGeneric("getTree", setMethod("getTree", "stana", function(x) attr(x, "treeList")) +#' @export +setGeneric("getTreePlot", + function(x) standardGeneric("getTreePlot")) + +setMethod("getTreePlot", "stana", + function(x) attr(x, "treePlotList")) + #' @export setGeneric("getID", function(x) standardGeneric("getID")) diff --git a/man/addGeneAbundance.Rd b/man/addGeneAbundance.Rd new file mode 100644 index 0000000..737830c --- /dev/null +++ b/man/addGeneAbundance.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/doGSEA.R +\name{addGeneAbundance} +\alias{addGeneAbundance} +\title{addGeneAbundance} +\usage{ +addGeneAbundance( + stana, + candSp, + IDs, + target = "KO", + how = sum, + newCol = "gene", + disc = NULL, + discNumeric = TRUE +) +} +\arguments{ +\item{disc}{discretize the abundance by the threshold. function for calculating +threshold, like {median}} +} +\description{ +Add the specified gene copy number to metadata +} diff --git a/man/calcKO.Rd b/man/calcKO.Rd index 489a840..7081d31 100644 --- a/man/calcKO.Rd +++ b/man/calcKO.Rd @@ -4,7 +4,14 @@ \alias{calcKO} \title{calcKO} \usage{ -calcKO(stana, candSp = NULL, how = mean) +calcKO(stana, candSp = NULL, how = sum) +} +\arguments{ +\item{stana}{stana object} + +\item{candSp}{candidate species ID} + +\item{how}{how to summarize multiple gene CN assigned to the same KO} } \description{ calcKO diff --git a/man/doGSEA.Rd b/man/doGSEA.Rd index be251a0..5602fa7 100644 --- a/man/doGSEA.Rd +++ b/man/doGSEA.Rd @@ -10,5 +10,5 @@ doGSEA(stana, candSp = NULL, cl = NULL, eps = 0.01, how = mean, zeroPerc = 0) GSEA results from clusterProfiler } \description{ -doGSEA +By default this uses t-statistics for ranking of the genes. } diff --git a/man/plotTree.Rd b/man/plotTree.Rd index cbf7a67..2dd1766 100644 --- a/man/plotTree.Rd +++ b/man/plotTree.Rd @@ -14,9 +14,11 @@ plotTree( target = "fasta", IDs = NULL, use_point = FALSE, + branch_col = "black", tree_args = list(), branch.length = "none", - point_size = 2 + point_size = 2, + deleteZeroDepth = TRUE ) } \arguments{ @@ -48,6 +50,8 @@ instead of geom_star.} \item{point_size}{point size} +\item{deleteZeroDepth}{delete zero depth position} + \item{model}{dist.ml model} } \description{ diff --git a/man/summariseAbundance.Rd b/man/summariseAbundance.Rd index 561d266..8c5257b 100644 --- a/man/summariseAbundance.Rd +++ b/man/summariseAbundance.Rd @@ -4,7 +4,7 @@ \alias{summariseAbundance} \title{summariseAbundance} \usage{ -summariseAbundance(stana, sp, anno, how = mean, verbose = FALSE) +summariseAbundance(stana, sp, anno, how = sum, verbose = FALSE) } \arguments{ \item{stana}{stana object}