Skip to content

Commit

Permalink
various updates in gene evaluation
Browse files Browse the repository at this point in the history
  • Loading branch information
noriakis committed Feb 1, 2024
1 parent deb6680 commit 548caea
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 23 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export(addGeneAbundance)
export(alleleStat)
export(anno_PATRIC_keywords)
export(anno_eggNOG_keywords)
Expand Down Expand Up @@ -31,6 +32,7 @@ export(getFasta)
export(getID)
export(getSlot)
export(getTree)
export(getTreePlot)
export(loadInStrain)
export(loadMIDAS)
export(loadMIDAS2)
Expand Down Expand Up @@ -82,6 +84,8 @@ importFrom(grDevices,colorRampPalette)
importFrom(phangorn,NJ)
importFrom(phangorn,dist.ml)
importFrom(phangorn,read.phyDat)
importFrom(scico,scale_color_scico)
importFrom(scico,scale_color_scico_d)
importFrom(scico,scale_fill_scico)
importFrom(scico,scale_fill_scico_d)
importFrom(stats,as.formula)
Expand Down
2 changes: 1 addition & 1 deletion R/checkEGGNOG.R
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ checkEGGNOG <- function(annot_file, ret="all", checkIDs=NULL, fill=TRUE) {
#' @return data.frame
#' @export
#'
summariseAbundance <- function(stana, sp, anno, how=mean, verbose=FALSE) {
summariseAbundance <- function(stana, sp, anno, how=sum, verbose=FALSE) {
geneDf <- stana@genes[[sp]]
annoval <- anno$value |> unique()
merged <- lapply(annoval, function(i) {
Expand Down
26 changes: 14 additions & 12 deletions R/consensusSeqFast.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ consensusSeqMIDAS2 <- function(
site_depth_filter <- depths >= site_depth
depth_ratio_filter <- (depths / SAMPLES[[sample]]$mean_depth) <= site_ratio
## freqs == -1 in allele_support
allele_support <- apply(cbind(freqs, 1-freqs), 1, max) >= allele_support
allele_support <- apply(cbind(freqs, 1-freqs), 1, max) >= allele_support ## BOTTLENECK [1]
allele_support[which(freqs==-1)] <- FALSE
summed <- site_depth_filter + depth_ratio_filter + allele_support
list(freqs, depths, site_depth_filter, depth_ratio_filter, allele_support, summed) |>
Expand All @@ -141,7 +141,7 @@ consensusSeqMIDAS2 <- function(
sp_major_allele <- SPECIES[["info"]]$major_allele

keep_site_filter_list <- lapply(seq_len(nrow(SPECIES[["freqs"]])), function(i) {
keep_samples <- lapply(names(site_filters), function(sample) {
keep_samples <- lapply(names(site_filters), function(sample) { ## BOTTLENECK [2]
if (site_filters[[sample]][["flag"]][i]==3) {
list(sample, site_filters[[sample]][["freqs"]][i])
} else {
Expand Down Expand Up @@ -194,19 +194,21 @@ consensusSeqMIDAS2 <- function(
positions <- which(keep_site_filter_list==4)
if (!is.infinite(max_sites)) {positions <- positions[1:max_sites]}
allele_list <- lapply(positions, function(i) {
per_sample_allele <- lapply(names(site_filters), function(sample) {
per_sample_allele <- lapply(names(site_filters), function(sample) { ## BOTTLENECK [3]
if (site_filters[[sample]][["flag"]][i]!=3) {
ap <- "-"
} else if (site_filters[[sample]][["depths"]][i] == 0) {
ap <- "-"
} else if (site_filters[[sample]][["freqs"]][i] == -1) {
ap <- "-"
} else if (site_filters[[sample]][["freqs"]][i] >= 0.5) {
ap <- sp_minor_allele[i]
return("-")
}
if (site_filters[[sample]][["depths"]][i] == 0) {
return("-")
}
if (site_filters[[sample]][["freqs"]][i] == -1) {
return("-")
}
if (site_filters[[sample]][["freqs"]][i] >= 0.5) {
return(sp_minor_allele[i])
} else {
ap <- sp_major_allele[i]
return(sp_major_allele[i])
}
return(ap)
})
unlist(per_sample_allele)
})
Expand Down
53 changes: 52 additions & 1 deletion R/doGSEA.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#' doGSEA
#'
#' By default this uses t-statistics for ranking of the genes.
#'
#' @return GSEA results from clusterProfiler
#' @export
doGSEA <- function(stana, candSp=NULL, cl=NULL, eps=1e-2, how=mean,
Expand Down Expand Up @@ -44,7 +47,52 @@ doGSEA <- function(stana, candSp=NULL, cl=NULL, eps=1e-2, how=mean,
return(enr)
}


#' addGeneAbundance
#'
#' Add the specified gene copy number to metadata
#'
#' @param disc discretize the abundance by the threshold. function for calculating
#' threshold, like {median}
#' @export
addGeneAbundance <- function(stana, candSp, IDs,
target="KO", how=sum, newCol="gene",
disc=NULL, discNumeric=TRUE) {
if (target=="KO") {
subMat <- stana@kos[[candSp]][IDs, ]
} else {
subMat <- stana@genes[[candSp]][IDs, ]
}
if (length(IDs)>1) {
adda <- apply(subMat, 2, how)
} else {
adda <- subMat
}
nm <- names(adda)
if (!is.null(disc)) {
thresh <- do.call(disc, list("x"=adda))
if (discNumeric) {
adda <- as.numeric(adda > thresh)
} else {
adda <- adda > thresh
}
names(adda) <- nm
}
meta <- stana@meta
meta[[newCol]] <- adda[row.names(stana@meta)]
stana@meta <- meta
return(stana)
}

#' L2FC
#'
#' Report various statistics for use in GSEA or visualization
#'
#' @param mat row corresponds to gene, and column samples
#' @param l1 level1
#' @param l2 level2
#' @param method gmean, amean, or t
#' @param eps pseudocount added when calculating log
#' @noRd
L2FC <- function(mat, l1, l2, method="t", eps=0) {
if (method == "gmean") {
Expand All @@ -68,8 +116,11 @@ L2FC <- function(mat, l1, l2, method="t", eps=0) {


#' calcKO
#' @param stana stana object
#' @param candSp candidate species ID
#' @param how how to summarize multiple gene CN assigned to the same KO
#' @export
calcKO <- function(stana, candSp=NULL, how=mean) {
calcKO <- function(stana, candSp=NULL, how=sum) {
if (is.null(candSp)) {candSp <- stana@ids[1]}
if (is.null(stana@eggNOG[[candSp]])) {stop("Please provide list of path to annotation file by `setAnnotation` function.")}
ko_df_filt <- summariseAbundance(stana, sp = candSp,
Expand Down
31 changes: 26 additions & 5 deletions R/plotTree.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,18 @@
#' subset the matrix to this ID.
#' @param use_point use geom_point for plotting discrete metadata,
#' instead of geom_star.
#' @param deleteZeroDepth delete zero depth position
#' @importFrom ggtreeExtra geom_fruit
#' @importFrom ggnewscale new_scale_fill
#' @importFrom ggstar geom_star
#' @importFrom scico scale_fill_scico_d scale_fill_scico
#' @importFrom scico scale_fill_scico_d scale_fill_scico scale_color_scico scale_color_scico_d
#'
#' @export
plotTree <- function(stana, species=NULL, cl=NULL,
dist_method="dist.ml", meta=NULL, layout="circular",
target="fasta", IDs=NULL, use_point=FALSE,
tree_args=list(), branch.length="none", point_size=2) {
target="fasta", IDs=NULL, use_point=FALSE, branch_col="black",
tree_args=list(), branch.length="none", point_size=2,
deleteZeroDepth=TRUE) {
if (is.null(cl)) {cl <- stana@cl}
if (!is.null(meta)) {
meta <- checkMeta(stana, meta)
Expand All @@ -53,6 +55,10 @@ plotTree <- function(stana, species=NULL, cl=NULL,
dm <- do.call(dist, tree_args)
} else {
mat <- stana@snps[[sp]]
if (deleteZeroDepth) {
mat <- mat[rowSums(mat==-1)==0, ]
cat("Position number:", dim(mat)[1], "\n")
}
if (!is.null(IDs)) {
mat <- mat[intersect(row.names(mat),IDs), ]
}
Expand Down Expand Up @@ -108,8 +114,23 @@ plotTree <- function(stana, species=NULL, cl=NULL,
## Select random shape from ggstar
starsh <- sample(1:30, length(colnames(meta)), replace=FALSE)
names(starsh) <- colnames(meta)

p <- ggtree(tre, layout=flyt, branch.length=branch.length)


if (branch_col %in% colnames(stana@meta)) {
## NA value is fixed
tmp_class <- class(stana@meta[[branch_col]])
stana@meta$label <- stana@meta$id
addNow <- stana@meta[,c(branch_col, "label")]
tre <- full_join(tre, addNow, by="label")
p <- ggtree(tre, layout=flyt, branch.length=branch.length, mapping=aes(color=.data[[branch_col]]))
if (tmp_class %in% c("integer","numeric","logical")) { ## T/F is treated as numeric
p <- p + scico::scale_color_scico(na.value="grey", palette=sample(scico::scico_palette_names(), 1))
} else {
p <- p + scico::scale_color_scico_d(na.value="grey", palette=sample(scico::scico_palette_names(), 1))
}
} else {
p <- ggtree(tre, layout=flyt, branch.length=branch.length, color=branch_col)
}
for (tmp_show_cv in show_meta) {
if (is.numeric(meta[[tmp_show_cv]])) {
## If is numeric
Expand Down
7 changes: 7 additions & 0 deletions R/stana.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ setGeneric("getTree",
setMethod("getTree", "stana",
function(x) attr(x, "treeList"))

#' @export
setGeneric("getTreePlot",
function(x) standardGeneric("getTreePlot"))

setMethod("getTreePlot", "stana",
function(x) attr(x, "treePlotList"))

#' @export
setGeneric("getID",
function(x) standardGeneric("getID"))
Expand Down
24 changes: 24 additions & 0 deletions man/addGeneAbundance.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion man/calcKO.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/doGSEA.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/plotTree.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/summariseAbundance.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 548caea

Please sign in to comment.