From 1f2a7d9e27bf4ca5601f013c4fde9bbb15f86bdb Mon Sep 17 00:00:00 2001 From: noriakis Date: Thu, 26 Sep 2024 20:58:22 +0900 Subject: [PATCH] many updates --- DESCRIPTION | 6 ++--- R/NMF.R | 9 ++++--- R/addGeneAbundance.R | 50 +++++++++++++++++++++++++++++++++++ R/alleleStat.R | 2 -- R/annot.R | 2 +- R/calcGF.R | 6 +++-- R/checkEGGNOG.R | 14 +++++----- R/checkPATRIC.R | 2 ++ R/checkPATRICSimple.R | 1 + R/combineGenes.R | 2 +- R/compareGenes.R | 2 +- R/consensusSeq.R | 1 + R/consensusSeqFast.R | 12 ++++++--- R/convertWideToMaf.R | 11 ++++---- R/doAdonis.R | 2 +- R/doBoruta.R | 1 + R/doGSEA.R | 56 +++------------------------------------- R/doL0.R | 4 +-- R/inferAndPlotTree.R | 2 +- R/loadInStrain.R | 2 +- R/loadMIDAS.R | 1 + R/loadMIDAS2Refine.R | 3 ++- R/loadmetaSNV.R | 1 + R/plotCoverage.R | 2 +- R/plotDist.R | 1 + R/plotGenes.R | 2 +- R/plotHeatmap.R | 4 +-- R/plotPCA.R | 1 + R/rankComponents.R | 1 + R/utils.R | 1 + inst/extdata/app_stana.R | 4 +-- man/NMF.Rd | 5 +++- man/addGeneAbundance.Rd | 6 ++--- man/calcGF.Rd | 5 +++- man/checkPATRIC.Rd | 3 +++ man/checkPATRICSimple.Rd | 3 +++ man/cnDiscretize.Rd | 3 +++ man/combineMaf.Rd | 3 +++ man/compareGenes.Rd | 2 +- man/consensusseq.Rd | 13 ++++++++++ man/doAdonis.Rd | 3 +++ man/doBoruta.Rd | 2 ++ man/doGSEA.Rd | 2 +- man/doL0.Rd | 4 +-- man/drawEGGNOG.Rd | 3 +++ man/drawPATRIC.Rd | 3 +++ man/inferAndPlotTree.Rd | 3 +++ man/loadInStrain.Rd | 3 +++ man/loadMIDAS.Rd | 3 +++ man/loadMIDAS2.Rd | 5 +++- man/loadmetaSNV.Rd | 3 +++ man/obtainPositions.Rd | 3 +++ man/plotCoverage.Rd | 2 +- man/plotDist.Rd | 3 +++ man/plotGenes.Rd | 2 +- man/plotHeatmap.Rd | 5 +++- man/plotPCA.Rd | 3 +++ man/rankComponents.Rd | 3 +++ man/reverseAnnot.Rd | 3 +++ 59 files changed, 202 insertions(+), 107 deletions(-) create mode 100644 R/addGeneAbundance.R diff --git a/DESCRIPTION b/DESCRIPTION index 2cf89ac..20bc67f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,9 +5,9 @@ Version: 0.99.0 License: GPL-3 Description: The package aims to analyze intra-species diversity from metagenomics. The package supports the loading functions for single nucleotide variants and copy number variant profiles, filtering, and statistical analysis. Authors@R: person("Noriaki", "Sato", email = "nori@hgc.jp", role = c("cre", "aut")) -Depends: ggplot2, ggstar, ggraph, igraph -Imports: GetoptLong, BiocFileCache, RCurl, vegan, methods, data.table, phangorn, RColorBrewer, ggtree, circlize, ComplexHeatmap, ggkegg, ape, dplyr, exactRankTests, ggblend, ggh4x, scales, tidygraph, ggplotify, ggtreeExtra, ggnewscale, scico, MKmisc, NMF, pillar, cowplot, patchwork, reshape2, Boruta, tidyr, stringr, clusterProfiler, grDevices, stats, utils -Suggests: simplifyEnrichment, knitr, rmarkdown, NNLM, shiny, BiocStyle, matrixStats +Depends: ggplot2, ggraph, igraph, vegan, phangorn +Imports: GetoptLong, BiocFileCache, RCurl, methods, data.table, RColorBrewer, ggtree, circlize, ComplexHeatmap, ggkegg, ape, dplyr, exactRankTests, ggblend, ggh4x, scales, tidygraph, ggplotify, ggtreeExtra, ggnewscale, scico, MKmisc, NMF, pillar, cowplot, patchwork, reshape2, Boruta, tidyr, stringr, clusterProfiler, grDevices, stats, utils, BiocStyle, ggstar +Suggests: simplifyEnrichment, knitr, rmarkdown, NNLM, shiny, matrixStats RoxygenNote: 7.3.2 biocViews: Microbiome VignetteBuilder: knitr diff --git a/R/NMF.R b/R/NMF.R index e478cdd..c1ebed4 100644 --- a/R/NMF.R +++ b/R/NMF.R @@ -22,9 +22,10 @@ #' @param nnlm_na_perc NA frequency when the cross-validation is performed. #' @param tss perform total sum scaling to the matrix #' @param remove_na_perc remove features with too many NA -#' (features with NA below {remove_na_perc} * overall sample numbers will be retained) +#' (features with NA below remove_na_perc * overall sample numbers will be retained) #' @importFrom NMF nmf nmfEstimateRank #' @export +#' @return stana object NMF <- function(stana, species, rank=3, target="kos", seed=53, method="snmf/r", deleteZeroDepth=FALSE, beta=0.01, estimate=FALSE, estimate_range=1:6, nnlm_flag=FALSE, nnlm_args=list(), nnlm_na_perc=0.3, tss=FALSE, remove_na_perc=NULL) { @@ -106,7 +107,7 @@ NMF <- function(stana, species, rank=3, target="kos", seed=53, method="snmf/r", err <- sapply(X = estimate_range, FUN = function(k) { - z <- nnmf(A2, k); + z <- NNLM::nnmf(A2, k); c(mean((with(z, W %*% H)[ind] - A[ind])^2), tail(z$mse, 1)); } ); @@ -158,7 +159,7 @@ NMF <- function(stana, species, rank=3, target="kos", seed=53, method="snmf/r", res <- NMF::nmf(mat, rank = rank, seed = seed, method=method) } coefMat <- coef(res) - basisMat <- basis(res) + basisMat <- NMF::basis(res) } stana@coefMat[[species]] <- data.frame(coefMat) @@ -207,7 +208,7 @@ plotStackedBarPlot <- function(stana, sp, by="NMF") { ## Not showing sample label, instead facet by group ggplot(melted, aes(fill=variable, y=value, x=sample)) + geom_col(position="fill")+ - facet_grid(. ~ group, scale="free")+ + facet_grid(. ~ group, scales="free")+ scale_y_continuous(expand = expansion(mult = c(0, 0.05)))+ cowplot::theme_cowplot()+ cowplot::panel_border()+ theme(axis.text.x = element_blank()) diff --git a/R/addGeneAbundance.R b/R/addGeneAbundance.R new file mode 100644 index 0000000..8d26041 --- /dev/null +++ b/R/addGeneAbundance.R @@ -0,0 +1,50 @@ +#' addGeneAbundance +#' +#' Add the specified gene copy number to metadata +#' +#' @param stana stana object +#' @param candSp candidate species +#' @param IDs gene IDs to add +#' @param target KO or genes +#' @param how how to combine multiple IDs +#' @param newCol new column name +#' @param discNumeric convert discrete value to numeric +#' @param disc discretize the abundance by the threshold. function for calculating +#' threshold, like median +#' @param convert conversion such as log10 +#' @export +#' @return stana object +addGeneAbundance <- function(stana, candSp, IDs, + target="KO", how=sum, newCol="gene", + disc=NULL, discNumeric=TRUE, convert=NULL) { + if (target=="KO") { + ints <- intersect(row.names(stana@kos[[candSp]]), IDs) + subMat <- stana@kos[[candSp]][ints, ] + } else { + ints <- intersect(row.names(stana@genes[[candSp]]), IDs) + subMat <- stana@genes[[candSp]][ints, ] + } + if (length(IDs)>1) { + adda <- apply(subMat, 2, how) + } else { + adda <- subMat + } + nm <- names(adda) + if (!is.null(disc)) { + thresh <- do.call(disc, list("x"=adda)) + if (discNumeric) { + adda <- as.numeric(adda > thresh) + } else { + adda <- adda > thresh + } + names(adda) <- nm + } + meta <- stana@meta + if (!is.null(convert)) { + adda <- do.call(convert, list(x=adda)) + } + meta[[newCol]] <- adda[row.names(stana@meta)] + stana@meta <- meta + return(stana) +} + diff --git a/R/alleleStat.R b/R/alleleStat.R index 3706d43..d0be43c 100644 --- a/R/alleleStat.R +++ b/R/alleleStat.R @@ -1,4 +1,3 @@ - #' @noRd alleleStat_metaSNV <- function(stana, sp, cl, deleteZeroDepth) { statList <- list() @@ -43,7 +42,6 @@ alleleStat_metaSNV <- function(stana, sp, cl, deleteZeroDepth) { #' @param deleteZeroDepth delete snvs with zero depth #' @export #' @return list of statistical values -#' alleleStat <- function(stana, sp=NULL, cl=NULL, base="maj", deleteZeroDepth=FALSE) { if (is.null(sp)) {sp <- stana@ids[1]} diff --git a/R/annot.R b/R/annot.R index acab4b1..dc1a9b9 100644 --- a/R/annot.R +++ b/R/annot.R @@ -65,7 +65,7 @@ anno_eggNOG_keywords <- function(split, genes, tib, }) lt <- lapply(gene_list, function(x) { - (tib |> dplyr::filter(ID %in% x))$V2 + (tib |> dplyr::filter(.data$ID %in% x))$V2 }) names(lt) <- names(gene_list) diff --git a/R/calcGF.R b/R/calcGF.R index f3540bb..06d224f 100644 --- a/R/calcGF.R +++ b/R/calcGF.R @@ -1,12 +1,14 @@ - - #' calcGF +#' +#' Calculate gene family profile +#' #' @param stana stana object #' @param candSp candidate species ID #' @param how how to summarize multiple gene CN assigned to the same KO #' @param annot eggNOG or manual #' @param column When eggNOG, which family to summarize, default to KEGG_ko #' @export +#' @return stana object calcGF <- function(stana, candSp=NULL, how=sum, annot="eggNOG", column="KEGG_ko") { checkID(stana, candSp) if (is.null(candSp)) {cat("Species not specified, the first ID will be used:", stana@ids[1]); diff --git a/R/checkEGGNOG.R b/R/checkEGGNOG.R index 543c3f6..3ba02e0 100644 --- a/R/checkEGGNOG.R +++ b/R/checkEGGNOG.R @@ -11,13 +11,13 @@ #' "KEGG_rclass" "BRITE" "KEGG_TC" "CAZy" "BiGG_Reaction" #' "PFAMs" #' @export +#' @return list containing ggplot2 object and graph #' drawEGGNOG <- function(annot_file, geneIDs, candPlot) { retList <- list() egng <- checkEGGNOG(annot_file,"all", geneIDs) # delete map* entry (in KEGG) egng <- egng[!grepl("map",egng$value),] - print(egng) ap <- NULL for (i in geneIDs) { tmp <- egng[egng$ID == i,] @@ -87,15 +87,15 @@ checkEGGNOG <- function(annot_file, ret="all", checkIDs=NULL, fill=TRUE) { if (ret!="all") { parsed <- ann[,c("#query",ret), with=FALSE] |> tidyr::pivot_longer(-1) |> - dplyr::filter(value!="-") |> - dplyr::mutate(value = strsplit(as.character(value), ",")) |> - tidyr::unnest(value) + dplyr::filter(.data$value!="-") |> + dplyr::mutate(value = strsplit(as.character(.data$value), ",")) |> + tidyr::unnest(.data$value) } else { parsed <- ann[, !c("evalue","score"), with=FALSE] |> tidyr::pivot_longer(-1) |> - dplyr::filter(value!="-") |> - dplyr::mutate(value = strsplit(as.character(value), ",")) |> - tidyr::unnest(value) + dplyr::filter(.data$value!="-") |> + dplyr::mutate(value = strsplit(as.character(.data$value), ",")) |> + tidyr::unnest(.data$value) } parsed <- parsed |> `colnames<-`(c("ID","name","value")) if (!is.null(checkIDs)) { diff --git a/R/checkPATRIC.R b/R/checkPATRIC.R index 5ef128f..9fee970 100644 --- a/R/checkPATRIC.R +++ b/R/checkPATRIC.R @@ -14,6 +14,7 @@ #' @import igraph ggraph BiocFileCache RCurl ggplot2 #' @importFrom data.table fread #' @export +#' @return checkPATRIC results drawPATRIC <- function(genes, whichToCount="ec_description", delSize=0, @@ -51,6 +52,7 @@ drawPATRIC <- function(genes, #' @param skipGraph skip graph plotting #' @import igraph ggraph BiocFileCache RCurl ggplot2 #' @export +#' @return list of annotations checkPATRIC <- function(genes, whichToCount="ec_description", delSize=0, diff --git a/R/checkPATRICSimple.R b/R/checkPATRICSimple.R index ed00f04..ddcacc8 100644 --- a/R/checkPATRICSimple.R +++ b/R/checkPATRICSimple.R @@ -8,6 +8,7 @@ #' @param whichToCount ec_description, ec_number, pathway_name, pathway_id #' @import BiocFileCache RCurl #' @export +#' @return list of annotation checkPATRICSimple <- function(genes, whichToCount="ec_description") { allGenes <- unlist(genes) diff --git a/R/combineGenes.R b/R/combineGenes.R index 802c24a..35d8426 100644 --- a/R/combineGenes.R +++ b/R/combineGenes.R @@ -28,7 +28,7 @@ combineGenes <- function(stana_list, species) { if (ovlgr>0) {qqcat("Duplicate label found in group\n")} new_stana@ids <- species - new_stana@type <- stana@type + new_stana@type <- "Combined" new_stana@cl <- cls new_stana@colors <- getColors(cls) diff --git a/R/compareGenes.R b/R/compareGenes.R index 66f79bc..3607499 100644 --- a/R/compareGenes.R +++ b/R/compareGenes.R @@ -12,7 +12,7 @@ #' @param verbose_zero show zero abundance genes #' @import ggplot2 #' @export -#' @return ggplot +#' @return list of statistical results from wilcox.exact #' compareGenes <- function(stana, species=NULL, geneID=NULL, cl=NULL, argList=list(), verbose_zero=FALSE) { diff --git a/R/consensusSeq.R b/R/consensusSeq.R index 5e93c23..3ed53a1 100644 --- a/R/consensusSeq.R +++ b/R/consensusSeq.R @@ -10,6 +10,7 @@ #' @param argList parameters, passed to corresponding functions #' @export #' @rdname consensusseq +#' @return stana object #' consensusSeq <- function(stana, species=NULL, argList=list()){ diff --git a/R/consensusSeqFast.R b/R/consensusSeqFast.R index 945bec0..2422656 100644 --- a/R/consensusSeqFast.R +++ b/R/consensusSeqFast.R @@ -3,6 +3,7 @@ #' @param allele_columns columns specifying major and minor allele #' @rdname consensusseq #' @export +#' @return stana object consensusSeqGeneral <- function( stana, species=NULL, @@ -69,7 +70,7 @@ consensusSeqGeneral <- function( if (return_mat) { mat <- do.call(cbind, allele_list) - row.names(mat) <- names(site_filters) + # row.names(mat) <- names(site_filters) return(mat) } @@ -133,10 +134,12 @@ consensusSeqGeneral <- function( #' @param output_seq whether to output actual FASTA file #' @param locus_type locus type to be included, default to CDS #' @param site_list site list to be included +#' @param site_type site types to be included #' @param return_mat return matrix of characters #' @param output_seq whether to output actual FASTA file #' @rdname consensusseq #' @export +#' @return stana object consensusSeqMIDAS2 <- function( stana, species=NULL, @@ -150,6 +153,7 @@ consensusSeqMIDAS2 <- function( site_prev=0.9, locus_type="CDS", site_list=NULL, + site_type=c("1D","2D","3D","4D"), cl=NULL, max_sites=Inf, tree=FALSE, @@ -287,7 +291,7 @@ consensusSeqMIDAS2 <- function( sum(c( (length(keep_samples) / length(names(SAMPLES))) >= max(c(1e-6, site_prev)), pooled_maf >= site_maf, - sp_site_type[i] %in% c("1D","2D","3D","4D"), + sp_site_type[i] %in% site_type, sp_locus_type[i] %in% locus_type)) return(keep_site_filter) }) |> unlist() @@ -391,6 +395,7 @@ consensusSeqMIDAS2 <- function( #' @importFrom phangorn read.phyDat #' @rdname consensusseq #' @export +#' @return stana object consensusSeqMIDAS1 <- function( stana, species=NULL, @@ -405,6 +410,7 @@ consensusSeqMIDAS1 <- function( max_sites=Inf, tree=FALSE, locus_type="CDS", + site_type=c("1D","2D","3D","4D"), keep_samples=NULL, verbose=FALSE, site_list=NULL, @@ -517,7 +523,7 @@ consensusSeqMIDAS1 <- function( (length(keep_samples) / length(names(SAMPLES))) >= max(c(1e-6, site_prev)), pooled_maf >= site_maf, sp_ref_allele[i] %in% c("A","T","G","C"), - sp_site_type[i] %in% c("1D","2D","3D","4D"), + sp_site_type[i] %in% site_type, sp_locus_type[i] %in% locus_type)) return(keep_site_filter) }) |> unlist() diff --git a/R/convertWideToMaf.R b/R/convertWideToMaf.R index 86588c2..b7cb83d 100644 --- a/R/convertWideToMaf.R +++ b/R/convertWideToMaf.R @@ -34,7 +34,7 @@ convertWideToMaf <- function(df) { }) sp <- unique(df$species_id) ret <- lapply(sp, function(s) { - tmp <- df %>% dplyr::filter(species_id == s) + tmp <- df %>% dplyr::filter(.data$species_id == s) tmp <- tmp[,c("snv_id", "maf")] row.names(tmp) <- tmp$snv_id tmp$snv_id <- NULL @@ -43,7 +43,7 @@ convertWideToMaf <- function(df) { names(ret) <- sp summary <- lapply(sp, function(s) { - tmp <- df %>% dplyr::filter(species_id == s) + tmp <- df %>% dplyr::filter(.data$species_id == s) tmp <- tmp[,c("snv_id", "major", "minor")] row.names(tmp) <- tmp$snv_id tmp$snv_id <- NULL @@ -62,6 +62,7 @@ convertWideToMaf <- function(df) { #' combine the maf table produced by convertWideToMaf() #' @param mafs named list of the results of convertWideToMaf() #' @export +#' @return stana object combineMaf <- function(mafs) { if (is.null(names(mafs))) {stop("The list must be named")} sps <- do.call(union, lapply(names(mafs), function(s) { @@ -77,9 +78,9 @@ combineMaf <- function(mafs) { return(tmp) })) tbl <- tidyr::pivot_wider(tmpdf, - id_cols=snv_id, - names_from=sample, - values_from = maf) + id_cols="snv_id", + names_from="sample", + values_from = "maf") tbl <- data.frame(tbl) row.names(tbl) <- tbl$snv_id tbl$snv_id <- NULL diff --git a/R/doAdonis.R b/R/doAdonis.R index 0ddabc1..d6d7ea7 100644 --- a/R/doAdonis.R +++ b/R/doAdonis.R @@ -28,6 +28,7 @@ #' @importFrom stats as.formula dist #' @importFrom utils read.table #' @export +#' @return stana object doAdonis <- function(stana, specs, cl=NULL, target="snps", formula=NULL, distMethod="manhattan", pcoa=FALSE, @@ -144,7 +145,6 @@ doAdonis <- function(stana, specs, cl=NULL, } else { argList[["data"]] <- met } - adores <- do.call("adonis2", argList) if (pr) { diff --git a/R/doBoruta.R b/R/doBoruta.R index dcb1dcc..af6837f 100644 --- a/R/doBoruta.R +++ b/R/doBoruta.R @@ -17,6 +17,7 @@ #' @return list of Boruta object and plot #' @import ggplot2 #' @export +#' @return list containing Boruta results doBoruta <- function(stana, sp, cl=NULL, doFix=TRUE, target="genes", mat=NULL, whichToCount="ec_description", argList=list(), deleteZeroDepth=FALSE) { diff --git a/R/doGSEA.R b/R/doGSEA.R index 9541672..f8c47cc 100644 --- a/R/doGSEA.R +++ b/R/doGSEA.R @@ -17,7 +17,7 @@ #' @param background background TERM2GENE data #' @param gseaArgs list of argument passed to GSEA or fgsea function #' @importFrom MKmisc mod.t.test -#' @return GSEA results from clusterProfiler +#' @return stana object with GSEA results from clusterProfiler #' @export doGSEA <- function(stana, candSp=NULL, cl=NULL, eps=1e-2, how=sum, zeroPerc=0, rankMethod="modt", target="pathway", bg_filter=TRUE, @@ -169,64 +169,13 @@ plotGSEA <- function(stana, dataset_names=NULL, padjThreshold=0.05, } -#' addGeneAbundance -#' -#' Add the specified gene copy number to metadata -#' -#' @param stana stana object -#' @param candSp candidate species -#' @param IDs gene IDs to add -#' @param target KO or genes -#' @param how how to combine multiple IDs -#' @param newCol new column name -#' @param discNumeric convert discrete value to numeric -#' @param disc discretize the abundance by the threshold. function for calculating -#' threshold, like {median} -#' @param convert conversion such as log10 -#' @export -#' @return stana -addGeneAbundance <- function(stana, candSp, IDs, - target="KO", how=sum, newCol="gene", - disc=NULL, discNumeric=TRUE, convert=NULL) { - if (target=="KO") { - ints <- intersect(row.names(stana@kos[[candSp]]), IDs) - subMat <- stana@kos[[candSp]][ints, ] - } else { - ints <- intersect(row.names(stana@genes[[candSp]]), IDs) - subMat <- stana@genes[[candSp]][ints, ] - } - if (length(IDs)>1) { - adda <- apply(subMat, 2, how) - } else { - adda <- subMat - } - nm <- names(adda) - if (!is.null(disc)) { - thresh <- do.call(disc, list("x"=adda)) - if (discNumeric) { - adda <- as.numeric(adda > thresh) - } else { - adda <- adda > thresh - } - names(adda) <- nm - } - meta <- stana@meta - if (!is.null(convert)) { - adda <- do.call(convert, list(x=adda)) - } - meta[[newCol]] <- adda[row.names(stana@meta)] - stana@meta <- meta - return(stana) -} - - - #' reverse_annot #' @param stana stana object #' @param candSp candidate species #' @param candidate candidate gene family ids #' @param col column to use in eggNOG mapper annotation #' @export +#' @return tibble containing gene to gene family information reverseAnnot <- function(stana, candSp, candidate, col="KEGG_ko") { annot <- checkEGGNOG(annot_file=stana@eggNOG[[candSp]], col) annot %>% dplyr::filter(.data[["value"]] %in% candidate) %>% @@ -239,6 +188,7 @@ reverseAnnot <- function(stana, candSp, candidate, col="KEGG_ko") { #' @param candSp candidate species #' @param geneID gene IDs #' @export +#' @return vector of gene position obtainPositions <- function(stana, candSp, geneID) { tmp <- stana@snpsInfo[[candSp]] tmp[tmp$gene_id %in% geneID, ] %>% row.names() diff --git a/R/doL0.R b/R/doL0.R index 20d4767..b5af9de 100644 --- a/R/doL0.R +++ b/R/doL0.R @@ -1,7 +1,7 @@ #' doL0 #' -#' feature selection of classification between category -#' using snv frequency and gene matrix, based on L0Learn +#' Feature selection of classification between category +#' using snv frequency and gene copy number matrix, based on L0Learn #' #' @param stana stana object #' @param species candidate species ID diff --git a/R/inferAndPlotTree.R b/R/inferAndPlotTree.R index 38c66b0..963e8a6 100644 --- a/R/inferAndPlotTree.R +++ b/R/inferAndPlotTree.R @@ -33,7 +33,7 @@ #' @importFrom ggnewscale new_scale_fill #' @importFrom ggstar geom_star #' @importFrom scico scale_fill_scico_d scale_fill_scico scale_color_scico scale_color_scico_d -#' +#' @return stana object #' @export inferAndPlotTree <- function(stana, species=NULL, cl=NULL, dist_method="dist.ml", meta=NULL, layout="circular", diff --git a/R/loadInStrain.R b/R/loadInStrain.R index ef09fd5..2a11525 100644 --- a/R/loadInStrain.R +++ b/R/loadInStrain.R @@ -16,7 +16,7 @@ #' (needs pooled SNV data, otherwise use genome wide compraison table) #' @param fill_na fill NA in resulting data.frame by -1 #' @param skip_pool skip pooled results loading -#' +#' @return stana object #' @export loadInStrain <- function(compare_out_dir, candidate_species, diff --git a/R/loadMIDAS.R b/R/loadMIDAS.R index 046a745..00414fa 100644 --- a/R/loadMIDAS.R +++ b/R/loadMIDAS.R @@ -18,6 +18,7 @@ #' @param loadDepth default to FALSE, load depth information. #' @param only_stat return stat for snp and gene only #' @import GetoptLong +#' @return stana object #' @export loadMIDAS <- function(midas_merge_dir, cl=NULL, filtType="group", candSp=NULL, diff --git a/R/loadMIDAS2Refine.R b/R/loadMIDAS2Refine.R index d42519e..68016b1 100644 --- a/R/loadMIDAS2Refine.R +++ b/R/loadMIDAS2Refine.R @@ -7,7 +7,7 @@ #' #' @param midas_merge_dir path to merged directory #' @param cl named list of category for samples -#' @param filtBy filter by {snps} number or {genes} number (default to snps) +#' @param filtBy filter by snps number or genes number (default to snps) #' @param filtNum the species with the samples above this number will be returned #' @param filtPer filter by fraction #' @param db data base that was used to profile, default to gtdb. @@ -19,6 +19,7 @@ #' @param loadDepth default to FALSE, load depth information. #' @param only_stat only samples per species is returned (snpStat and geneStat) #' @importFrom dplyr mutate +#' @return stana object #' @export #' loadMIDAS2 <- function(midas_merge_dir, diff --git a/R/loadmetaSNV.R b/R/loadmetaSNV.R index e0087a4..920a58f 100644 --- a/R/loadmetaSNV.R +++ b/R/loadmetaSNV.R @@ -10,6 +10,7 @@ #' @param just_species just return species id #' @param candSp candidate species ID #' @import GetoptLong +#' @return stana object #' @export loadmetaSNV <- function(metasnv_out_dir, cl=NULL, just_species=FALSE, candSp=NULL) { diff --git a/R/plotCoverage.R b/R/plotCoverage.R index cc5a23f..d194eaa 100644 --- a/R/plotCoverage.R +++ b/R/plotCoverage.R @@ -10,7 +10,7 @@ #' @param pointSize scatter point size #' @import ggplot2 #' @export -#' @return ggplot +#' @return ggplot2 object #' plotCoverage <- function(stana, species, cl=NULL, pointSize=5) { diff --git a/R/plotDist.R b/R/plotDist.R index 3a1e836..9db87ee 100644 --- a/R/plotDist.R +++ b/R/plotDist.R @@ -15,6 +15,7 @@ #' @param distArg passed to `dist() #' @param candidate list of IDs included for the calculation (like SNV IDs) #' @export +#' @return pheatmap object plotDist <- function(stana, sp, cl=NULL, AAfunc=dist.ml, AAargs=list(), target="snps", distMethod="manhattan", distArg=list(), candidate=NULL) { if (is.null(cl)) {cl <- stana@cl} diff --git a/R/plotGenes.R b/R/plotGenes.R index fa76589..f494ad4 100644 --- a/R/plotGenes.R +++ b/R/plotGenes.R @@ -14,7 +14,7 @@ #' @param geom default to geom_violin, can be changed to the geom like `geom_boxplot` #' @import ggplot2 #' @export -#' @return ggplot +#' @return ggplot2 object #' plotGenes <- function(stana, species, geneID, target="genes", cl=NULL, return_df=FALSE, geom=geom_violin()) { diff --git a/R/plotHeatmap.R b/R/plotHeatmap.R index fce50dc..dc36a94 100644 --- a/R/plotHeatmap.R +++ b/R/plotHeatmap.R @@ -20,10 +20,10 @@ #' are removed before sample filtering. As typically gene matrix is large, for further filtering, please use `mat` option #' @param filter_max_frac remove genes with values below `filter_max_value` in this fraction of sample #' @param filter_max_value max value for copy numbers (default to 50), coupled with filter_max_frac -#' @param variable If specified other than 0, subset to top-{variable} variation gene numbers. +#' @param variable If specified other than 0, subset to top-variable variation gene numbers. #' @importFrom ComplexHeatmap Heatmap #' @export -#' +#' @return ComplexHeatmap plotHeatmap <- function(stana, sp, cl=NULL, k=10, mat=NULL, geneID=NULL, variable=0, fnc="KEGG_Pathway", removeHigh=TRUE, removeAdditional=NULL, max_words=10, diff --git a/R/plotPCA.R b/R/plotPCA.R index c485176..6da6f75 100644 --- a/R/plotPCA.R +++ b/R/plotPCA.R @@ -15,6 +15,7 @@ #' @import ggplot2 #' @importFrom stats prcomp #' @export +#' @return ggplot #' plotPCA <- function(stana, species, cl=NULL, target="snps", ignoreZero=FALSE, replaceZero=FALSE, diff --git a/R/rankComponents.R b/R/rankComponents.R index 7d818a1..57bd4f8 100644 --- a/R/rankComponents.R +++ b/R/rankComponents.R @@ -12,6 +12,7 @@ #' @param rankMethod how to rank genes #' @param statHow how to aggregate the multiple statistical values #' @export +#' @return tibble containing name and rank of components rankComponents <- function(stana, pid, candSp=NULL, cl=NULL, eps=1e-2, how=sum, rankMethod="modt", statHow=mean) { if (is.null(candSp)) {candSp <- stana@ids[1]} diff --git a/R/utils.R b/R/utils.R index a0ed6d2..f16ba1f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -2,6 +2,7 @@ #' @param stana stana object #' @param species species id #' @param cutoff cutoff value, default to 0.35 +#' @return discretized data.frame #' @export cnDiscretize <- function(stana, species, cutoff=0.35) { df <- stana@genes[[species]] diff --git a/inst/extdata/app_stana.R b/inst/extdata/app_stana.R index a59319d..01cb734 100644 --- a/inst/extdata/app_stana.R +++ b/inst/extdata/app_stana.R @@ -842,13 +842,13 @@ server <- function(input, output, session) { melted <- reshape2::melt(stb, measure.vars=as.character(seq_len(rank))) panelShow <- ggplot(melted, aes(x=group, y=value)) + geom_point()+ - facet_grid(. ~ variable, scale="free")+ + facet_grid(. ~ variable, scales="free")+ cowplot::theme_cowplot()+ cowplot::panel_border() } else { melted <- reshape2::melt(stb) panelShow <- ggplot(melted, aes(fill=variable, y=value, x=sample)) + geom_col(position="fill")+ - facet_grid(. ~ group, scale="free")+ + facet_grid(. ~ group, scales="free")+ cowplot::theme_cowplot()+ cowplot::panel_border()+ theme(axis.text.x = element_blank()) } diff --git a/man/NMF.Rd b/man/NMF.Rd index 1b62f52..b5b66b4 100644 --- a/man/NMF.Rd +++ b/man/NMF.Rd @@ -55,7 +55,10 @@ When estimate is TRUE, only the error matrix is returned.} \item{tss}{perform total sum scaling to the matrix} \item{remove_na_perc}{remove features with too many NA -(features with NA below {remove_na_perc} * overall sample numbers will be retained)} +(features with NA below remove_na_perc * overall sample numbers will be retained)} +} +\value{ +stana object } \description{ decompose SNV, gene content, or gene family abundance to diff --git a/man/addGeneAbundance.Rd b/man/addGeneAbundance.Rd index edfefb4..c14916c 100644 --- a/man/addGeneAbundance.Rd +++ b/man/addGeneAbundance.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/doGSEA.R +% Please edit documentation in R/addGeneAbundance.R \name{addGeneAbundance} \alias{addGeneAbundance} \title{addGeneAbundance} @@ -30,14 +30,14 @@ addGeneAbundance( \item{newCol}{new column name} \item{disc}{discretize the abundance by the threshold. function for calculating -threshold, like {median}} +threshold, like median} \item{discNumeric}{convert discrete value to numeric} \item{convert}{conversion such as log10} } \value{ -stana +stana object } \description{ Add the specified gene copy number to metadata diff --git a/man/calcGF.Rd b/man/calcGF.Rd index 20a6bfb..97fe850 100644 --- a/man/calcGF.Rd +++ b/man/calcGF.Rd @@ -17,6 +17,9 @@ calcGF(stana, candSp = NULL, how = sum, annot = "eggNOG", column = "KEGG_ko") \item{column}{When eggNOG, which family to summarize, default to KEGG_ko} } +\value{ +stana object +} \description{ -calcGF +Calculate gene family profile } diff --git a/man/checkPATRIC.Rd b/man/checkPATRIC.Rd index 1c0ae06..10be145 100644 --- a/man/checkPATRIC.Rd +++ b/man/checkPATRIC.Rd @@ -35,6 +35,9 @@ checkPATRIC( \item{skipGraph}{skip graph plotting} } +\value{ +list of annotations +} \description{ Obtain gene information from PATRIC server. Input named list of genes, and returns queried results, diff --git a/man/checkPATRICSimple.Rd b/man/checkPATRICSimple.Rd index 38e8114..02566de 100644 --- a/man/checkPATRICSimple.Rd +++ b/man/checkPATRICSimple.Rd @@ -11,6 +11,9 @@ checkPATRICSimple(genes, whichToCount = "ec_description") \item{whichToCount}{ec_description, ec_number, pathway_name, pathway_id} } +\value{ +list of annotation +} \description{ Obtain gene information from PATRIC server. Input named list of genes, and returns named list of functions. diff --git a/man/cnDiscretize.Rd b/man/cnDiscretize.Rd index 67d0616..48b51b6 100644 --- a/man/cnDiscretize.Rd +++ b/man/cnDiscretize.Rd @@ -13,6 +13,9 @@ cnDiscretize(stana, species, cutoff = 0.35) \item{cutoff}{cutoff value, default to 0.35} } +\value{ +discretized data.frame +} \description{ Discretize copy number matrix at specified cutoff } diff --git a/man/combineMaf.Rd b/man/combineMaf.Rd index 6bed097..f2facc2 100644 --- a/man/combineMaf.Rd +++ b/man/combineMaf.Rd @@ -10,6 +10,9 @@ combineMaf(mafs) \arguments{ \item{mafs}{named list of the results of convertWideToMaf()} } +\value{ +stana object +} \description{ combineMaf combine the maf table produced by convertWideToMaf() diff --git a/man/compareGenes.Rd b/man/compareGenes.Rd index dbce966..15a7017 100644 --- a/man/compareGenes.Rd +++ b/man/compareGenes.Rd @@ -27,7 +27,7 @@ compareGenes( \item{verbose_zero}{show zero abundance genes} } \value{ -ggplot +list of statistical results from wilcox.exact } \description{ compare the gene abundances by recursively performing Wilcoxon rank-sum tests diff --git a/man/consensusseq.Rd b/man/consensusseq.Rd index 19d067b..10c364e 100644 --- a/man/consensusseq.Rd +++ b/man/consensusseq.Rd @@ -34,6 +34,7 @@ consensusSeqMIDAS2( site_prev = 0.9, locus_type = "CDS", site_list = NULL, + site_type = c("1D", "2D", "3D", "4D"), cl = NULL, max_sites = Inf, tree = FALSE, @@ -56,6 +57,7 @@ consensusSeqMIDAS1( max_sites = Inf, tree = FALSE, locus_type = "CDS", + site_type = c("1D", "2D", "3D", "4D"), keep_samples = NULL, verbose = FALSE, site_list = NULL, @@ -109,8 +111,19 @@ site prevalence across samples} \item{site_list}{site list to be included} +\item{site_type}{site types to be included} + \item{max_sites}{default to Inf, max sites to be retained} } +\value{ +stana object + +stana object + +stana object + +stana object +} \description{ Output consensus sequences from merged SNV output. Optionally, return phylogenetic tree inferred by `phangorn`. diff --git a/man/doAdonis.Rd b/man/doAdonis.Rd index ba2dea7..b0cb4ab 100644 --- a/man/doAdonis.Rd +++ b/man/doAdonis.Rd @@ -51,6 +51,9 @@ Otherwise the cell is treated as NA} \item{distArg}{passed to `dist()`} } +\value{ +stana object +} \description{ Perform PERMANOVA on distance matrix based on various distances based on such as SNV frequency or diff --git a/man/doBoruta.Rd b/man/doBoruta.Rd index 48c20af..9a7a8de 100644 --- a/man/doBoruta.Rd +++ b/man/doBoruta.Rd @@ -39,6 +39,8 @@ pass to checkPATRIC() (in MIDAS1)} } \value{ list of Boruta object and plot + +list containing Boruta results } \description{ feature selection of classification between category diff --git a/man/doGSEA.Rd b/man/doGSEA.Rd index 3803023..b057241 100644 --- a/man/doGSEA.Rd +++ b/man/doGSEA.Rd @@ -43,7 +43,7 @@ Default to zero, not recommended in GSEA} \item{gseaArgs}{list of argument passed to GSEA or fgsea function} } \value{ -GSEA results from clusterProfiler +stana object with GSEA results from clusterProfiler } \description{ Based on KEGG database, GSEA will be performed by the function. diff --git a/man/doL0.Rd b/man/doL0.Rd index 5b3d334..2cd5cdc 100644 --- a/man/doL0.Rd +++ b/man/doL0.Rd @@ -34,6 +34,6 @@ otherwise the raw gene matrix is used.} L0Learn object } \description{ -feature selection of classification between category -using snv frequency and gene matrix, based on L0Learn +Feature selection of classification between category +using snv frequency and gene copy number matrix, based on L0Learn } diff --git a/man/drawEGGNOG.Rd b/man/drawEGGNOG.Rd index 9dca386..c08c650 100644 --- a/man/drawEGGNOG.Rd +++ b/man/drawEGGNOG.Rd @@ -17,6 +17,9 @@ drawEGGNOG(annot_file, geneIDs, candPlot) "KEGG_rclass" "BRITE" "KEGG_TC" "CAZy" "BiGG_Reaction" "PFAMs"} } +\value{ +list containing ggplot2 object and graph +} \description{ draw the graph representing the eggNOG annotation } diff --git a/man/drawPATRIC.Rd b/man/drawPATRIC.Rd index fed1a8f..38d48fd 100644 --- a/man/drawPATRIC.Rd +++ b/man/drawPATRIC.Rd @@ -32,6 +32,9 @@ drawPATRIC( \item{nodeSize}{'count' or 'degree'} } +\value{ +checkPATRIC results +} \description{ draw graph of ec and kegg pathway name using checkPATRIC(). diff --git a/man/inferAndPlotTree.Rd b/man/inferAndPlotTree.Rd index 787db99..47d29d1 100644 --- a/man/inferAndPlotTree.Rd +++ b/man/inferAndPlotTree.Rd @@ -66,6 +66,9 @@ In this case, FastTree should be in PATH.} \item{tree_only}{return tree only instead of stana object} } +\value{ +stana object +} \description{ infer the tree and plot. If there is already a tree in the list from `treeList` slot, inferring will not be performed. diff --git a/man/loadInStrain.Rd b/man/loadInStrain.Rd index 8aecc9b..8913853 100644 --- a/man/loadInStrain.Rd +++ b/man/loadInStrain.Rd @@ -28,6 +28,9 @@ need `output` in the directory} \item{skip_pool}{skip pooled results loading} } +\value{ +stana object +} \description{ For InStrain, you should provide `compare` output It can import outputs with --bams options implmented from version 1.6. diff --git a/man/loadMIDAS.Rd b/man/loadMIDAS.Rd index c93f0e6..fd6e5c5 100644 --- a/man/loadMIDAS.Rd +++ b/man/loadMIDAS.Rd @@ -42,6 +42,9 @@ for each category is returned} \item{geneType}{"presabs" or "copynum"} } +\value{ +stana object +} \description{ Assess and store profile for species and return filtered species based on the number of samples for each category or whole population. diff --git a/man/loadMIDAS2.Rd b/man/loadMIDAS2.Rd index c81deb7..0df72c7 100644 --- a/man/loadMIDAS2.Rd +++ b/man/loadMIDAS2.Rd @@ -31,7 +31,7 @@ loadMIDAS2( \item{only_stat}{only samples per species is returned (snpStat and geneStat)} -\item{filtBy}{filter by {snps} number or {genes} number (default to snps)} +\item{filtBy}{filter by snps number or genes number (default to snps)} \item{filtPer}{filter by fraction} @@ -47,6 +47,9 @@ loadMIDAS2( \item{loadDepth}{default to FALSE, load depth information.} } +\value{ +stana object +} \description{ load the MIDAS2 merge command output. The location to lz4 binary must be added to PATH. diff --git a/man/loadmetaSNV.Rd b/man/loadmetaSNV.Rd index 98d7611..52c24bd 100644 --- a/man/loadmetaSNV.Rd +++ b/man/loadmetaSNV.Rd @@ -15,6 +15,9 @@ loadmetaSNV(metasnv_out_dir, cl = NULL, just_species = FALSE, candSp = NULL) \item{candSp}{candidate species ID} } +\value{ +stana object +} \description{ Assess and store profile for species for metaSNV. } diff --git a/man/obtainPositions.Rd b/man/obtainPositions.Rd index ecdb39c..c3c0fe0 100644 --- a/man/obtainPositions.Rd +++ b/man/obtainPositions.Rd @@ -13,6 +13,9 @@ obtainPositions(stana, candSp, geneID) \item{geneID}{gene IDs} } +\value{ +vector of gene position +} \description{ obtain_positions } diff --git a/man/plotCoverage.Rd b/man/plotCoverage.Rd index 4c4303f..904ccad 100644 --- a/man/plotCoverage.Rd +++ b/man/plotCoverage.Rd @@ -16,7 +16,7 @@ plotCoverage(stana, species, cl = NULL, pointSize = 5) \item{pointSize}{scatter point size} } \value{ -ggplot +ggplot2 object } \description{ plot the scatters of mean coverage across groups diff --git a/man/plotDist.Rd b/man/plotDist.Rd index 263f551..64ad5fd 100644 --- a/man/plotDist.Rd +++ b/man/plotDist.Rd @@ -35,6 +35,9 @@ plotDist( \item{candidate}{list of IDs included for the calculation (like SNV IDs)} } +\value{ +pheatmap object +} \description{ Perform distance calculation based on the matrix stored in stana, and plot the heatmap with grouping diff --git a/man/plotGenes.Rd b/man/plotGenes.Rd index e5ee068..5ad4a6b 100644 --- a/man/plotGenes.Rd +++ b/man/plotGenes.Rd @@ -30,7 +30,7 @@ plotGenes( \item{geom}{default to geom_violin, can be changed to the geom like `geom_boxplot`} } \value{ -ggplot +ggplot2 object } \description{ Plot the plot for specific genes diff --git a/man/plotHeatmap.Rd b/man/plotHeatmap.Rd index 60ab7f1..c6aa09d 100644 --- a/man/plotHeatmap.Rd +++ b/man/plotHeatmap.Rd @@ -35,7 +35,7 @@ and column sample names} \item{geneID}{gene id to subset} -\item{variable}{If specified other than 0, subset to top-{variable} variation gene numbers.} +\item{variable}{If specified other than 0, subset to top-variable variation gene numbers.} \item{fnc}{One of `KEGG_Pathway` or `KEGG_Module`, when eggNOG annotation is used} @@ -52,6 +52,9 @@ are removed before sample filtering. As typically gene matrix is large, for furt \item{filter_max_value}{max value for copy numbers (default to 50), coupled with filter_max_frac} } +\value{ +ComplexHeatmap +} \description{ Plot a heatmap of gene copy number. } diff --git a/man/plotPCA.Rd b/man/plotPCA.Rd index e7bc1d2..2d77914 100644 --- a/man/plotPCA.Rd +++ b/man/plotPCA.Rd @@ -35,6 +35,9 @@ plotPCA( \item{useBlend}{use ggblend for point} } +\value{ +ggplot +} \description{ plot the PCA results from snp or gene table } diff --git a/man/rankComponents.Rd b/man/rankComponents.Rd index 2da3e3e..2969403 100644 --- a/man/rankComponents.Rd +++ b/man/rankComponents.Rd @@ -32,6 +32,9 @@ rankComponents( \item{statHow}{how to aggregate the multiple statistical values} } +\value{ +tibble containing name and rank of components +} \description{ Returns the rank of the components in the graph based on the statistical values inside the graph. diff --git a/man/reverseAnnot.Rd b/man/reverseAnnot.Rd index 9759190..7321c77 100644 --- a/man/reverseAnnot.Rd +++ b/man/reverseAnnot.Rd @@ -15,6 +15,9 @@ reverseAnnot(stana, candSp, candidate, col = "KEGG_ko") \item{col}{column to use in eggNOG mapper annotation} } +\value{ +tibble containing gene to gene family information +} \description{ reverse_annot }