Skip to content

Commit

Permalink
Merge pull request #39 from MoseleyBioinformaticsLab/simple_helpers
Browse files Browse the repository at this point in the history
Simple helpers
  • Loading branch information
rmflight authored Feb 28, 2024
2 parents d4fa5d6 + c2ec7a3 commit bb4ec9e
Show file tree
Hide file tree
Showing 10 changed files with 189 additions and 67 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: categoryCompare2
Version: 0.100.8
Version: 0.100.11
Title: Meta-Analysis of High-Throughput Experiments Using Feature
Annotations
Author: Robert M. Flight <rflight79@gmail.com>
Expand All @@ -25,5 +25,5 @@ SystemRequirements: Cytoscape (>= 3.0) (if used for visualization of
biocViews: Annotation, GO, MultipleComparison, Pathways, GeneExpression
VignetteBuilder: knitr
Encoding: UTF-8
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
Config/testthat/edition: 3
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export(combined_statistics)
export(csv_annotation_table)
export(enriched_result)
export(executable_path)
export(extract_enrich_stats)
export(extract_statistics)
export(filter_annotation_graph)
export(generate_annotation_graph)
Expand Down Expand Up @@ -58,15 +59,26 @@ exportClasses(combined_enrichment)
exportClasses(hypergeom_features)
exportClasses(node_assign)
exportClasses(statistical_results)
exportMethods(Extract)
exportMethods(a)
exportMethods(annotation_combinations)
exportMethods(combine_annotations)
exportMethods(combine_enrichments)
exportMethods(enrich)
exportMethods(enrichment)
exportMethods(extract)
exportMethods(extract_statistics)
exportMethods(from)
exportMethods(generate_annotation_graph)
exportMethods(generate_table)
exportMethods(get_significant_annotations)
exportMethods(object.)
exportMethods(remove_edges)
exportMethods(show)
exportMethods(single)
exportMethods(statistical)
exportMethods(stats)
exportMethods(table)
import(methods)
importFrom(base64enc,dataURI)
importFrom(colorspace,desaturate)
Expand Down
17 changes: 17 additions & 0 deletions R/combine_enrichments.R
Original file line number Diff line number Diff line change
Expand Up @@ -584,3 +584,20 @@ setMethod("extract_statistics", signature = list(in_results = "combined_enrichme
#'
#' @param combined_enrichment a \code{\link{combined_enrichment}} object
#' @exportMethod


#' extract enrich stats
#'
#' Extract the statistical table from a single enrichment result object.
#'
#' The statistics stored in the \code{statistics} slot are converted to a
#' data.frame, and two columns are added: \code{ID}, holding the annotation
#' identifiers from the \code{statistics} slot, and \code{description}, holding
#' the entries of the \code{annotation} slot's description looked up by those
#' identifiers.
#'
#' @param enrichment_result the enrichment result object
#'
#' @export
#' @return data.frame with one column per statistic plus \code{ID} and
#'   \code{description}
extract_enrich_stats = function(enrichment_result)
{
  stats = as.data.frame(enrichment_result@statistics@statistic_data)
  # Must read from the `enrichment_result` argument; the previous spelling
  # (`enrichment_results`) referred to an object that does not exist in this
  # function and made every call fail.
  stats$ID = enrichment_result@statistics@annotation_id
  # Descriptions are looked up by annotation ID (name-based indexing).
  stats$description = enrichment_result@annotation@description[stats$ID]
  return(stats)
}
81 changes: 81 additions & 0 deletions R/gocats.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#' gocats to annotations
#'
#' Transforms a gocats ancestors JSON list to a GO annotation object.
#'
#' The ancestors file is read with \code{jsonlite::fromJSON} and is expected to
#' be keyed by feature ID. It is reversed so that each GO term lists its
#' (unique) features. If a namespace file is supplied and exists, the namespaces
#' are shortened to \code{BP}, \code{MF} and \code{CC}. If the \code{GO.db}
#' package is installed, the GO term names are used as descriptions. When both
#' are available, each description is prefixed with its short namespace
#' (\code{"BP:..."}).
#'
#' @param ancestors_file the ancestors.json file from gocats (required)
#' @param namespace_file the namespace.json file from gocats (optional). Use
#'   \code{NULL} to skip it; a file that does not exist produces a message and
#'   is skipped.
#' @param annotation_type what annotations are we making? (gocatsGO by default)
#' @param feature_type what type of features are we using (assume Uniprot)
#' @param feature_translation a data.frame used to convert the feature IDs. It
#'   must contain the columns \code{from} (IDs as used in the ancestors file)
#'   and \code{to} (IDs to use instead); other columns are ignored. Features
#'   with no entry in \code{from} are dropped.
#'
#' @return annotation object
#' @export
gocats_to_annotation = function(ancestors_file = "ancestors.json",
                                namespace_file = "namespace.json",
                                annotation_type = "gocatsGO",
                                feature_type = "Uniprot",
                                feature_translation = NULL)
{
  stopifnot(file.exists(ancestors_file))

  ancestors = jsonlite::fromJSON(ancestors_file)

  if (!is.null(feature_translation)) {
    if (!inherits(feature_translation, "data.frame")) {
      stop("feature_translation must be a data.frame!")
    }
    # Require that BOTH columns are present. The previous test asked whether
    # every column name was one of "from"/"to", which rejected tables carrying
    # extra columns and accepted tables missing one of the two.
    if (!all(c("from", "to") %in% names(feature_translation))) {
      stop("feature_translation must contain the columns 'from' and 'to'!")
    }

    # Keep only the features that can be translated, then rename them.
    match_names = intersect(feature_translation$from, names(ancestors))

    ancestors = ancestors[match_names]
    feature_translation = feature_translation[feature_translation$from %in% match_names, ]
    translations = feature_translation$to
    names(translations) = feature_translation$from
    # Name-based lookup puts the translations in the same order as `ancestors`.
    translations = translations[match_names]
    names(ancestors) = translations
  }

  # feature -> GO terms becomes GO term -> features, without duplicates.
  go_2_gene = Biobase::reverseSplit(ancestors)
  go_2_gene = purrr::map(go_2_gene, unique)

  if (!is.null(namespace_file)) {
    if (!file.exists(namespace_file)) {
      message(paste0(namespace_file, " does not exist. GO namespace will not be updated."))
      namespaces_short = character(0)
    } else {
      namespaces = jsonlite::fromJSON(namespace_file) |> unlist()
      namespaces_short = gsub("biological_process", "BP", namespaces)
      namespaces_short = gsub("molecular_function", "MF", namespaces_short)
      namespaces_short = gsub("cellular_component", "CC", namespaces_short)
    }
  } else {
    namespaces_short = character(0)
  }


  # GO.db is optional; without it the annotation simply has no descriptions.
  if (requireNamespace("GO.db", quietly = TRUE)) {
    descriptions = suppressMessages(AnnotationDbi::select(GO.db::GO.db, keys = names(go_2_gene), columns = "TERM", keytype = "GOID")$TERM)
    names(descriptions) = names(go_2_gene)
  } else {
    message("GO.db is not installed, no descriptions will be added to the GO terms.")
    descriptions = character(0)
  }

  # Only prefix the namespace when both pieces of information are available.
  if ((length(namespaces_short) > 0) && (length(descriptions) > 0)) {
    namespaces_short = namespaces_short[names(go_2_gene)]
    descriptions = descriptions[names(go_2_gene)]
    descriptions = paste0(namespaces_short, ":", descriptions)
    names(descriptions) = names(go_2_gene)
  }

  out_annotation = annotation(annotation_features = go_2_gene,
                              annotation_type = annotation_type,
                              description = descriptions,
                              feature_type = feature_type)
  return(out_annotation)

}
56 changes: 0 additions & 56 deletions R/managing_annotations.R
Original file line number Diff line number Diff line change
Expand Up @@ -251,59 +251,3 @@ json_annotation_reversal <- function(json_file, out_file = "annotations.json",
out_json <- annotation_2_json(out_annotation, out_file)
out_json
}


#' annotation from GOcats
#'
#' Reads a GOcats JSON file that maps features to annotations and reverses it
#' into an annotation object that maps annotations to features. Descriptions
#' are carried over when the file provides them.
#'
#' If the parsed JSON has a single top-level element, that element is used as
#' the content. An \code{Annotations} entry, when present, supplies the feature
#' to annotation mapping; otherwise the whole content is used. A
#' \code{Description} entry must be a named list, otherwise a warning is given
#' and no descriptions are stored.
#'
#' @param json_file the json file to use
#' @param feature_type the type of features
#' @param annotation_type the type of annotations
#'
#' @importFrom jsonlite fromJSON
#' @export
#' @return an annotation object
#'
gocats_to_annotation <- function(json_file,
                                 feature_type = NULL,
                                 annotation_type = NULL){
  stopifnot(file.exists(json_file))

  parsed_json <- jsonlite::fromJSON(json_file, simplifyVector = FALSE, flatten = TRUE)
  # Unwrap a single top-level container.
  if (length(parsed_json) == 1) {
    parsed_json <- parsed_json[[1]]
  }

  # Without an explicit Annotations entry, the whole content is assumed to be
  # the feature -> annotation mapping.
  feature_to_annotation <- parsed_json$Annotations
  if (is.null(feature_to_annotation)) {
    feature_to_annotation <- parsed_json
  }

  # Default to no descriptions; only a list-valued Description entry is used.
  term_descriptions <- character(0)
  description_entry <- parsed_json$Description
  if (!is.null(description_entry)) {
    if (is.list(description_entry)) {
      term_descriptions <- unlist(description_entry, use.names = TRUE)
    } else {
      warning("Description must be a named list! Removing Descriptions!")
    }
  }

  # Flip to annotation -> features and drop duplicated features.
  annotation_to_feature <- purrr::map(
    Biobase::reverseSplit(feature_to_annotation),
    unique
  )

  annotation(annotation_features = annotation_to_feature,
             description = term_descriptions,
             links = character(0),
             annotation_type = annotation_type,
             feature_type = feature_type)
}
Binary file added inst/extdata/test_data/ancestors.json.gz
Binary file not shown.
Binary file added inst/extdata/test_data/namespace.json.gz
Binary file not shown.
21 changes: 21 additions & 0 deletions man/extract_enrich_stats.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 18 additions & 9 deletions man/gocats_to_annotation.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions tests/testthat/test-gocats.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
test_that("gocats annotation importing works", {
  # The translation table comes from org.Hs.eg.db and the description checks
  # need GO.db; skip cleanly where these optional packages are unavailable.
  skip_if_not_installed("org.Hs.eg.db")
  skip_if_not_installed("GO.db")

  ancestors_file = system.file("extdata", "test_data", "ancestors.json.gz", package = "categoryCompare2")
  namespace_file = system.file("extdata", "test_data", "namespace.json.gz", package = "categoryCompare2")

  # Build a Uniprot ("from") -> ENSEMBL ("to") translation table.
  ensembl_keys = AnnotationDbi::keys(org.Hs.eg.db::org.Hs.eg.db, keytype = "ENSEMBL")
  ensembl_uniprot = suppressMessages(AnnotationDbi::select(org.Hs.eg.db::org.Hs.eg.db, keys = ensembl_keys,
                                                           keytype = "ENSEMBL", columns = c("ENSEMBL", "UNIPROT")))
  names(ensembl_uniprot) = c("to", "from")
  ensembl_uniprot = ensembl_uniprot[!(is.na(ensembl_uniprot$to)) & !is.na(ensembl_uniprot$from), ]
  # Same data with the wrong column names, and as a plain list.
  bad_translation = ensembl_uniprot
  names(bad_translation) = c("other", "one")
  list_translation = as.list(ensembl_uniprot)

  expect_error(gocats_to_annotation("random_file"))
  # A missing namespace file is reported with message(), not warning().
  expect_message(gocats_to_annotation(ancestors_file, "random_file"), "does not exist")
  expect_error(gocats_to_annotation(ancestors_file, feature_translation = list_translation), "must be a data.frame")
  expect_error(gocats_to_annotation(ancestors_file, feature_translation = bad_translation), "must contain the columns")

  without_namespace = gocats_to_annotation(ancestors_file, namespace_file = NULL)
  has_bp = all(grepl("^BP|^CC|^MF", without_namespace@description))
  expect_true(!has_bp)
  with_namespace = gocats_to_annotation(ancestors_file, namespace_file)
  has_namespace = all(grepl("^BP|^CC|^MF", with_namespace@description))
  expect_true(has_namespace)

  expect_equal(without_namespace@annotation_features[[1]][1], "O60313")
  expect_equal(without_namespace@annotation_type, "gocatsGO")
  expect_equal(without_namespace@feature_type, "Uniprot")
  expect_equal(length(without_namespace@annotation_features), 22415)

  with_translation = gocats_to_annotation(ancestors_file, namespace_file,
                                          feature_type = "ENSEMBL",
                                          annotation_type = "whatever",
                                          feature_translation = ensembl_uniprot)
  expect_equal(with_translation@annotation_features[[1]][1], "ENSG00000143799")
  expect_equal(with_translation@annotation_type, "whatever")
  expect_equal(with_translation@feature_type, "ENSEMBL")
})

0 comments on commit bb4ec9e

Please sign in to comment.