-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #39 from MoseleyBioinformaticsLab/simple_helpers
Simple helpers
- Loading branch information
Showing
10 changed files
with
189 additions
and
67 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#' gocats to annotations
#'
#' Transforms a gocats ancestors JSON list to a GO annotation object.
#'
#' The ancestors file is read as a list named by feature ID whose entries are
#' GO term IDs. That list is inverted so each GO term holds its unique
#' features. When both namespaces and term descriptions are available, each
#' description is prefixed with the short namespace (BP, MF or CC) and a colon.
#'
#' @param ancestors_file the ancestors.json file from gocats (required)
#' @param namespace_file the namespace.json file from gocats (optional). Use
#'   NULL to skip namespaces. A path that does not exist raises a warning and
#'   the namespaces are left out.
#' @param annotation_type what annotations are we making? (gocatsGO by default)
#' @param feature_type what type of features are we using (assume Uniprot)
#' @param feature_translation a data.frame used to convert the feature IDs. It
#'   must contain the columns 'from' (IDs as they appear in the ancestors file)
#'   and 'to' (the replacement IDs); other columns are ignored. Features
#'   without an entry in 'from' are dropped, and when a 'from' ID occurs more
#'   than once only its first 'to' value is used.
#'
#' @return annotation object
#' @export
gocats_to_annotation = function(ancestors_file = "ancestors.json",
                                namespace_file = "namespace.json",
                                annotation_type = "gocatsGO",
                                feature_type = "Uniprot",
                                feature_translation = NULL) {
  stopifnot(file.exists(ancestors_file))

  ancestors = jsonlite::fromJSON(ancestors_file)

  if (!is.null(feature_translation)) {
    if (!inherits(feature_translation, "data.frame")) {
      stop("feature_translation must be a data.frame!")
    }
    # Both columns must be present; additional columns are harmless.
    if (!all(c("from", "to") %in% names(feature_translation))) {
      stop("feature_translation must contain the columns 'from' and 'to'!")
    }

    # Keep only the features that can be translated, then rename them using
    # the first matching row of the translation table.
    match_names = intersect(feature_translation[["from"]], names(ancestors))
    ancestors = ancestors[match_names]
    first_match = match(match_names, feature_translation[["from"]])
    names(ancestors) = feature_translation[["to"]][first_match]
  }

  # Invert feature -> GO terms into GO term -> features.
  go_2_gene = Biobase::reverseSplit(ancestors)
  go_2_gene = purrr::map(go_2_gene, unique)

  namespaces_short = character(0)
  if (!is.null(namespace_file)) {
    if (!file.exists(namespace_file)) {
      warning(paste0(namespace_file,
                     " does not exist. GO namespace will not be updated."))
    } else {
      namespaces = jsonlite::fromJSON(namespace_file) |> unlist()
      namespaces_short = gsub("biological_process", "BP", namespaces)
      namespaces_short = gsub("molecular_function", "MF", namespaces_short)
      namespaces_short = gsub("cellular_component", "CC", namespaces_short)
    }
  }

  # GO.db is optional; without it the annotation carries no descriptions.
  if (requireNamespace("GO.db", quietly = TRUE)) {
    term_table = suppressMessages(
      AnnotationDbi::select(GO.db::GO.db,
                            keys = names(go_2_gene),
                            columns = "TERM",
                            keytype = "GOID")
    )
    descriptions = term_table$TERM
    names(descriptions) = names(go_2_gene)
  } else {
    message("GO.db is not installed, no descriptions will be added to the GO terms.")
    descriptions = character(0)
  }

  # The namespace prefix is only added when there is something to prefix.
  if ((length(namespaces_short) > 0) && (length(descriptions) > 0)) {
    namespaces_short = namespaces_short[names(go_2_gene)]
    descriptions = descriptions[names(go_2_gene)]
    descriptions = paste0(namespaces_short, ":", descriptions)
    names(descriptions) = names(go_2_gene)
  }

  annotation(annotation_features = go_2_gene,
             annotation_type = annotation_type,
             description = descriptions,
             feature_type = feature_type)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Exercises gocats_to_annotation(): input validation, namespace prefixes on
# the descriptions, and optional translation of feature IDs.
test_that("gocats annotation importing works", {
  # The ID translation table is built from org.Hs.eg.db, so skip without it.
  skip_if_not_installed("org.Hs.eg.db")

  ancestors_file = system.file("extdata", "test_data", "ancestors.json.gz",
                               package = "categoryCompare2")
  namespace_file = system.file("extdata", "test_data", "namespace.json.gz",
                               package = "categoryCompare2")

  ensembl_keys = AnnotationDbi::keys(org.Hs.eg.db::org.Hs.eg.db,
                                     keytype = "ENSEMBL")
  ensembl_uniprot = suppressMessages(
    AnnotationDbi::select(org.Hs.eg.db::org.Hs.eg.db,
                          keys = ensembl_keys,
                          keytype = "ENSEMBL",
                          columns = c("ENSEMBL", "UNIPROT"))
  )
  # Translate from the second selected column to the first.
  names(ensembl_uniprot) = c("to", "from")
  has_both_ids = !is.na(ensembl_uniprot$to) & !is.na(ensembl_uniprot$from)
  ensembl_uniprot = ensembl_uniprot[has_both_ids, ]

  # Invalid inputs: wrong column names, and a list instead of a data.frame.
  bad_translation = ensembl_uniprot
  names(bad_translation) = c("other", "one")
  list_translation = as.list(ensembl_uniprot)

  expect_error(gocats_to_annotation("random_file"))
  expect_warning(gocats_to_annotation(ancestors_file, "random_file"),
                 "does not exist")
  expect_error(
    gocats_to_annotation(ancestors_file,
                         feature_translation = list_translation),
    "must be a data.frame"
  )
  expect_error(
    gocats_to_annotation(ancestors_file,
                         feature_translation = bad_translation),
    "must contain the columns"
  )

  # Anchor on the "NS:" prefix so descriptions that merely begin with the
  # letters BP, CC or MF are not counted as carrying a namespace.
  namespace_prefix = "^(BP|CC|MF):"

  without_namespace = gocats_to_annotation(ancestors_file,
                                           namespace_file = NULL)
  expect_false(any(grepl(namespace_prefix, without_namespace@description)))

  with_namespace = gocats_to_annotation(ancestors_file, namespace_file)
  expect_true(all(grepl(namespace_prefix, with_namespace@description)))

  expect_equal(without_namespace@annotation_features[[1]][1], "O60313")
  expect_equal(without_namespace@annotation_type, "gocatsGO")
  expect_equal(without_namespace@feature_type, "Uniprot")
  expect_length(without_namespace@annotation_features, 22415)

  with_translation = gocats_to_annotation(ancestors_file, namespace_file,
                                          feature_type = "ENSEMBL",
                                          annotation_type = "whatever",
                                          feature_translation = ensembl_uniprot)
  expect_equal(with_translation@annotation_features[[1]][1],
               "ENSG00000143799")
  expect_equal(with_translation@annotation_type, "whatever")
  expect_equal(with_translation@feature_type, "ENSEMBL")
})