Skip to content

Commit

Permalink
merge 1.3 to devel
Browse files Browse the repository at this point in the history
Merge branch 'gemma1.3'

# Conflicts:
#	R/allEndpoints.R
#	R/convenience.R
#	man/get_all_pages.Rd
#	tests/testthat/testDatasetEndpoints.R
  • Loading branch information
oganm committed Aug 15, 2023
2 parents bfce061 + 311cc12 commit 7ceb003
Show file tree
Hide file tree
Showing 44 changed files with 697 additions and 272 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: gemma.R
Title: A wrapper for Gemma's Restful API to access curated gene expression data and differential expression analyses
Version: 1.3.2
Version: 1.99.0
Authors@R:
c(person(given = "Javier", family = "Castillo-Arnemann",
role = c("aut"), email = "javiercastilloar@gmail.com",
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(forget_gemma_memoised)
export(gemma_call)
export(get_all_pages)
export(get_dataset_annotations)
export(get_dataset_design)
export(get_dataset_differential_expression_analyses)
Expand All @@ -10,6 +11,7 @@ export(get_dataset_expression_for_genes)
export(get_dataset_object)
export(get_dataset_platforms)
export(get_dataset_samples)
export(get_datasets)
export(get_datasets_by_ids)
export(get_differential_expression_values)
export(get_gene_go_terms)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# gemma.R 2.0.0
* Breaking change to `get_dataset_differential_expression_analyses` function in order to return annotations for contrasts with multiple characteristics.

# gemma.R 0.99.44
* Fixes and changes for Bioconductor 3.16 release

Expand Down
264 changes: 195 additions & 69 deletions R/allEndpoints.R

Large diffs are not rendered by default.

43 changes: 24 additions & 19 deletions R/body.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@ gemmaPath <- function(){
#' @param .call The original function call
#'
#' @noRd
.body <- function(fname, validators, endpoint, envWhere, isFile, header, raw, overwrite, file, attributes, .call) {
.body <- function(fname, validators, endpoint, envWhere, isFile, header, raw, overwrite, file, attributes = TRUE, .call) {

# Set header
if (header == "text/tab-separated-values") {
names(header) <- "Accept"
}
envWhere$header <- header

original_env = rlang::env_clone(envWhere)

# Validate arguments
if (!is.null(validators)) {
for (v in names(validators)) {
Expand All @@ -38,6 +39,11 @@ gemmaPath <- function(){
}
# Generate request
call <- quote(paste0(gemmaPath(), gsub("/((NA)?/)", "/", gsub("\\?[^=]+=NA", "\\?", gsub("&[^=]+=NA", "", glue::glue(endpoint)))))) %>% eval(envir = envWhere)

# remove empty parameters
call<- call %>% stringr::str_split('&') %>%
{.[[1]]} %>% {.[!grepl("\\=$",.)]} %>%
paste0(collapse = '&')

if (!is.null(getOption('gemma.username')) && !is.null(getOption('gemma.password'))){
requestExpr <- quote(httr::GET(
Expand Down Expand Up @@ -100,30 +106,29 @@ gemmaPath <- function(){
}
}

if (!is.null(file) && !is.na(file)) {
extension <- ifelse(raw, ".json", ifelse(any(vapply(mOut, typeof, character(1)) == "list"), ".rds", ".csv"))
if (isFile && raw){
extension <- '.gz'
}

file <- paste0(tools::file_path_sans_ext(file), extension)

if(attributes){
attributes(mOut) <- c(attributes(mOut),
env = original_env)
}


if (!is.null(file) && !is.na(file)) {
if (file.exists(file) && !overwrite && !file.info(file)$isdir) {
warning(file, " exists. Not overwriting.")
} else {
if (extension == ".json") {
write(jsonlite::toJSON(mOut, pretty = 2), file)
} else if (extension == ".rds") {
} else{
dir.create(dirname(file),showWarnings = FALSE,recursive = TRUE)
if(raw){
writeBin(response$content,file)
} else{
saveRDS(mOut, file)
} else if (extension == '.gz'){
tmp <- mOut
attributes(tmp) = NULL
writeBin(tmp,file)
} else {
utils::write.csv2(mOut, file, row.names = FALSE)
}
}


}


mOut
} else if (response$status_code == 403) {
stop(call,'\n',response$status_code, ": Forbidden. You do not have permission to access this data.")
Expand Down
113 changes: 70 additions & 43 deletions R/convenience.R
Original file line number Diff line number Diff line change
Expand Up @@ -220,24 +220,24 @@ make_design = function(samples,metaType){
#' output will only include samples relevant to the specific contrats.
#'
#' @return A list of \code{\link[SummarizedExperiment]{SummarizedExperiment}}s,
#' \code{\link[Biobase]{ExpressionSet}}s or a tibble containing metadata and
#' expression data for the queried datasets and genes. Metadata will be expanded to include
#' \code{\link[Biobase]{ExpressionSet}}s or a tibble containing metadata and
#' expression data for the queried datasets and genes. Metadata will be expanded to include
#' a variable number of factors that annotates samples from a dataset but will
#' always include single "factorValues" column that houses data.tables that
#' always include single "factorValues" column that houses data.tables that
#' include all annotations for a given sample.
#' @keywords dataset
#' @export
#' @examples
#' get_dataset_object("GSE2018")
get_dataset_object <- function(datasets,
genes = NULL,
keepNonSpecific = FALSE,
keepNonSpecific = FALSE,
consolidate = NA_character_,
resultSets = NULL,
contrasts = NULL,
filter = FALSE,
metaType = 'text',
type = "se",
filter = FALSE,
metaType = 'text',
type = "se",
memoised = getOption("gemma.memoised", FALSE)) {
if (type != "eset" && type != "se" && type != 'tidy') {
stop("Please enter a valid type: 'se' for SummarizedExperiment or 'eset' for ExpressionSet and 'tidy' for a long form tibble.")
Expand Down Expand Up @@ -312,37 +312,37 @@ get_dataset_object <- function(datasets,
designs <- metadata %>% lapply(function(meta){
make_design(meta,metaType)
})

# pack the information that will be included in all outputs
packed_data <- seq_along(datasets) %>% lapply(function(i){
dataset <- datasets[i]
# we don't want to pass data.tables by reference because
# we don't want to pass data.tables by reference because
# same datasets might be re-used
packed_info <-
packed_info <-
list(design = data.table::copy(designs[[as.character(dataset)]]),
exp = data.table::copy(expression[[as.character(dataset)]]),
result_set = resultSets[i],
contrast = contrasts[i],
dat = get_datasets_by_ids(dataset, raw = FALSE,memoised = memoised))





# reorders the expression to match the metadata
gene_info <- colnames(packed_info$exp)[!colnames(packed_info$exp) %in% rownames(packed_info$design)]
data.table::setcolorder(packed_info$exp,c(gene_info,rownames(packed_info$design)))

if(!is.null(resultSets)){
diff <- get_dataset_differential_expression_analyses(dataset,memoised = memoised)
subset_category <- diff %>%
dplyr::filter(result.ID == resultSets[i]) %>%
subset_category <- diff %>%
dplyr::filter(result.ID == resultSets[i]) %>%
.$subsetFactor.category %>% unique
subset_factor <- diff %>%
dplyr::filter(result.ID == resultSets[i]) %>%
subset_factor <- diff %>%
dplyr::filter(result.ID == resultSets[i]) %>%
.$subsetFactor.factorValue %>% unique

assertthat::assert_that(length(subset_category)==1)
assertthat::assert_that(length(subset_factor)==1)

if(!is.na(subset_category)){
in_subset <- packed_info$design$factorValues %>% purrr::map_lgl(function(x){
x %>% dplyr::filter(category %in% subset_category) %>%
Expand All @@ -355,7 +355,7 @@ get_dataset_object <- function(datasets,
contrast <- diff %>% dplyr::filter(result.ID == resultSets[i] & contrast.id == contrasts[i])
in_contrast <- packed_info$design$factorValues %>% purrr::map_lgl(function(x){
x %>% dplyr::filter(category == contrast$baseline.category) %>%
.$factorValue %in% c(contrast$baseline.factorValue,contrast$experimental.factorValue) %>%
.$factorValue %in% c(contrast$baseline.factorValue,contrast$experimental.factorValue) %>%
all
})
} else{
Expand All @@ -364,7 +364,7 @@ get_dataset_object <- function(datasets,
packed_info$exp <- packed_info$exp[,.SD,.SDcols = c(gene_info, rownames(packed_info$design)[in_subset & in_contrast])]
packed_info$design <- packed_info$design[in_subset & in_contrast,]
}

return(packed_info)
})

Expand All @@ -378,7 +378,7 @@ get_dataset_object <- function(datasets,
} else{
names(packed_data) <- packed_data %>% purrr::map('dat') %>% purrr::map_int('experiment.ID')
}


if (type == 'se'){
out <- packed_data %>% lapply(function(data){
Expand Down Expand Up @@ -427,7 +427,7 @@ get_dataset_object <- function(datasets,
genes <- S4Vectors::DataFrame(exprM[,.SD,.SDcols = colnames(exprM)[colnames(exprM) %in% c('Probe','GeneSymbol','GeneName','NCBIid')]])
exprM <- exprM[,.SD,.SDcols = colnames(exprM)[!colnames(exprM) %in% c('Probe','GeneSymbol','GeneName','NCBIid')]] %>%
data.matrix()

# reordering happens above
# exprM <- exprM[, match(rownames(design), colnames(exprM))]

Expand Down Expand Up @@ -465,7 +465,7 @@ get_dataset_object <- function(datasets,

exprM <- data$exp
design <- data$design

rownames(exprM) <- exprM$Probe
genes <- exprM[,.SD,.SDcols = colnames(exprM)[colnames(exprM) %in% c('Probe','GeneSymbol','GeneName','NCBIid')]]
exprM <- exprM[,.SD,.SDcols = colnames(exprM)[!colnames(exprM) %in% c('Probe','GeneSymbol','GeneName','NCBIid')]] %>%
Expand All @@ -482,10 +482,10 @@ get_dataset_object <- function(datasets,
dplyr::inner_join(genes, by ='Probe') %>%
dplyr::inner_join(design, by = "Sample") %>%
dplyr::rename(sample = "Sample", probe = "Probe") %>%
dplyr::mutate(experiment.ID = data$dat$experiment.ID,
dplyr::mutate(experiment.ID = data$dat$experiment.ID,
experiment.ShortName = data$dat$experiment.ShortName,
.before = 1)

if(!is.null(data$result_set)){
frm <- mutate(frm, result.ID = data$result_set,.before = 3)
}
Expand All @@ -497,7 +497,7 @@ get_dataset_object <- function(datasets,
}) %>% do.call(dplyr::bind_rows,.)

}

return(out)
}

Expand Down Expand Up @@ -630,7 +630,7 @@ gemma_call <- function(call,...,json = TRUE){
} else{
response <- httr::GET(glue::glue(paste0(gemmaPath(),call)))
}

if (response$status_code == 200) {
if(json){
response <- jsonlite::fromJSON(rawToChar(response$content),simplifyVector = FALSE)
Expand All @@ -652,28 +652,55 @@ gemma_call <- function(call,...,json = TRUE){


#' Get all pages of a paginated call
#'
#' Given a Gemma.R output with offset and limit arguments,
#' returns the entire output.
#'
#' @param query Output from a gemma.R function with offset and query argumend
#'
#' Given a Gemma.R output from a function with offset and limit arguments,
#' returns the output from all pages. All arguments other than offset, limit
#'
#' @param query Output from a gemma.R function with offset and query argument
#' @param step_size Size of individual calls to the server. 100 is the maximum value
#' @param binder Binding function for the calls. If \code{raw = FALSE} use \code{rbind} to
#' combine the data.tables. If not, use \code{c} to combine lists
#' @param directory Directory to save the output from the individual calls to. If provided, each page
#' is saved to separate files.
#' @param file Name of the file to save the results to. If provided, combined output is saved as an RDS file.
#' @param overwrite
#' @return A data.table or a list containing data from all pages.
get_all_pages <- function(query,step_size = 100,binder = rbind){
attr = attributes(query)
count = attr$totalElements

args = formals(attr$env$fname)
args_used = attr$env %>% as.list() %>% {.[names(args)]}
args_used$limit = step_size

lapply(seq(0,count,step_size),function(offset){
step_args = args_used
step_args$offset = offset
#' @export
get_all_pages <- function(query, step_size = 100,binder = rbind,directory = NULL, file = getOption("gemma.file", NA_character_),overwrite = getOption("gemma.overwrite", FALSE)){
attr <- attributes(query)
count <- attr$totalElements

args <- formals(attr$env$fname)
args_used <- attr$env %>% as.list() %>% {.[names(args)]}
args_used$limit <- step_size
args_used$overwrite <- overwrite

out <- lapply(seq(0,count,step_size),function(offset){
step_args <- args_used
step_args$offset <- offset

if(!is.null(directory)){
step_args$file = file.path(directory,offset)
} else{
# file argument should not be preserved since it'll overwrite itself in
# each call
step_args$file <- NA_character_
}

do.call(attr$env$fname,step_args)
}) %>% do.call(binder,.)

if(!is.null(file) && !is.na(file)){
if (file.exists(file) && !overwrite && !file.info(file)$isdir) {
warning(file, " exists. Not overwriting.")
} else{
dir.create(dirname(file),showWarnings = FALSE,recursive = TRUE)
saveRDS(out, file)
}
}

return(out)
}



Loading

0 comments on commit 7ceb003

Please sign in to comment.