merge 1.3 to devel

Merge branch 'gemma1.3' # Conflicts: # R/allEndpoints.R # R/convenience.R # man/get_all_pages.Rd # tests/testthat/testDatasetEndpoints.R
PavlidisLab · Aug 15, 2023 · 7ceb003 · 7ceb003
2 parents bfce061 + 311cc12
commit 7ceb003
Show file tree

Hide file tree

Showing 44 changed files with 697 additions and 272 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: gemma.R
 Title: A wrapper for Gemma's Restful API to access curated gene expression data and differential expression analyses
-Version: 1.3.2
+Version: 1.99.0
 Authors@R: 
     c(person(given = "Javier", family = "Castillo-Arnemann", 
              role = c("aut"), email = "javiercastilloar@gmail.com",

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(forget_gemma_memoised)
 export(gemma_call)
+export(get_all_pages)
 export(get_dataset_annotations)
 export(get_dataset_design)
 export(get_dataset_differential_expression_analyses)
@@ -10,6 +11,7 @@ export(get_dataset_expression_for_genes)
 export(get_dataset_object)
 export(get_dataset_platforms)
 export(get_dataset_samples)
+export(get_datasets)
 export(get_datasets_by_ids)
 export(get_differential_expression_values)
 export(get_gene_go_terms)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,6 @@
+# gemma.R 2.0.0
+* Breaking change to `get_dataset_differential_expression_analyses` function in order to return annotations for contrasts with multiple characteristics.
+
 # gemma.R 0.99.44
 * Fixes and changes for Bioconductor 3.16 release
 

diff --git a/R/allEndpoints.R b/R/allEndpoints.R
diff --git a/R/body.R b/R/body.R
@@ -22,14 +22,15 @@ gemmaPath <- function(){
 #' @param .call The original function call
 #'
 #' @noRd
-.body <- function(fname, validators, endpoint, envWhere, isFile, header, raw, overwrite, file, attributes, .call) {
+.body <- function(fname, validators, endpoint, envWhere, isFile, header, raw, overwrite, file, attributes = TRUE, .call) {
 
     # Set header
     if (header == "text/tab-separated-values") {
         names(header) <- "Accept"
     }
     envWhere$header <- header
-
+    original_env = rlang::env_clone(envWhere)
+
     # Validate arguments
     if (!is.null(validators)) {
         for (v in names(validators)) {
@@ -38,6 +39,11 @@ gemmaPath <- function(){
     }
     # Generate request
     call <- quote(paste0(gemmaPath(), gsub("/((NA)?/)", "/", gsub("\\?[^=]+=NA", "\\?", gsub("&[^=]+=NA", "", glue::glue(endpoint)))))) %>% eval(envir = envWhere)
+
+    # remove empty parameters
+    call<- call %>% stringr::str_split('&') %>% 
+        {.[[1]]} %>% {.[!grepl("\\=$",.)]} %>%
+        paste0(collapse = '&')
 
     if (!is.null(getOption('gemma.username')) && !is.null(getOption('gemma.password'))){
         requestExpr <- quote(httr::GET(
@@ -100,30 +106,29 @@ gemmaPath <- function(){
             }
         }
 
-        if (!is.null(file) && !is.na(file)) {
-            extension <- ifelse(raw, ".json", ifelse(any(vapply(mOut, typeof, character(1)) == "list"), ".rds", ".csv"))
-            if (isFile && raw){
-                extension <- '.gz'
-            }
-
-            file <- paste0(tools::file_path_sans_ext(file), extension)
 
+        if(attributes){
+            attributes(mOut) <- c(attributes(mOut),
+                                 env = original_env)
+        }
+
+
+        if (!is.null(file) && !is.na(file)) {
             if (file.exists(file) && !overwrite && !file.info(file)$isdir) {
                 warning(file, " exists. Not overwriting.")
-            } else {
-                if (extension == ".json") {
-                    write(jsonlite::toJSON(mOut, pretty = 2), file)
-                } else if (extension == ".rds") {
+            } else{
+                dir.create(dirname(file),showWarnings = FALSE,recursive = TRUE)
+                if(raw){
+                    writeBin(response$content,file)
+                } else{
                     saveRDS(mOut, file)
-                } else if (extension == '.gz'){
-                    tmp <- mOut
-                    attributes(tmp) = NULL
-                    writeBin(tmp,file)
-                } else {
-                    utils::write.csv2(mOut, file, row.names = FALSE)
                 }
             }
+
+
         }
+
+
         mOut
     } else if (response$status_code == 403) {
         stop(call,'\n',response$status_code, ": Forbidden. You do not have permission to access this data.")

diff --git a/R/convenience.R b/R/convenience.R
@@ -220,24 +220,24 @@ make_design = function(samples,metaType){
 #' output will only include samples relevant to the specific contrats.
 #'
 #' @return A list of \code{\link[SummarizedExperiment]{SummarizedExperiment}}s,
-#' \code{\link[Biobase]{ExpressionSet}}s or a tibble containing metadata and 
-#' expression data for the queried datasets and genes. Metadata will be expanded to include 
+#' \code{\link[Biobase]{ExpressionSet}}s or a tibble containing metadata and
+#' expression data for the queried datasets and genes. Metadata will be expanded to include
 #' a variable number of factors that annotates samples from a dataset but will
-#' always include single "factorValues" column that houses data.tables that 
+#' always include single "factorValues" column that houses data.tables that
 #' include all annotations for a given sample.
 #' @keywords dataset
 #' @export
 #' @examples
 #' get_dataset_object("GSE2018")
 get_dataset_object <- function(datasets,
                                genes = NULL,
-                               keepNonSpecific = FALSE, 
+                               keepNonSpecific = FALSE,
                                consolidate = NA_character_,
                                resultSets = NULL,
                                contrasts = NULL,
-                               filter = FALSE, 
-                               metaType = 'text', 
-                               type = "se", 
+                               filter = FALSE,
+                               metaType = 'text',
+                               type = "se",
                                memoised = getOption("gemma.memoised", FALSE)) {
     if (type != "eset" && type != "se" && type != 'tidy') {
         stop("Please enter a valid type: 'se' for SummarizedExperiment or 'eset' for ExpressionSet and 'tidy' for a long form tibble.")
@@ -312,37 +312,37 @@ get_dataset_object <- function(datasets,
     designs <- metadata %>% lapply(function(meta){
         make_design(meta,metaType)
     })
-    
+
     # pack the information that will be included in all outputs
     packed_data <- seq_along(datasets) %>% lapply(function(i){
         dataset <- datasets[i]
-        # we don't want to pass data.tables by reference because 
+        # we don't want to pass data.tables by reference because
         # same datasets might be re-used
-        packed_info <- 
+        packed_info <-
             list(design = data.table::copy(designs[[as.character(dataset)]]),
                  exp = data.table::copy(expression[[as.character(dataset)]]),
                  result_set = resultSets[i],
                  contrast = contrasts[i],
                  dat =  get_datasets_by_ids(dataset, raw = FALSE,memoised = memoised))
-
 
-
+
+
         # reorders the expression to match the metadata
         gene_info <- colnames(packed_info$exp)[!colnames(packed_info$exp) %in% rownames(packed_info$design)]
         data.table::setcolorder(packed_info$exp,c(gene_info,rownames(packed_info$design)))
 
         if(!is.null(resultSets)){
             diff <- get_dataset_differential_expression_analyses(dataset,memoised = memoised)
-            subset_category <- diff %>% 
-                dplyr::filter(result.ID == resultSets[i]) %>% 
+            subset_category <- diff %>%
+                dplyr::filter(result.ID == resultSets[i]) %>%
                 .$subsetFactor.category %>% unique
-            subset_factor <- diff %>% 
-                dplyr::filter(result.ID == resultSets[i]) %>% 
+            subset_factor <- diff %>%
+                dplyr::filter(result.ID == resultSets[i]) %>%
                 .$subsetFactor.factorValue %>% unique
-            
+
             assertthat::assert_that(length(subset_category)==1)
             assertthat::assert_that(length(subset_factor)==1)
-            
+
             if(!is.na(subset_category)){
                 in_subset <- packed_info$design$factorValues %>% purrr::map_lgl(function(x){
                     x %>% dplyr::filter(category %in% subset_category) %>%
@@ -355,7 +355,7 @@ get_dataset_object <- function(datasets,
                 contrast <- diff %>% dplyr::filter(result.ID == resultSets[i] & contrast.id == contrasts[i])
                 in_contrast <- packed_info$design$factorValues %>% purrr::map_lgl(function(x){
                     x %>% dplyr::filter(category == contrast$baseline.category) %>%
-                        .$factorValue %in% c(contrast$baseline.factorValue,contrast$experimental.factorValue) %>% 
+                        .$factorValue %in% c(contrast$baseline.factorValue,contrast$experimental.factorValue) %>%
                         all
                 })
             } else{
@@ -364,7 +364,7 @@ get_dataset_object <- function(datasets,
             packed_info$exp <- packed_info$exp[,.SD,.SDcols = c(gene_info, rownames(packed_info$design)[in_subset & in_contrast])]
             packed_info$design <- packed_info$design[in_subset & in_contrast,]
         }
-        
+
         return(packed_info)
     })
 
@@ -378,7 +378,7 @@ get_dataset_object <- function(datasets,
     } else{
         names(packed_data) <- packed_data %>% purrr::map('dat') %>% purrr::map_int('experiment.ID')
     }
-    
+
 
     if (type == 'se'){
         out <- packed_data %>% lapply(function(data){
@@ -427,7 +427,7 @@ get_dataset_object <- function(datasets,
             genes <- S4Vectors::DataFrame(exprM[,.SD,.SDcols = colnames(exprM)[colnames(exprM) %in% c('Probe','GeneSymbol','GeneName','NCBIid')]])
             exprM <- exprM[,.SD,.SDcols = colnames(exprM)[!colnames(exprM) %in% c('Probe','GeneSymbol','GeneName','NCBIid')]] %>%
                 data.matrix()
-            
+
             # reordering happens above
             # exprM <- exprM[, match(rownames(design), colnames(exprM))]
 
@@ -465,7 +465,7 @@ get_dataset_object <- function(datasets,
 
             exprM <- data$exp
             design <- data$design
-            
+
             rownames(exprM) <- exprM$Probe
             genes <- exprM[,.SD,.SDcols = colnames(exprM)[colnames(exprM) %in% c('Probe','GeneSymbol','GeneName','NCBIid')]]
             exprM <- exprM[,.SD,.SDcols = colnames(exprM)[!colnames(exprM) %in% c('Probe','GeneSymbol','GeneName','NCBIid')]] %>%
@@ -482,10 +482,10 @@ get_dataset_object <- function(datasets,
                 dplyr::inner_join(genes, by ='Probe') %>%
                 dplyr::inner_join(design, by = "Sample") %>%
                 dplyr::rename(sample = "Sample", probe = "Probe") %>%
-                dplyr::mutate(experiment.ID = data$dat$experiment.ID, 
+                dplyr::mutate(experiment.ID = data$dat$experiment.ID,
                               experiment.ShortName = data$dat$experiment.ShortName,
                               .before = 1)
-            
+
             if(!is.null(data$result_set)){
                 frm <- mutate(frm, result.ID = data$result_set,.before = 3)
             }
@@ -497,7 +497,7 @@ get_dataset_object <- function(datasets,
         }) %>% do.call(dplyr::bind_rows,.)
 
     }
-    
+
     return(out)
 }
 
@@ -630,7 +630,7 @@ gemma_call <- function(call,...,json = TRUE){
     } else{
         response <- httr::GET(glue::glue(paste0(gemmaPath(),call)))
     }
-    
+
     if (response$status_code == 200) {
         if(json){
             response <- jsonlite::fromJSON(rawToChar(response$content),simplifyVector = FALSE)
@@ -652,28 +652,55 @@ gemma_call <- function(call,...,json = TRUE){
 
 
 #' Get all pages of a paginated call
-#' 
-#' Given a Gemma.R output with offset and limit arguments,
-#' returns the entire output.
-#' 
-#' @param query Output from a gemma.R function with offset and query argumend
+#'
+#' Given a Gemma.R output from a function with offset and limit arguments,
+#' returns the output from all pages. All arguments other than offset, limit
+#'
+#' @param query Output from a gemma.R function with offset and query argument
 #' @param step_size Size of individual calls to the server. 100 is the maximum value
 #' @param binder Binding function for the calls. If \code{raw = FALSE} use \code{rbind} to
 #' combine the data.tables. If not, use \code{c} to combine lists
+#' @param directory Directory to save the output from the individual calls to. If provided, each page
+#' is saved to separate files.
+#' @param file Name of the file to save the results to. If provided, combined output is saved as an RDS file.
+#' @param overwrite 
 #' @return A data.table or a list containing data from all pages.
-get_all_pages <- function(query,step_size = 100,binder = rbind){
-    attr = attributes(query)
-    count = attr$totalElements
-
-    args = formals(attr$env$fname)
-    args_used = attr$env %>% as.list() %>% {.[names(args)]}
-    args_used$limit = step_size
-
-    lapply(seq(0,count,step_size),function(offset){
-        step_args = args_used
-        step_args$offset = offset
+#' @export
+get_all_pages <- function(query, step_size = 100,binder = rbind,directory  = NULL, file = getOption("gemma.file", NA_character_),overwrite = getOption("gemma.overwrite", FALSE)){
+    attr <- attributes(query)
+    count <- attr$totalElements
+
+    args <- formals(attr$env$fname)
+    args_used <- attr$env %>% as.list() %>% {.[names(args)]}
+    args_used$limit <- step_size
+    args_used$overwrite <- overwrite
+
+    out <- lapply(seq(0,count,step_size),function(offset){
+        step_args <- args_used
+        step_args$offset <- offset
+
+        if(!is.null(directory)){
+            step_args$file = file.path(directory,offset)
+        } else{
+            # file argument should not be preserved since it'll overwrite itself in
+            # each call
+            step_args$file <- NA_character_
+        }
 
         do.call(attr$env$fname,step_args)
     }) %>% do.call(binder,.)
 
+    if(!is.null(file) && !is.na(file)){
+        if (file.exists(file) && !overwrite && !file.info(file)$isdir) {
+            warning(file, " exists. Not overwriting.")
+        } else{
+            dir.create(dirname(file),showWarnings = FALSE,recursive = TRUE)
+            saveRDS(out, file)
+        }
+    }
+
+    return(out)
 }
+
+
+