Merge pull request #83 from spsanderson/development

Fixes #77
spsanderson · May 21, 2024 · b5e2997 · b5e2997
2 parents 4e1b32a + 5dd7c0c
commit b5e2997
Show file tree

Hide file tree

Showing 215 changed files with 669 additions and 187 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -12,8 +12,6 @@ Encoding: UTF-8
 LazyData: true
 LazyDataCompression: xz
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
-URL: https://www.spsanderson.com/healthyR.data/, https://github.com/spsanderson/healthyR.data
 BugReports: https://github.com/spsanderson/healthyR.data/issues
 Depends: 
     R (>= 3.4.0)
@@ -22,4 +20,7 @@ Imports:
     utils,
     janitor,
     dplyr,
-    stats
+    stats,
+    httr2,
+    stringr,
+    tidyr
diff --git a/NAMESPACE b/NAMESPACE
@@ -21,3 +21,4 @@ export(current_pch_outcomes_data)
 export(current_timely_and_effective_care_data)
 export(current_unplanned_hospital_vists_data)
 export(current_va_data)
+export(get_cms_meta_data)
diff --git a/NEWS.md b/NEWS.md
@@ -1,12 +1,21 @@
 # healthyR.data (development version)
+
+## Breaking Changes
+None
+
+## New Function
+1. Fix #77 - Add function `get_cms_meta_data()`
+
+## Minor Fixes and Improvements
 1. Fix #72 - Fix bug in directory file paths for `current_hosp_data()`
+
 # healthyR.data 1.0.3
 
 ## Breaking Changes
 1. Require R version 3.4.0 in keeping with tidyverse practices.
 
 ## New Functions
-1. Fix #12 - Add function `dl_hosp_data_dict()`
+1. Fix #12 - Add function `current_hosp_data_dict()`
 2. Fix #10 - Add function `current_hosp_data()`
 3. Fix #22 - Add function `current_asc_data()`
 4. Fix #28 - Add function `current_asc_oas_cahps_data()`

diff --git a/R/get-cms-meta-data.R b/R/get-cms-meta-data.R
@@ -0,0 +1,109 @@
+#' Retrieve Data Links from CMS Data URL
+#'
+#' @family Hospital Data
+#'
+#' @author Steven P. Sanderson II, MPH
+#'
+#' @seealso \url{https://data.cms.gov/data.json}
+#'
+#' @description
+#' This function sends a request to the CMS data catalog at
+#' \url{https://data.cms.gov/data.json}, retrieves the JSON data, and processes it
+#' to create a tibble with relevant information about the datasets.
+#'
+#' @details
+#' The function fetches JSON data from the CMS data URL and extracts relevant fields to
+#' create a tidy tibble. It selects specific columns, handles nested lists by unnesting them,
+#' cleans column names, and processes dates and media types to make the data more useful for analysis.
+#' The columns in the returned tibble are:
+#' \itemize{
+#'   \item \code{title}
+#'   \item \code{description}
+#'   \item \code{landing_page}
+#'   \item \code{modified}
+#'   \item \code{keyword}
+#'   \item \code{described_by}
+#'   \item \code{fn}
+#'   \item \code{has_email}
+#'   \item \code{identifier}
+#'   \item \code{start}
+#'   \item \code{end}
+#'   \item \code{references}
+#'   \item \code{distribution_description}
+#'   \item \code{distribution_title}
+#'   \item \code{distribution_modified}
+#'   \item \code{distribution_start}
+#'   \item \code{distribution_end}
+#'   \item \code{media_type}
+#'   \item \code{data_link}
+#' }
+#'
+#' @return A tibble with data links and relevant metadata about the datasets.
+#'
+#' @examples
+#' \dontrun{
+#' # Fetch and process data links from the CMS data URL
+#' data_links <- get_cms_meta_data()
+#' print(data_links)
+#' }
+#'
+#' @name get_cms_meta_data
+NULL
+#' @rdname get_cms_meta_data
+#' @export
+
+get_cms_meta_data <- function() {
+    # Fetch the CMS catalog and parse the JSON response body
+    cms_url <- "https://data.cms.gov/data.json"
+    cms_response <- httr2::req_perform(httr2::request(cms_url))
+    catalog <- httr2::resp_body_json(
+        cms_response,
+        check_type = FALSE,
+        simplifyVector = TRUE
+    )
+
+    # Stage 1: keep the dataset fields of interest and expand the nested
+    # columns; distribution columns get a "distribution_" prefix (names_sep)
+    flat_tbl <- dplyr::tibble(catalog$dataset) |>
+        dplyr::select(
+            title, description, landingPage, modified, keyword,
+            describedBy, contactPoint, identifier, temporal,
+            references, distribution
+        ) |>
+        tidyr::unnest(cols = distribution, names_sep = "_") |>
+        tidyr::unnest(cols = c(keyword, contactPoint, references)) |>
+        janitor::clean_names() |>
+        dplyr::select(-c(type, distribution_type))
+
+    # Stage 2: build media_type / data_link (falling back to the second
+    # column when the first is NA), strip the "mailto:" prefix from the
+    # e-mail, and split each "start/end" range into two columns
+    split_tbl <- flat_tbl |>
+        dplyr::mutate(
+            media_type = ifelse(
+                is.na(distribution_format),
+                distribution_media_type,
+                distribution_format
+            ),
+            data_link = ifelse(
+                is.na(distribution_access_url),
+                distribution_download_url,
+                distribution_access_url
+            ),
+            has_email = stringr::str_remove(has_email, "mailto:")
+        ) |>
+        tidyr::separate(
+            temporal,
+            into = c("start", "end"),
+            sep = "/",
+            remove = TRUE
+        ) |>
+        tidyr::separate(
+            distribution_temporal,
+            into = c("distribution_start", "distribution_end"),
+            sep = "/",
+            remove = TRUE
+        )
+
+    # Stage 3: convert the date columns, fill missing distribution
+    # descriptions with "old", clean the distribution title, drop the helper
+    # columns, and squish whitespace in every character column.
+    # The pipeline value is the function's return value.
+    date_cols <- c(
+        "start", "end", "modified",
+        "distribution_modified", "distribution_start", "distribution_end"
+    )
+    split_tbl |>
+        dplyr::mutate(dplyr::across(dplyr::all_of(date_cols), as.Date)) |>
+        dplyr::mutate(
+            distribution_description = ifelse(
+                is.na(distribution_description),
+                "old",
+                distribution_description
+            ),
+            distribution_title = distribution_title |>
+                stringr::str_remove_all("[:|-]") |>
+                stringr::str_remove_all("[:number:]")
+        ) |>
+        dplyr::select(
+            -c(
+                distribution_format, distribution_media_type,
+                distribution_access_url, distribution_download_url
+            )
+        ) |>
+        dplyr::mutate(dplyr::across(dplyr::where(is.character), stringr::str_squish))
+}
diff --git a/docs/404.html b/docs/404.html