Skip to content

Commit

Permalink
Merge pull request #83 from spsanderson/development
Browse files Browse the repository at this point in the history
Fixes #77
  • Loading branch information
spsanderson authored May 21, 2024
2 parents 4e1b32a + 5dd7c0c commit b5e2997
Show file tree
Hide file tree
Showing 215 changed files with 669 additions and 187 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ Encoding: UTF-8
LazyData: true
LazyDataCompression: xz
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
URL: https://www.spsanderson.com/healthyR.data/, https://github.com/spsanderson/healthyR.data
BugReports: https://github.com/spsanderson/healthyR.data/issues
Depends:
R (>= 3.4.0)
Expand All @@ -22,4 +20,7 @@ Imports:
utils,
janitor,
dplyr,
stats
stats,
httr2,
stringr,
tidyr
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ export(current_pch_outcomes_data)
export(current_timely_and_effective_care_data)
export(current_unplanned_hospital_vists_data)
export(current_va_data)
export(get_cms_meta_data)
11 changes: 10 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
# healthyR.data (development version)

## Breaking Changes
None

## New Function
1. Fix #77 - Add function `get_cms_meta_data()`

## Minor Fixes and Improvements
1. Fix #72 - Fix bug in directory file paths for `current_hosp_data()`

# healthyR.data 1.0.3

## Breaking Changes
1. Require R version 3.4.0 in keeping with tidyverse practices.

## New Functions
1. Fix #12 - Add function `dl_hosp_data_dict()`
1. Fix #12 - Add function `current_hosp_data_dict()`
2. Fix #10 - Add function `current_hosp_data()`
3. Fix #22 - Add function `current_asc_data()`
4. Fix #28 - Add function `current_asc_oas_cahps_data()`
Expand Down
109 changes: 109 additions & 0 deletions R/get-cms-meta-data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#' Retrieve Data Links from CMS Data URL
#'
#' @family Hospital Data
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @seealso \url{https://data.cms.gov/data.json}
#'
#' @description
#' This function takes no arguments. It sends a request to the CMS data catalog
#' at \url{https://data.cms.gov/data.json}, retrieves the JSON data,
#' and processes it to create a tibble with relevant information about the datasets.
#'
#' @details
#' The function fetches JSON data from the CMS data URL and extracts relevant fields to
#' create a tidy tibble. It selects specific columns, handles nested lists by unnesting them,
#' cleans column names, and processes dates and media types to make the data more useful for analysis.
#' The columns in the returned tibble are:
#' \itemize{
#' \item \code{title}
#' \item \code{description}
#' \item \code{landing_page}
#' \item \code{modified}
#' \item \code{keyword}
#' \item \code{described_by}
#' \item \code{fn}
#' \item \code{has_email}
#' \item \code{identifier}
#' \item \code{start}
#' \item \code{end}
#' \item \code{references}
#' \item \code{distribution_description}
#' \item \code{distribution_title}
#' \item \code{distribution_modified}
#' \item \code{distribution_start}
#' \item \code{distribution_end}
#' \item \code{media_type}
#' \item \code{data_link}
#' }
#'
#' @return A tibble with data links and relevant metadata about the datasets.
#'
#' @examples
#' \dontrun{
#' # Fetch and process data links from the CMS data URL
#' data_links <- get_cms_meta_data()
#' print(data_links)
#' }
#'
#' @name get_cms_meta_data
NULL
#' @rdname get_cms_meta_data
#' @export

get_cms_meta_data <- function() {
  # Fetch the CMS catalog and parse the JSON body (simplifyVector = TRUE, so
  # the parsed result is accessed below as a list with a `dataset` element).
  url <- "https://data.cms.gov/data.json"
  cms_request <- httr2::request(url)
  cms_response <- httr2::req_perform(cms_request)
  cms_catalog <- httr2::resp_body_json(
    cms_response,
    check_type = FALSE,
    simplifyVector = TRUE
  )

  # Stage 1: keep the fields of interest, flatten the nested columns and
  # normalise the column names to snake_case.
  flat_tbl <- cms_catalog$dataset |>
    dplyr::tibble() |>
    dplyr::select(
      title, description, landingPage,
      modified, keyword,
      describedBy, contactPoint, identifier,
      temporal, references, distribution
    ) |>
    # Columns coming out of `distribution` are prefixed with "distribution_".
    tidyr::unnest(cols = distribution, names_sep = "_") |>
    tidyr::unnest(cols = c(keyword, contactPoint, references)) |>
    janitor::clean_names() |>
    dplyr::select(-type, -distribution_type)

  # Stage 2: derive the convenience columns.
  #   media_type: distribution_format, falling back to distribution_media_type
  #   data_link:  distribution_access_url, falling back to
  #               distribution_download_url
  #   has_email:  same value with the "mailto:" prefix removed
  linked_tbl <- flat_tbl |>
    dplyr::mutate(
      media_type = ifelse(
        is.na(distribution_format),
        distribution_media_type,
        distribution_format
      ),
      data_link = ifelse(
        is.na(distribution_access_url),
        distribution_download_url,
        distribution_access_url
      ),
      has_email = stringr::str_remove(has_email, "mailto:")
    )

  # Stage 3: split the "start/end" temporal strings into two columns each and
  # convert every date-like column with as.Date().
  dated_tbl <- linked_tbl |>
    tidyr::separate(
      temporal,
      into = c("start", "end"),
      sep = "/",
      remove = TRUE
    ) |>
    tidyr::separate(
      distribution_temporal,
      into = c("distribution_start", "distribution_end"),
      sep = "/",
      remove = TRUE
    ) |>
    dplyr::mutate(
      dplyr::across(
        c(
          start, end, modified,
          distribution_modified, distribution_start,
          distribution_end
        ),
        as.Date
      )
    )

  # Stage 4: tidy the distribution text fields, drop the source columns that
  # were folded into media_type / data_link, and squish whitespace in all
  # character columns.
  meta_tbl <- dated_tbl |>
    dplyr::mutate(
      # A missing distribution description is replaced by the label "old".
      distribution_description = ifelse(
        is.na(distribution_description),
        "old",
        distribution_description
      ),
      # Strip ":", "|" and "-" first, then the characters matched by
      # "[:number:]"; the two removals run in this order within one mutate().
      distribution_title = stringr::str_remove_all(distribution_title, "[:|-]"),
      distribution_title = stringr::str_remove_all(
        distribution_title,
        "[:number:]"
      )
    ) |>
    dplyr::select(
      -distribution_format, -distribution_media_type,
      -distribution_access_url, -distribution_download_url
    ) |>
    dplyr::mutate(
      dplyr::across(dplyr::where(is.character), stringr::str_squish)
    )

  # The tidy tibble of dataset metadata and data links is the return value.
  meta_tbl
}
14 changes: 6 additions & 8 deletions docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit b5e2997

Please sign in to comment.