New field_types_advanced() function to allow specification of only a …

…subset of fields (#20)
ropensci · Apr 4, 2024 · 2f3d6b8 · 2f3d6b8
1 parent 8aca906
commit 2f3d6b8
Show file tree

Hide file tree

Showing 12 changed files with 444 additions and 90 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: daiquiri
 Type: Package
 Title: Data Quality Reporting for Temporal Datasets
-Version: 1.1.1
+Version: 1.1.1.9000
 Authors@R: c(
     person(c("T.", "Phuong"), "Quan", email = "phuong.quan@ndm.ox.ac.uk",
         role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8566-1817")),
@@ -36,7 +36,7 @@ Imports:
     utils,
     stats,
     xfun (>= 0.15)
-RoxygenNote: 7.2.0
+RoxygenNote: 7.3.1
 Suggests:
     covr,
     knitr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -9,6 +9,7 @@ export(close_log)
 export(daiquiri_report)
 export(export_aggregated_data)
 export(field_types)
+export(field_types_advanced)
 export(ft_categorical)
 export(ft_datetime)
 export(ft_freetext)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,10 @@
+# daiquiri (development version)
+
+## New features
+
+* New `field_types_advanced()` function. Allows just a subset of the columns in the source df to be named explicitly in the specification, with the remaining columns set to the `.default_field_type` parameter. (#16)  
+
+
 # daiquiri 1.1.1 (2023-07-18)
 
 ## New features
@@ -20,6 +27,7 @@
 
 * Hex logo now appears on reports, adding dependency to `xfun`
 
+
 # daiquiri 1.0.3 (2022-12-06)
 
 ## Bug fixes and minor improvements

diff --git a/R/field_types.R b/R/field_types.R
@@ -34,95 +34,65 @@
 field_types <- function(...) {
   fts <- list(...)
 
-  # validate - collect all errors together and return only once
-  err_validation <- character()
-  is_field_type <- vapply(fts, is_field_type, logical(1))
-  if (any(!is_field_type)) {
-    err_validation <-
-      append(
-        err_validation,
-        paste(
-          "Unrecognised field_type(s) in positions: [",
-          paste(which(!is_field_type), collapse = ", "),
-          "]",
-          "names: [",
-          paste(names(fts)[which(!is_field_type)], collapse = ", "),
-          "]"
-        )
-      )
-  }
-  is_timepoint <- vapply(fts, is_ft_timepoint, logical(1))
-  if (sum(is_timepoint) != 1) {
-    err_validation <-
-      append(
-        err_validation,
-        paste(
-          "Must specify one and only one timepoint field. Timepoints currently in positions: [",
-          paste(which(is_timepoint), collapse = ", "),
-          "]",
-          "names: [",
-          paste(names(fts)[which(is_timepoint)], collapse = ", "),
-          "]"
-        )
-      )
-  }
-  is_strata <- vapply(fts, is_ft_strata, logical(1))
-  if (sum(is_strata) > 1) {
+  err_validation <- field_types_problems(fts)
+
+  # additional validation for .default_field_type reserved name
+  if (".default_field_type" %in% names(fts)) {
     err_validation <-
       append(
         err_validation,
-        paste(
-          "Only one strata field allowed. Strata fields currently specified in positions: [",
-          paste(which(is_strata), collapse = ", "),
-          "]",
-          "names: [",
-          paste(names(fts)[which(is_strata)], collapse = ", "),
-          "]"
-        )
+        ".default_field_type is a reserved name and cannot be one of the field names in the data.
+        Did you mean to use the field_types_advanced() function instead?"
       )
   }
-  is_aggregate_by_each_category <- is_field_type
-  is_aggregate_by_each_category[is_field_type] <-
-    vapply(fts[is_field_type],
-           FUN = field_type_has_option,
-           FUN.VALUE = logical(1),
-           option = "aggregate_by_each_category")
-  if (any(is_strata) && any(is_aggregate_by_each_category)) {
-    err_validation <-
-      append(
-        err_validation,
-        paste(
-          "Cannot use aggregate_by_each_category option when there is a strata field. Option currently specified in positions: [",
-          paste(which(is_aggregate_by_each_category), collapse = ", "),
-          "]",
-          "names: [",
-          paste(names(fts)[which(is_aggregate_by_each_category)], collapse = ", "),
-          "]"
-        )
-      )
-  }
-  if (anyDuplicated(names(fts)) > 0) {
-    err_validation <-
-      append(
-        err_validation,
-        paste(
-          "Duplicate column names not allowed: [",
-          paste(names(fts)[duplicated(names(fts))], collapse = ", "),
-          "]"
-        )
+
+  if (length(err_validation) > 0) {
+    stop_custom(
+      .subclass = "invalid_field_types",
+      message = paste0(
+        "Invalid `field_types' specification.\n",
+        paste(err_validation, collapse = "\n")
       )
+    )
   }
-  # check for reserved names
-  if (any(names(fts) %in% c("[DUPLICATES]", "[ALL_FIELDS_COMBINED]"))) {
+
+  structure(fts, class = "daiquiri_field_types")
+}
+
+
+# -----------------------------------------------------------------------------
+#' Create field_types_advanced specification
+#'
+#' Specify only a subset of the names and types of fields in the source data frame. The remaining
+#' fields will be given the same 'default' type.
+#'
+#' @param ... names and types of fields (columns) in source data.
+#' @param .default_field_type `field_type` to use for any remaining fields (columns) in source
+#'   data. Note, this means there can not be a field in the data named `.default_field_type`
+#' @return A `field_types` object
+#' @examples
+#' fts <- field_types_advanced(
+#'   PrescriptionDate = ft_timepoint(),
+#'   PatientID = ft_ignore(),
+#'   .default_field_type = ft_simple()
+#' )
+#'
+#' fts
+#' @seealso [field_types()], [field_types_available()], [template_field_types()]
+#' @export
+field_types_advanced <- function(..., .default_field_type = ft_simple()) {
+
+  fts <- list(..., ".default_field_type" = .default_field_type)
+
+  err_validation <- field_types_problems(fts)
+
+  # additional validation for .default_field_type
+  if (is_ft_timepoint(.default_field_type) || is_ft_strata(.default_field_type)) {
     err_validation <-
-      append(
-        err_validation,
-        paste(
-          "'[DUPLICATES]' and '[ALL_FIELDS_COMBINED]' are names reserved for calculated columns.
-					Please rename these columns in your data."
-        )
-      )
+      append(err_validation,
+             ".default_field_type cannot be a timepoint nor strata field_type")
   }
+
   if (length(err_validation) > 0) {
     stop_custom(
       .subclass = "invalid_field_types",
@@ -133,7 +103,7 @@ field_types <- function(...) {
     )
   }
 
-  structure(fts, class = "daiquiri_field_types")
+  structure(fts, class = c("daiquiri_field_types", "daiquiri_field_types_advanced"))
 }
 
 
@@ -493,6 +463,15 @@ ft_duplicates <- function() {
 is_field_types <- function(x) inherits(x, "daiquiri_field_types")
 
 
+# -----------------------------------------------------------------------------
+#' Test if object is a field_types_advanced object
+#'
+#' @param x object to test
+#' @return Logical
+#' @noRd
+is_field_types_advanced <- function(x) inherits(x, "daiquiri_field_types_advanced")
+
+
 # -----------------------------------------------------------------------------
 #' Constructor for individual field_type object
 #'
@@ -701,6 +680,7 @@ field_types_strata_field_name <- function(field_types) {
   strata_field_name
 }
 
+# -----------------------------------------------------------------------------
 #' Test if field_type has a particular option set
 #'
 #' @param ft field_type to test
@@ -710,3 +690,133 @@ field_types_strata_field_name <- function(field_types) {
 field_type_has_option <- function(ft, option){
   option %in% ft$options
 }
+
+
+# -----------------------------------------------------------------------------
+#' Validate list of (standard) field_types
+#'
+#' @param fts list of individual `field_type`s
+#' @return A character vector of error messages (if any)
+#'
+#' @noRd
+field_types_problems <- function(fts) {
+  # validate - collect all errors together and return only once
+  err_validation <- character()
+  is_field_type <- vapply(fts, is_field_type, logical(1))
+  if (any(!is_field_type)) {
+    err_validation <-
+      append(
+        err_validation,
+        paste(
+          "Unrecognised field_type(s) in positions: [",
+          paste(which(!is_field_type), collapse = ", "),
+          "]",
+          "names: [",
+          paste(names(fts)[which(!is_field_type)], collapse = ", "),
+          "]"
+        )
+      )
+  }
+  is_timepoint <- vapply(fts, is_ft_timepoint, logical(1))
+  if (sum(is_timepoint) != 1) {
+    err_validation <-
+      append(
+        err_validation,
+        paste(
+          "Must specify one and only one timepoint field. Timepoints currently in positions: [",
+          paste(which(is_timepoint), collapse = ", "),
+          "]",
+          "names: [",
+          paste(names(fts)[which(is_timepoint)], collapse = ", "),
+          "]"
+        )
+      )
+  }
+  is_strata <- vapply(fts, is_ft_strata, logical(1))
+  if (sum(is_strata) > 1) {
+    err_validation <-
+      append(
+        err_validation,
+        paste(
+          "Only one strata field allowed. Strata fields currently specified in positions: [",
+          paste(which(is_strata), collapse = ", "),
+          "]",
+          "names: [",
+          paste(names(fts)[which(is_strata)], collapse = ", "),
+          "]"
+        )
+      )
+  }
+  is_aggregate_by_each_category <- is_field_type
+  is_aggregate_by_each_category[is_field_type] <-
+    vapply(fts[is_field_type],
+           FUN = field_type_has_option,
+           FUN.VALUE = logical(1),
+           option = "aggregate_by_each_category")
+  if (any(is_strata) && any(is_aggregate_by_each_category)) {
+    err_validation <-
+      append(
+        err_validation,
+        paste(
+          "Cannot use aggregate_by_each_category option when there is a strata field. Option currently specified in positions: [",
+          paste(which(is_aggregate_by_each_category), collapse = ", "),
+          "]",
+          "names: [",
+          paste(names(fts)[which(is_aggregate_by_each_category)], collapse = ", "),
+          "]"
+        )
+      )
+  }
+  if (anyDuplicated(names(fts)) > 0) {
+    err_validation <-
+      append(
+        err_validation,
+        paste(
+          "Duplicate column names not allowed: [",
+          paste(names(fts)[duplicated(names(fts))], collapse = ", "),
+          "]"
+        )
+      )
+  }
+  # check for reserved names
+  if (any(names(fts) %in% c("[DUPLICATES]", "[ALL_FIELDS_COMBINED]"))) {
+    err_validation <-
+      append(
+        err_validation,
+        paste(
+          "'[DUPLICATES]' and '[ALL_FIELDS_COMBINED]' are names reserved for calculated columns.
+					Please rename these columns in your data."
+        )
+      )
+  }
+
+  err_validation
+}
+
+
+# -----------------------------------------------------------------------------
+#' Fill in default field_types (if any) to create a fully-named specification
+#'
+#' @param df_names field names in the supplied df
+#' @param field_types field_types object with or without .default_field_type specified
+#' @return A `field_types` object
+#' @noRd
+complete_field_types <- function(df_names, field_types){
+
+  if (!is_field_types_advanced(field_types)) {
+    fts <- field_types
+  } else{
+    fts <- list()
+    for (i in seq_along(df_names)) {
+      if (df_names[i] %in% names(field_types)) {
+        fts[[df_names[i]]] <- field_types[[df_names[i]]]
+      } else{
+        fts[[df_names[i]]] <-
+          field_types[[".default_field_type"]]
+      }
+    }
+  }
+
+  structure(fts, class = "daiquiri_field_types")
+}
+