diff --git a/R/util_corr_fit.R b/R/util_corr_fit.R
index 5d0232a..2161387 100644
--- a/R/util_corr_fit.R
+++ b/R/util_corr_fit.R
@@ -2,21 +2,27 @@
 #'
 #' @param postsynth A postsynth object from tidysynthesis or a tibble
 #' @param data an original (observed) data set.
+#' @param group_by The unquoted name of a (or multiple) grouping variable(s)
 #'
 #' @return A `list` of fit metrics:
-#'  - `correlation_original`: correlation matrix of the original data.
-#'  - `correlation_synthetic`: correlation matrix of the synthetic data.
-#'  - `correlation_difference`: difference between `correlation_synthetic` and
-#'  `correlation_original`.
-#'  - `correlation_fit`: square root of the sum of squared differences between
-#'  `correlation_synthetic` and `correlation_original`, divided by the number of
-#'  cells in the correlation matrix.
+#'  - `correlation_data`: A `tibble` of the correlations among the 
+#'  numeric variables for the actual and synthetic data
+#'  - `metrics`: Correlation metrics including the correlation fit, square root 
+#'  of the sum of squared differences between the synthetic and original data, 
+#'  divided by the number of cells in the correlation matrix, mean squared
+#'  error, the mean of the absolute correlation differences between the actual
+#'  and synthetic data, and the root mean squared error, the root mean of the 
+#'  squared correlation differences between the actual and synthetic data
 #'  
 #' @family utility functions
 #'
 #' @export
 
-util_corr_fit <- function(postsynth, data) {
+util_corr_fit <- function(postsynth,
+                          data, 
+                          group_by = NULL) {
+  
+   
   
   if (is_postsynth(postsynth)) {
   
@@ -25,15 +31,52 @@ util_corr_fit <- function(postsynth, data) {
   } else {
     
     synthetic_data <- postsynth
-    
   }
   
-  synthetic_data <- dplyr::select_if(synthetic_data, is.numeric)
-  data <- dplyr::select_if(data, is.numeric)
-
-  # reorder data names
+  # reorder data names (this appears to check if the variables are the same)
   data <- dplyr::select(data, names(synthetic_data))
   
+  #TODO: Need to update code so if the synthetic data and actual data do not have the same groupings, the function still runs correctly. 
+  # I think we could add all of the groupings to each dataset, the run the rest of the code. 
+  
+  synthetic_data <- dplyr::select(synthetic_data, dplyr::where(is.numeric), {{ group_by }})  %>%
+    dplyr::arrange(dplyr::across({{ group_by }})) %>%
+    dplyr::group_split(dplyr::across({{ group_by }})) 
+  
+  data <- dplyr::select(data, dplyr::where(is.numeric), {{ group_by }}) %>%
+    dplyr::arrange(dplyr::across({{ group_by }})) %>%
+    dplyr::group_split(dplyr::across({{ group_by }}))
+  
+  groups <- lapply(data, function(x) dplyr::select(x, {{ group_by }}) %>%
+                     dplyr::slice(1))
+  n <- lapply(data, function(x) dplyr::select(x, {{ group_by }}) %>%
+                dplyr::count())
+
+  results <- purrr::pmap(
+      .l = list(synthetic_data, data, groups),
+      .f = get_correlations
+    )
+  
+    metrics <- dplyr::bind_cols(
+      n = unlist(n),
+      correlation_fit = purrr::map_dbl(results, "correlation_fit"),
+      correlation_difference_mae = purrr::map_dbl(results, "correlation_difference_mae"),
+      correlation_difference_rmse =purrr:: map_dbl(results, "correlation_difference_rmse"),
+      dplyr::bind_rows(groups)
+    )
+    
+    corr_data <- dplyr::bind_rows(purrr::map_dfr(results, "correlation_data"))
+    
+    return(list(
+      correlation_data = corr_data,
+      metrics = metrics
+    )
+  )
+}
+
+get_correlations <- function(synthetic_data,
+                             data,
+                             groups) {
   # helper function to find a correlation matrix with the upper tri set to zeros
   lower_triangle <- function(x) {
     
@@ -43,37 +86,55 @@ util_corr_fit <- function(postsynth, data) {
       dplyr::select_if(is.numeric) %>%
       stats::cor()
     
-    # set the values in the upper triangle to zero to avoid double counting
+    # set NA values in the lower triangle to "", set the values in the upper triangle to zero to avoid double counting
+    correlation_matrix[is.na(correlation_matrix[lower.tri(correlation_matrix, diag = FALSE)])] <- ""
     correlation_matrix[upper.tri(correlation_matrix, diag = TRUE)] <- NA
     
     return(correlation_matrix)
   }
   
-  # find the lower triangle of the original data linear correlation matrix
-  original_lt <- lower_triangle(data)
   
-  # find the lower triangle of the synthetic data linear correlation matrix
-  synthetic_lt <- lower_triangle(synthetic_data)
+  # find the lower triangle of the linear correlation matrices and add a var column 
+  original_lt <- data.frame(lower_triangle(data))
+  original_lt$var2 <- colnames(original_lt)
   
-  # compare names
-  if (any(rownames(original_lt) != rownames(synthetic_lt))) {
-    stop("ERROR: rownames are not identical")
-  }
+  synthetic_lt <- data.frame(lower_triangle(synthetic_data))
+  synthetic_lt$var2 <- colnames(synthetic_lt)
   
-  if (any(colnames(original_lt) != colnames(synthetic_lt))) {
-    stop("ERROR: colnames are not identical")
-  }
+  # restructure the correlation matrix so the cols are var1, var2, original/synthetic
+  original_lt <- original_lt %>%
+    tidyr::pivot_longer(cols = !var2, names_to = "var1", values_to = "original") %>%
+    dplyr::filter(!is.na(original)) %>%
+    dplyr::arrange(var1) %>%
+    dplyr::select(var1, var2, original) %>%
+    dplyr::mutate(original = dplyr::case_when(.data$original == "" ~ NA, 
+                                              .default = .data$original))
+  
+  synthetic_lt <- synthetic_lt %>%
+    tidyr::pivot_longer(cols = !var2, names_to = "var1", values_to = "synthetic") %>%
+    dplyr::filter(!is.na(synthetic)) %>%
+    dplyr::arrange(var1) %>%
+    dplyr::select(var1, var2, synthetic) %>%
+    dplyr::mutate(synthetic = dplyr::case_when(.data$synthetic == "" ~ NA, 
+                                               .default = .data$synthetic))
+  
+  # combine the data and find the difference between the original and synthetic correlations 
+  correlation_data <- original_lt %>%
+    dplyr::left_join(synthetic_lt, by = c("var1","var2")) %>%
+    dplyr::mutate(original = as.numeric(.data$original),
+                  synthetic = as.numeric(.data$synthetic),
+                  difference = .data$original - .data$synthetic,
+                  proportion_difference = .data$difference / .data$original)
   
-  # find the difference between the matrices
-  difference_lt <- synthetic_lt - original_lt
+  correlation_data <- bind_cols(correlation_data, groups)
   
-  # find the length of the nonzero values in the matrices
-  n <- choose(ncol(difference_lt), 2)
+  # find the number of values in the lower triangle 
+  n <- nrow(dplyr::filter(correlation_data, !is.na(difference)))
   
   # calculate the correlation fit and divide by n
-  correlation_fit <- sqrt(sum(difference_lt ^ 2, na.rm = TRUE)) / n
+  correlation_fit <- sqrt(sum(correlation_data$difference ^ 2, na.rm = TRUE)) / n
   
-  difference_vec <- as.numeric(difference_lt)[!is.na(difference_lt)]
+  difference_vec <- as.numeric(correlation_data$difference)
   
   # mean absolute error
   correlation_difference_mae <- difference_vec %>%
@@ -81,20 +142,19 @@ util_corr_fit <- function(postsynth, data) {
     mean()
   
   # root mean square error
-  correlation_difference_rmse <- 
-    difference_vec ^ 2%>%
+  correlation_difference_rmse <- difference_vec ^ 2 %>%
     mean() %>%
     sqrt()
   
+  
   return(
     list(
-      correlation_original = original_lt,
-      correlation_synthetic = synthetic_lt,
-      correlation_difference = difference_lt,
+      correlation_data = correlation_data,
       correlation_fit = correlation_fit,
       correlation_difference_mae = correlation_difference_mae,
       correlation_difference_rmse = correlation_difference_rmse
     )
   )
   
-}
\ No newline at end of file
+}
+
diff --git a/man/util_corr_fit.Rd b/man/util_corr_fit.Rd
index 860a090..721e8a4 100644
--- a/man/util_corr_fit.Rd
+++ b/man/util_corr_fit.Rd
@@ -4,23 +4,27 @@
 \alias{util_corr_fit}
 \title{Calculate the correlation fit metric of a confidential data set.}
 \usage{
-util_corr_fit(postsynth, data)
+util_corr_fit(postsynth, data, group_by = NULL)
 }
 \arguments{
 \item{postsynth}{A postsynth object from tidysynthesis or a tibble}
 
 \item{data}{an original (observed) data set.}
+
+\item{group_by}{The unquoted name of a (or multiple) grouping variable(s)}
 }
 \value{
 A \code{list} of fit metrics:
 \itemize{
-\item \code{correlation_original}: correlation matrix of the original data.
-\item \code{correlation_synthetic}: correlation matrix of the synthetic data.
-\item \code{correlation_difference}: difference between \code{correlation_synthetic} and
-\code{correlation_original}.
+\item \code{correlation_data}: A \code{tibble} of the correlations among the
+numeric variables for the actual and synthetic data
 \item \code{correlation_fit}: square root of the sum of squared differences between
-\code{correlation_synthetic} and \code{correlation_original}, divided by the number of
+the synthetic and original data, divided by the number of
 cells in the correlation matrix.
+\item \code{correlation_difference_mae}: the mean of the absolute correlation
+differences between the actual and synthetic data
+\item \code{correlation_difference_rmse}: the root mean of the squared correlation
+differences between the actual and synthetic data
 }
 }
 \description{
diff --git a/tests/testthat/test-util_corr_fit.R b/tests/testthat/test-util_corr_fit.R
index 4ff40c8..86ec3a4 100644
--- a/tests/testthat/test-util_corr_fit.R
+++ b/tests/testthat/test-util_corr_fit.R
@@ -1,50 +1,213 @@
+library(tidyverse)
+
 # confidential data
-df <- data.frame(a = c(1, 2, 3),
-                 b = c(1, 2, 3),
-                 c = c(1, 2, 3),
-                 RECID = c("a", "b", "c"))
-
-# difference matrix for tests
-diff_matrix <- matrix(
-  c(NA, NA, NA,
-    -2, NA, NA,
-    0, -2, NA),
-  ncol = 3,
-  byrow = TRUE
-) 
-
-rownames(diff_matrix) <- c("a", "c", "b")
-colnames(diff_matrix) <- c("a", "c", "b")
+df <- data.frame(a = c(1, 2, 3, 4, 5),
+                 b = c(1, 2, 3, 4, 5),
+                 c = c(1, 2, 3, 4, 5),
+                 d = c("red", "red", "yellow", "yellow", "yellow"),
+                 e = c("blue", "blue", "green", "green", "green"))
+
+syn <- list(synthetic_data = data.frame(a = c(1, 2, 3, 4, 5),
+                                        b = c(5, 4, 3, 2, 1),
+                                        c = c(1, 2, 3, 4, 5),
+                                        d = c("red", "red", "yellow", "yellow", "yellow"),
+                                        e = c("blue", "blue", "green", "green", "green"))) %>%
+  structure(class = "postsynth")
 
 # test with postsynth
 test_that("util_corr_fit is correct with postsynth ", {
   
-  syn <- list(synthetic_data = data.frame(a = c(1, 2, 3),
-                                          c = c(3, 2, 1),
-                                          b = c(1, 2, 3),
-                                          RECID = c("a", "b", "c"))) %>%
-    structure(class = "postsynth")
-  
   corr <- util_corr_fit(postsynth = syn, data = df)
+  corr_data <- corr$correlation_data 
+  metrics <- corr$metrics
+  
+  # check dimensions
+  expect_equal(ncol(corr_data), 6)
+  expect_equal(nrow(corr_data), 3)
+  expect_equal(ncol(metrics), 4)
+  expect_equal(nrow(metrics), 1)
+  
+  expect_equal(corr_data$difference, c(2, 0, 2))
+  expect_equal(metrics$correlation_fit, sqrt(sum(c(0, -2, -2) ^ 2)) / 3)
+  expect_equal(metrics$correlation_difference_mae, mean(abs(c(0, -2, -2))))
+  expect_equal(metrics$correlation_difference_rmse, sqrt(mean(c(0, -2, -2) ^ 2)))
+  # throws an error, may be due to how the n column was added to the data
+  expect_equal(metrics$n, 5)
   
-  expect_equal(corr$correlation_difference, diff_matrix)
-  expect_equal(corr$correlation_fit, sqrt(sum(c(0, -2, -2) ^ 2)) / 3)
-  expect_equal(corr$correlation_difference_mae, mean(abs(c(0, -2, -2))))
-  expect_equal(corr$correlation_difference_rmse, sqrt(mean(c(0, -2, -2) ^ 2)))
 })
 
 # test with data
 test_that("util_corr_fit is correct with postsynth ", {
   
-  syn <- data.frame(a = c(1, 2, 3),
-                    c = c(3, 2, 1),
-                    b = c(1, 2, 3),
-                    RECID = c("a", "b", "c"))
+  syn_data <- data.frame(a = c(1, 2, 3, 4, 5),
+                         b = c(5, 4, 3, 2, 1),
+                         c = c(1, 2, 3, 4, 5),
+                         d = c("red", "red", "yellow", "yellow", "yellow"),
+                         e = c("blue", "blue", "green", "green", "green"))
   
-  corr <- util_corr_fit(postsynth = syn, data = df)
+  corr <- util_corr_fit(postsynth = syn_data, data = df)
+  corr_data <- corr$correlation_data 
+  metrics <- corr$metrics
+  
+  # check dimensions
+  expect_equal(ncol(corr_data), 6)
+  expect_equal(nrow(corr_data), 3)
+  expect_equal(ncol(metrics), 4)
+  expect_equal(nrow(metrics), 1)
+  
+  expect_equal(corr_data$difference, c(2, 0, 2))
+  expect_equal(metrics$correlation_fit, sqrt(sum(c(0, -2, -2) ^ 2)) / 3)
+  expect_equal(metrics$correlation_difference_mae, mean(abs(c(0, -2, -2))))
+  expect_equal(metrics$correlation_difference_rmse, sqrt(mean(c(0, -2, -2) ^ 2)))
+  expect_equal(metrics$n, 5)
+})
+
+
+
+test_that("util_corr_fit is correct when groupped by one variable ", {
+  
+  df <- data.frame(a = c(1, 2, 3, 4, 5),
+                   b = c(1, 2, 3, 4, 5),
+                   c = c(1, 2, 3, 4, 5),
+                   d = c("red", "red", "yellow", "yellow", "yellow"),
+                   e = c("blue", "blue", "green", "green", "green"))
+  
+  syn <- list(synthetic_data = data.frame(a = c(1, 2, 3, 4, 5),
+                                          b = c(5, 4, 3, 2, 1),
+                                          c = c(1, 2, 3, 4, 5),
+                                          d = c("red", "red", "yellow", "yellow", "yellow"),
+                                          e = c("blue", "blue", "green", "green", "green"))) %>%
+    structure(class = "postsynth")
+  
+  corr <- util_corr_fit(postsynth = syn, data = df, group_by = d)
+  corr_data <- corr$correlation_data
+  metrics <- corr$metrics
+  
+  # check dimensions
+  expect_equal(ncol(corr_data), 7)
+  expect_equal(nrow(corr_data), 6)
+  expect_equal(ncol(metrics), 5)
+  expect_equal(nrow(metrics), 2)
+  
+  expect_equal(corr_data$difference, c(2, 0, 2, 2, 0, 2))
+  expect_equal(metrics$correlation_fit, rep(sqrt(sum(c(0, -2, -2) ^ 2)) / 3, 2))
+  expect_equal(metrics$correlation_difference_mae, rep(mean(abs(c(0, -2, -2))), 2))
+  expect_equal(metrics$correlation_difference_rmse, rep(sqrt(mean(c(0, -2, -2) ^ 2)), 2))
+  expect_equal(metrics$n, c(2, 3))
+  
+})
+
+
+test_that("util_corr_fit is correct when groupped by more than one variable ", {
+  
+  corr <- util_corr_fit(postsynth = syn, data = df, group_by = c(d, e))
+  corr_data <- corr$correlation_data 
+  metrics <- corr$metrics
+  
+  # check dimensions
+  expect_equal(ncol(corr_data), 8)
+  expect_equal(nrow(corr_data), 6)
+  expect_equal(ncol(metrics), 6)
+  expect_equal(nrow(metrics), 2)
+  
+  expect_equal(corr_data$difference, c(2, 0, 2, 2, 0, 2))
+  expect_equal(metrics$correlation_fit, rep(sqrt(sum(c(0, -2, -2) ^ 2)) / 3, 2))
+  expect_equal(metrics$correlation_difference_mae, rep(mean(abs(c(0, -2, -2))), 2))
+  expect_equal(metrics$correlation_difference_rmse, rep(sqrt(mean(c(0, -2, -2) ^ 2)), 2))
+  expect_equal(metrics$n, c(2, 3))
+  
+})
 
-  expect_equal(corr$correlation_difference, diff_matrix)
-  expect_equal(corr$correlation_fit, sqrt(sum(c(0, -2, -2) ^ 2)) / 3)
-  expect_equal(corr$correlation_difference_mae, mean(abs(c(0, -2, -2))))
-  expect_equal(corr$correlation_difference_rmse, sqrt(mean(c(0, -2, -2) ^ 2)))
+
+test_that("util_corr_fit returns NAs when only one observation is in a grouping ", {
+  
+  df <- data.frame(a = c(1, 2, 3, 4, 5),
+                   b = c(1, 2, 3, 4, 5),
+                   c = c(1, 2, 3, 4, 5),
+                   d = c("red", "red", "yellow", "yellow", "red"),
+                   e = c("blue", "blue", "green", "green", "green"),
+                   RECID = c("a", "b", "c", "d", "e"))
+  
+  syn <- data.frame(a = c(1, 2, 3, 4, 5),
+                    b = c(5, 4, 3, 2, 1),
+                    c = c(1, 2, 3, 4, 5),
+                    d = c("red", "red", "yellow", "yellow", "red"),
+                    e = c("blue", "blue", "green", "green", "green"),
+                    RECID = c("a", "b", "c", "d", "e")) 
+  
+  corr <- util_corr_fit(postsynth = syn, data = df, group_by = c(d, e))
+  corr_data <- corr$correlation_data
+  metrics <- corr$metrics
+  
+  expect_equal(ncol(corr_data), 8)
+  expect_equal(nrow(corr_data), 9)
+  expect_equal(ncol(metrics), 6)
+  expect_equal(nrow(metrics), 3)
+  
+  expect_equal(corr_data$difference, c(2, 0, 2, NA, NA, NA, 2, 0, 2))
+  expect_equal(metrics$correlation_fit, c(sqrt(sum(c(0, -2, -2) ^ 2)) / 3, NaN, sqrt(sum(c(0, -2, -2) ^ 2)) / 3))
+  expect_equal(metrics$correlation_difference_mae, c(mean(abs(c(0, -2, -2))), NA, mean(abs(c(0, -2, -2)))))
+  expect_equal(metrics$correlation_difference_rmse, c(sqrt(mean(c(0, -2, -2) ^ 2)), NA, sqrt(mean(c(0, -2, -2) ^ 2))))
+  expect_equal(metrics$n, c(2, 1, 2))
+  
+})
+
+# NOTE: These tests will fail
+
+test_that("util_corr_fit returns NAs when a grouping in the actual data is not in the synthetic data", {
+  
+  df <- data.frame(a = c(1, 2, 3, 4, 5),
+                   b = c(1, 2, 3, 4, 5),
+                   c = c(1, 2, 3, 4, 5),
+                   d = c("red", "red", "yellow", "yellow", "red"),
+                   e = c("blue", "blue", "green", "green", "green"),
+                   RECID = c("a", "b", "c", "d", "e"))
+  
+  syn <- data.frame(a = c(1, 2, 3, 4, 5),
+                    b = c(5, 4, 3, 2, 1),
+                    c = c(1, 2, 3, 4, 5),
+                    d = c("red", "red", "yellow", "yellow", "yellow"),
+                    e = c("blue", "blue", "green", "green", "green"),
+                    RECID = c("a", "b", "c", "d", "e")) 
+  
+  corr <- util_corr_fit(postsynth = syn, data = df, group_by = c(d, e))
+  corr_data <- corr$correlation_data 
+  
+  expect_equal(corr_data$difference, c(2, 0, 2, 2, 0, 2, NA, NA, NA))
+  expect_equal(corr$correlation_fit, c(rep(sqrt(sum(c(0, -2, -2) ^ 2)) / 3, 2), NaN))
+  expect_equal(corr$correlation_difference_mae, c(rep(mean(abs(c(0, -2, -2))), 2), NA))
+  expect_equal(corr$correlation_difference_rmse, c(rep(sqrt(mean(c(0, -2, -2) ^ 2)), 2), NA))
+  
+})
+
+
+test_that("util_corr_fit returns NAs when a grouping in the synthetic data is not in the actual data", {
+  
+  df <- data.frame(a = c(1, 2, 3, 4, 5),
+                   b = c(1, 2, 3, 4, 5),
+                   c = c(1, 2, 3, 4, 5),
+                   d = c("red", "red", "yellow", "yellow", "yellow"),
+                   e = c("blue", "blue", "green", "green", "green"),
+                   RECID = c("a", "b", "c", "d", "e"))
+  
+  syn <- data.frame(a = c(1, 2, 3, 4, 5),
+                    b = c(5, 4, 3, 2, 1),
+                    c = c(1, 2, 3, 4, 5),
+                    d = c("red", "red", "yellow", "yellow", "red"),
+                    e = c("blue", "blue", "green", "green", "green"),
+                    RECID = c("a", "b", "c", "d", "e")) 
+  
+  corr <- util_corr_fit(postsynth = syn, data = df, group_by = c(d, e))
+  corr_data <- corr$correlation_data 
+  
+  expect_equal(corr_data$difference, c(2, 0, 2, 2, 0, 2, NA, NA, NA))
+  expect_equal(corr$correlation_fit, c(rep(sqrt(sum(c(0, -2, -2) ^ 2)) / 3, 2), NaN))
+  expect_equal(corr$correlation_difference_mae, c(rep(mean(abs(c(0, -2, -2))), 2), NA))
+  expect_equal(corr$correlation_difference_rmse, c(rep(sqrt(mean(c(0, -2, -2) ^ 2)), 2), NA))
+  
 })
+
+
+
+
+