Skip to content

Commit

Permalink
Remove hashes
Browse files Browse the repository at this point in the history
  • Loading branch information
Damonamajor committed Jul 19, 2024
1 parent c343bac commit 9b8f605
Showing 1 changed file with 143 additions and 143 deletions.
286 changes: 143 additions & 143 deletions analyses/new-feature-template.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -63,142 +63,142 @@ paths <- model_file_dict(model_params$run_id, model_params$year)
```

```{r download_new_data}
# analyses_paths <- list(
# output = list(
# list(
# s3 = paste0("s3://ccao-model-results-us-east-1/performance/year=", params$run_id_year, "/stage=assessment/", params$run_id, ".parquet"),
# key = "performance"
# ),
# list(
# s3 = paste0("s3://ccao-model-results-us-east-1/metadata/year=", params$run_id_year, "/", params$run_id, ".parquet"),
# key = "metadata"
# ),
# list(
# s3 = paste0("s3://ccao-model-results-us-east-1/shap/year=", params$run_id_year, "/run_id=", params$run_id, "/"),
# key = "shap"
# ),
# list(
# s3 = paste0("s3://ccao-model-results-us-east-1/assessment_card/year=", params$run_id_year, "/run_id=", params$run_id, "/"),
# key = "assessment_card"
# ),
# list(
# s3 = paste0("s3://ccao-model-results-us-east-1/assessment_pin/year=", params$run_id_year, "/run_id=", params$run_id, "/"),
# key = "assessment_pin"
# )
# )
# )
#
# fetch_analyses <- function(run_id, year, analyses_paths) {
# tictoc::tic(paste0("Fetched run: ", run_id))
#
# s3_objs <- grep("s3://", unlist(analyses_paths$output), value = TRUE)
# bucket <- strsplit(s3_objs[1], "/")[[1]][3]
#
# data_list <- list()
#
# for (analyses_path in analyses_paths$output) {
# is_directory <- endsWith(analyses_path$s3, "/")
# if (is_directory) {
# partitioned_by_run <- endsWith(analyses_path$s3, paste0("run_id=", run_id, "/"))
# if (partitioned_by_run) {
# dir_path <- analyses_path$s3
# } else {
# dir_path <- paste0(analyses_path$s3, "year=", year, "/run_id=", run_id, "/")
# }
#
# message("Now fetching: ", dir_path)
# objs_prefix <- sub(paste0("s3://", bucket, "/"), "", dir_path)
# objs <- aws.s3::get_bucket_df(bucket, objs_prefix)
# objs <- dplyr::filter(objs, Size > 0)
#
# if (nrow(objs) > 0) {
# combined_data <- NULL
# for (key in objs$Key) {
# message("Now fetching: ", key)
# local_temp_path <- file.path(tempdir(), basename(key))
# aws.s3::save_object(key, bucket = bucket, file = local_temp_path)
#
# # Read the Parquet file and append it to combined_data
# temp_data <- arrow::read_parquet(local_temp_path)
# if (is.null(combined_data)) {
# combined_data <- temp_data
# } else {
# combined_data <- dplyr::bind_rows(combined_data, temp_data)
# }
# }
# data_list[[analyses_path$key]] <- combined_data
# } else {
# warning(analyses_path$key, " does not exist for this run")
# }
# } else {
# message("Now fetching: ", analyses_path$s3)
# if (aws.s3::object_exists(analyses_path$s3, bucket = bucket)) {
# local_temp_path <- file.path(tempdir(), basename(analyses_path$s3))
# aws.s3::save_object(analyses_path$s3, bucket = bucket, file = local_temp_path)
# data_list[[analyses_path$key]] <- arrow::read_parquet(local_temp_path)
# } else {
# warning(analyses_path$key, " does not exist for this run")
# }
# }
# }
#
# tictoc::toc()
# return(data_list)
# }
#
# data <- fetch_analyses(params$run_id, params$run_id_year, analyses_paths)
#
# performance <- data$performance
# metadata <- data$metadata
# shap <- data$shap
# assessment_card <- data$assessment_card %>%
# select(meta_pin, meta_card_num, pred_card_initial_fmv, meta_township_code, !!sym(params$added_feature))
#
# assessment_pin <- data$assessment_pin
#
# lockfile_assessment <- metadata$dvc_md5_assessment_data
#
# # Define the S3 path for the assessment data
# s3_path_assessment <- paste0(
# "s3://ccao-data-dvc-us-east-1/files/md5/",
# substr(lockfile_assessment, 1, 2), "/",
# substr(lockfile_assessment, 3, nchar(lockfile_assessment))
# )
#
#
# assessment_data <- s3read_using(FUN = read_parquet, object = s3_path_assessment)
# S3 locations of every output needed for the run under review. An `s3` value
# ending in "/" is a prefix that is listed and read file by file; any other
# value names a single Parquet object. `key` is the name under which the
# downloaded data is returned by fetch_analyses().
analyses_paths <- local({
  results_root <- "s3://ccao-model-results-us-east-1/"
  year_part <- paste0("year=", params$run_id_year, "/")
  run_prefix <- paste0(year_part, "run_id=", params$run_id, "/")
  run_file <- paste0(params$run_id, ".parquet")

  list(
    output = list(
      list(
        s3 = paste0(
          results_root, "performance/", year_part, "stage=assessment/", run_file
        ),
        key = "performance"
      ),
      list(
        s3 = paste0(results_root, "metadata/", year_part, run_file),
        key = "metadata"
      ),
      list(
        s3 = paste0(results_root, "shap/", run_prefix),
        key = "shap"
      ),
      list(
        s3 = paste0(results_root, "assessment_card/", run_prefix),
        key = "assessment_card"
      ),
      list(
        s3 = paste0(results_root, "assessment_pin/", run_prefix),
        key = "assessment_pin"
      )
    )
  )
})
# Download every output listed in `analyses_paths$output` for one model run
# and return the results as a named list.
#
# Arguments:
#   run_id          ID of the model run. Used for the timer label and to detect
#                   prefixes that already end in "run_id=<run_id>/".
#   year            Year of the run. Only used to extend a directory prefix
#                   that does not already end in "run_id=<run_id>/".
#   analyses_paths  List with an `output` element, itself a list of entries
#                   that each carry an `s3` URI and a `key` name.
#
# Returns a list with one element per output that was found on S3, named by
# the entry's `key`. An output that is missing raises a warning and is left
# out of the result.
fetch_analyses <- function(run_id, year, analyses_paths) {
  tictoc::tic(paste0("Fetched run: ", run_id))
  # Stop the timer on every exit path so that a failed download does not
  # leave a dangling timer behind
  on.exit(tictoc::toc(), add = TRUE)

  # The bucket name is taken from the first S3 URI; all entries are assumed
  # to live in that same bucket
  s3_objs <- grep("s3://", unlist(analyses_paths$output), value = TRUE)
  bucket <- strsplit(s3_objs[1], "/")[[1]][3]

  # Download one object into a uniquely named temp file, read it as Parquet,
  # and remove the temp file again so downloads neither pile up on disk nor
  # overwrite each other when two objects share a base name
  read_remote_parquet <- function(object) {
    local_temp_path <- tempfile(fileext = ".parquet")
    on.exit(unlink(local_temp_path), add = TRUE)
    aws.s3::save_object(object, bucket = bucket, file = local_temp_path)
    arrow::read_parquet(local_temp_path)
  }

  data_list <- list()

  for (analyses_path in analyses_paths$output) {
    is_directory <- endsWith(analyses_path$s3, "/")

    if (is_directory) {
      partitioned_by_run <- endsWith(
        analyses_path$s3,
        paste0("run_id=", run_id, "/")
      )
      if (partitioned_by_run) {
        dir_path <- analyses_path$s3
      } else {
        dir_path <- paste0(
          analyses_path$s3, "year=", year, "/run_id=", run_id, "/"
        )
      }

      message("Now fetching: ", dir_path)
      # Strip the "s3://<bucket>/" prefix literally (not as a regex) to get
      # the key prefix to list
      objs_prefix <- sub(
        paste0("s3://", bucket, "/"), "", dir_path,
        fixed = TRUE
      )
      # max = Inf pages through the whole listing instead of stopping at the
      # first 1000 objects
      objs <- aws.s3::get_bucket_df(bucket, objs_prefix, max = Inf)
      # Zero-byte objects are skipped
      objs <- dplyr::filter(objs, Size > 0)

      if (nrow(objs) > 0) {
        # Read every file first and bind once, rather than growing a data
        # frame inside the loop
        frames <- lapply(objs$Key, function(key) {
          message("Now fetching: ", key)
          read_remote_parquet(key)
        })
        data_list[[analyses_path$key]] <- dplyr::bind_rows(frames)
      } else {
        warning(analyses_path$key, " does not exist for this run")
      }
    } else {
      message("Now fetching: ", analyses_path$s3)
      if (aws.s3::object_exists(analyses_path$s3, bucket = bucket)) {
        data_list[[analyses_path$key]] <- read_remote_parquet(analyses_path$s3)
      } else {
        warning(analyses_path$key, " does not exist for this run")
      }
    }
  }

  data_list
}
# Fetch all outputs of the run under review and unpack them into the objects
# used by the rest of the report
data <- fetch_analyses(
  params$run_id,
  params$run_id_year,
  analyses_paths
)

performance <- data[["performance"]]
metadata <- data[["metadata"]]
shap <- data[["shap"]]
assessment_pin <- data[["assessment_pin"]]

# Keep only the card-level identifiers, the initial prediction, the township
# and the feature named by `params$added_feature`
assessment_card <- select(
  data[["assessment_card"]],
  meta_pin,
  meta_card_num,
  pred_card_initial_fmv,
  meta_township_code,
  !!sym(params$added_feature)
)

# MD5 hash recorded in the run metadata for the assessment data set
lockfile_assessment <- metadata$dvc_md5_assessment_data

# Build the S3 path of the assessment data: the first two characters of the
# hash form a directory and the remaining characters form the object name
s3_path_assessment <- paste(
  "s3://ccao-data-dvc-us-east-1/files/md5",
  substr(lockfile_assessment, 1, 2),
  substr(lockfile_assessment, 3, nchar(lockfile_assessment)),
  sep = "/"
)

assessment_data <- s3read_using(
  FUN = read_parquet,
  object = s3_path_assessment
)
```

```{r download_comparison_data}
# analyses_paths <- list(
# output = list(
# list(
# s3 = paste0("s3://ccao-model-results-us-east-1/performance/year=", params$comparison_run_id_year, "/stage=assessment/", params$comparison_run_id, ".parquet"),
# key = "performance"
# ),
# list(
# s3 = paste0("s3://ccao-model-results-us-east-1/assessment_pin/year=", params$comparison_run_id_year, "/run_id=", params$comparison_run_id, "/"),
# key = "assessment_pin"
# ),
# list(
# s3 = paste0("s3://ccao-model-results-us-east-1/shap/year=", params$comparison_run_id_year, "/run_id=", params$comparison_run_id, "/"),
# key = "shap"
# ),
# list(
# s3 = paste0("s3://ccao-model-results-us-east-1/metadata/year=", params$comparison_run_id_year, "/", params$comparison_run_id, ".parquet"),
# key = "metadata"
# )
# )
# )
#
# data_comparison <- fetch_analyses(params$comparison_run_id, params$comparison_run_id_year, analyses_paths)
#
# metadata_comparison <- data_comparison$metadata
# model_performance_assessment_comparison <- data_comparison$performance
# shap_comparison <- data_comparison$shap
#
# assessment_pin_comparison <- data_comparison$assessment_pin %>%
# select(meta_pin, pred_pin_final_fmv, sale_ratio_study_price, meta_nbhd_code, meta_triad_code, pred_pin_initial_fmv, meta_township_code)
# S3 locations of the comparison run's outputs. An `s3` value ending in "/"
# is a prefix that is listed and read file by file; any other value names a
# single Parquet object.
analyses_paths <- local({
  results_root <- "s3://ccao-model-results-us-east-1/"
  year_part <- paste0("year=", params$comparison_run_id_year, "/")
  run_prefix <- paste0(year_part, "run_id=", params$comparison_run_id, "/")
  run_file <- paste0(params$comparison_run_id, ".parquet")

  list(
    output = list(
      list(
        s3 = paste0(
          results_root, "performance/", year_part, "stage=assessment/", run_file
        ),
        key = "performance"
      ),
      list(
        s3 = paste0(results_root, "assessment_pin/", run_prefix),
        key = "assessment_pin"
      ),
      list(
        s3 = paste0(results_root, "shap/", run_prefix),
        key = "shap"
      ),
      list(
        s3 = paste0(results_root, "metadata/", year_part, run_file),
        key = "metadata"
      )
    )
  )
})

# Fetch the comparison run and unpack it into `*_comparison` objects
data_comparison <- fetch_analyses(
  params$comparison_run_id,
  params$comparison_run_id_year,
  analyses_paths
)

metadata_comparison <- data_comparison[["metadata"]]
model_performance_assessment_comparison <- data_comparison[["performance"]]
shap_comparison <- data_comparison[["shap"]]

# Only a subset of the PIN-level columns is kept for the comparison run
assessment_pin_comparison <- select(
  data_comparison[["assessment_pin"]],
  meta_pin,
  pred_pin_final_fmv,
  sale_ratio_study_price,
  meta_nbhd_code,
  meta_triad_code,
  pred_pin_initial_fmv,
  meta_township_code
)
```

```{r}
Expand All @@ -215,16 +215,16 @@ paths <- model_file_dict(model_params$run_id, model_params$year)
```

```{r}
# Load locally saved Parquet copies of the run outputs from the working
# directory. Each name below becomes an object in the current environment,
# read from the file it maps to, in the order listed.
local_parquet_files <- c(
  assessment_data = "assessment_data.parquet",
  assessment_pin = "assessment_pin.parquet",
  assessment_card = "assessment_card.parquet",
  performance = "performance.parquet",
  shap = "shap.parquet",
  metadata = "metadata.parquet",
  assessment_pin_comparison = "assessment_pin_comparison.parquet",
  model_performance_assessment_comparison =
    "model_performance_assessment_comparison.parquet",
  shap_comparison = "shap_comparison.parquet",
  metadata_comparison = "metadata_comparison.parquet"
)
# invisible() keeps the environment returned by list2env() out of the
# rendered chunk output
invisible(
  list2env(lapply(local_parquet_files, read_parquet), envir = environment())
)
rm(local_parquet_files)
# assessment_data <- read_parquet("assessment_data.parquet")
# assessment_pin <- read_parquet("assessment_pin.parquet")
# assessment_card <- read_parquet("assessment_card.parquet")
# performance <- read_parquet("performance.parquet")
# shap <- read_parquet("shap.parquet")
# metadata <- read_parquet("metadata.parquet")
# assessment_pin_comparison <- read_parquet("assessment_pin_comparison.parquet")
# model_performance_assessment_comparison <- read_parquet("model_performance_assessment_comparison.parquet")
# shap_comparison <- read_parquet("shap_comparison.parquet")
# metadata_comparison <- read_parquet("metadata_comparison.parquet")
```


Expand Down

0 comments on commit 9b8f605

Please sign in to comment.