Skip to content

Commit

Permalink
Add logic for categorical
Browse files Browse the repository at this point in the history
  • Loading branch information
Damonamajor committed Aug 7, 2024
1 parent 35faf5f commit e2f26d2
Showing 1 changed file with 30 additions and 27 deletions.
57 changes: 30 additions & 27 deletions analyses/Data_transformation.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ pin_individual <- assessment_pin_new %>%
pred_pin_initial_fmv_comp = dollar(pred_pin_initial_fmv_comp)
) %>%
inner_join(
assessment_data %>%
assessment_data_new %>%
distinct(meta_pin, .keep_all = TRUE) %>%
select(
meta_pin, meta_nbhd_code, loc_longitude,
Expand All @@ -78,34 +78,37 @@ pin_individual <- assessment_pin_new %>%
)

# Aggregate to neighborhood level ----
#
# Builds `pin_nbhd`, one row per `meta_nbhd_code`, from `pin_individual`.
# `type` and `target_feature_value` are defined outside this section;
# `target_feature_value` is used as a string holding a column name.
#
# * type == "continuous": mean, median and 90th percentile of the target
#   feature within each neighborhood.
# * otherwise (categorical): the share (in percent) of each category within
#   the neighborhood, ranked from most to least common and spread into
#   `<feature>_<rank>` / `percentage_<rank>` columns.
#
# Both branches then join to `nbhd` on `meta_nbhd_code == town_nbhd` and
# convert to sf.
# NOTE(review): presumably `nbhd` carries the neighborhood geometry that
# st_as_sf() picks up -- confirm against where `nbhd` is created.
if (type == "continuous") {
  pin_nbhd <- pin_individual %>%
    group_by(meta_nbhd_code) %>%
    summarize(
      !!paste0(target_feature_value, "_neighborhood_mean") :=
        mean(!!sym(target_feature_value), na.rm = TRUE),
      !!paste0(target_feature_value, "_neighborhood_median") :=
        median(!!sym(target_feature_value), na.rm = TRUE),
      !!paste0(target_feature_value, "_neighborhood_90th") :=
        quantile(!!sym(target_feature_value), 0.9, na.rm = TRUE)
    ) %>%
    ungroup() %>%
    inner_join(
      nbhd,
      by = c("meta_nbhd_code" = "town_nbhd")
    ) %>%
    st_as_sf()
} else {
  # The whole chain is assigned in one pipeline. Previously the assignment
  # sat inside a `{ }` block that was then piped onward, so the joined sf
  # result was computed and thrown away.
  pin_nbhd <- pin_individual %>%
    count(meta_nbhd_code, !!sym(target_feature_value)) %>%
    group_by(meta_nbhd_code) %>%
    mutate(percentage = n / sum(n) * 100) %>%
    select(meta_nbhd_code, !!sym(target_feature_value), percentage) %>%
    # Highest share first so that rank 1 is the most common category
    arrange(meta_nbhd_code, desc(percentage)) %>%
    # Still grouped by neighborhood here, so rank restarts in each one
    mutate(rank = row_number()) %>%
    ungroup() %>%
    pivot_wider(
      names_from = rank,
      values_from = c(!!sym(target_feature_value), percentage),
      names_glue = "{.value}_{rank}"
    ) %>%
    inner_join(
      nbhd,
      by = c("meta_nbhd_code" = "town_nbhd")
    ) %>%
    st_as_sf()
}

# Pivot wider for leaflet maps to allow multiple shap values
leaflet_data <- card_individual %>%
Expand Down

0 comments on commit e2f26d2

Please sign in to comment.