Skip to content

hrbrmstr/fbi-2018-ic3

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

3 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

2018 FBI IC3 PDF Data Wrangling

library(readxl)
library(ggbeeswarm)
library(pdftools)
library(stringi)
library(hrbrthemes)
library(ggrepel)
library(tidyverse)
tibble(
  year = 2014:2018,
  complaints = c(269422, 288012, 298728, 301580, 351937),
  losses = c(800.5, 1070.7, 1450.7, 1418.7, 2706.4)
) -> ic3_summary

write_csv(ic3_summary, here::here("data/2018-fbi-ic3-annual-summary.csv"))

ggplot(ic3_summary, aes(complaints, losses)) +
  geom_path(
    arrow = arrow(type = "closed", length = unit(12, "pt")),
    color = "#31739C"
  ) +
  geom_point(color = "#31739C") +
  geom_label_repel(
    aes(label = year), family = font_rc, size = c(rep(3, 4), 4),
    color = c(rep("#3B454A", 4), "black"),
    fontface = c(rep("plain", 4), "bold")
  ) +
  scale_x_comma(limits = c(0, NA)) +
  scale_y_continuous(label = scales::dollar, limits = c(0, NA)) +
  labs(
    x = "Number of Complaints", y = "Losses (USD, millions)",
    title = "Both Incident Count and Total Losses Related to Cybercrime\nSkyrocketed in the 2018 Edition of the FBI IC3 Report",
    subtitle = "Zero baseline; Point labels denote IC3 summary year data",
    caption = "Source: 2018 FBI IC3; Page 5 'IC3 Complaint Statistics 2014-2018'"
  ) +
  theme_ipsum_rc()

ic3 <- pdf_text(here::here("raw/2018_IC3Report.pdf"))

ic3[[16]] %>% 
  stri_split_lines() %>% 
  unlist() %>% 
  stri_trim_both() -> l

l[which(stri_detect_regex(l, "^Under")):which(stri_detect_regex(l, "^Over 6"))] %>% 
  stri_split_regex("[[:space:]]{3,}", simplify = TRUE) %>% 
  as.data.frame(stringsAsFactors = FALSE) %>% 
  set_names("age_group", "incidents", "losses") %>% 
  as_tibble() %>% 
  mutate(losses = stri_replace_first_fixed(losses, "$", "")) %>% 
  type_convert(
    col_types = cols(
      age_group = col_character(),
      incidents = col_number(),
      losses = col_number()
    )
  ) -> loss

write_csv(loss, here::here("data/2018-fbi-ic3-loss-by-age.csv"))

mutate(loss, `Loss Ratio (USD, milions)` = losses/incidents) %>% 
  rename(
    `Total Losses (USD, milions)` = losses,
    `Total Incidents` = incidents
  ) %>% 
  mutate(age_group = stri_replace_first_fixed(age_group, " ", "\n")) %>% 
  mutate(age_group = factor(age_group, age_group)) %>% 
  gather(measure, value, -age_group) %>% 
  mutate(
    measure = factor(measure, levels = c(
      "Total Incidents", "Total Losses (USD, milions)", "Loss Ratio (USD, milions)"))
  ) %>% 
  ggplot(aes(age_group, value)) +
  geom_col(width=0.45, fill = "#31739C") +
  scale_x_discrete() +
  scale_y_comma() +
  facet_wrap(~measure, scales = "free") +
  labs(
    x = NULL, y = "Loss Ratio (total losses/victim count)",
    title = "In 2018, Older Victims Generally Lost More Overall and Per-Incident Than Younger Victims",
    subtitle = "Note that 40-49 age group had more incients than older groups but fewer overall losses.",
    caption = "NOTE: Free Y Scale\nSource: 2018 FBI IC3; Page 16 '2018 Victims by Age Group'"
  ) +
  theme_ipsum_rc(grid="Y")

ic3[[19]] %>% 
  stri_split_lines() %>% 
  unlist() %>% 
  keep(stri_detect_regex, "^[[:upper:]]") %>% 
  keep(stri_detect_regex, "[[:digit:]]") %>% 
  stri_replace_first_regex("([[:digit:]])   ([[:alpha:]\\*])", "$1\t$2") %>% 
  stri_split_fixed("\t") %>% 
  unlist() %>% 
  keep(stri_detect_regex, " [[:digit:]]") %>% 
  stri_match_first_regex("([^[:digit:]]+)([[:digit:],]+)$") %>% 
  .[,2:3] %>% 
  as.data.frame(stringsAsFactors=FALSE) %>% 
  as_tibble() %>% 
  mutate_all(.funs = stri_trim_both) %>% 
  type_convert(
    col_types = cols(
      V1 = col_character(),
      V2 = col_number()
    )
  ) %>% 
  mutate(V1 = case_when(
    stri_detect_fixed(V1, "IPR/Copy") ~ "IPR/Copyright and Counterfeit",
    TRUE ~ V1
  )) %>% 
  set_names(c("crime", "victim_count")) %>% 
  head(-2) %>% 
  arrange(desc(victim_count)) -> victims

ic3[[20]] %>% 
  stri_split_lines() %>% 
  unlist() %>% 
  keep(stri_detect_regex, "^    [[:upper:]]") %>% 
  keep(stri_detect_regex, "[[:digit:]]") %>% 
  stri_trim_both() %>% 
  stri_replace_first_regex("([[:digit:]])   ([[:alpha:]\\*])", "$1\t$2") %>% 
  stri_split_fixed("\t") %>% 
  unlist() %>% 
  stri_replace_first_regex("([[:digit:]])[[:space:]]+([[:alpha:]\\*])", "$1\t$2") %>% 
  stri_split_fixed("\t") %>%  
  unlist() %>% 
  keep(stri_detect_regex, "\\$[[:digit:]]") %>% 
  stri_match_first_regex("([^\\$]+)([\\$[:digit:],\\.]+)$") %>% 
  .[,2:3] %>% 
  as.data.frame(stringsAsFactors=FALSE) %>% 
  as_tibble() %>% 
  mutate_all(.funs = stri_trim_both) %>% 
  mutate(V2 = stri_replace_first_fixed(V2, "$", "")) %>% 
  type_convert(
    col_types = cols(
      V1 = col_character(),
      V2 = col_number()
    )
  ) %>% 
  mutate(V1 = case_when(
    stri_detect_fixed(V1, "IPR/Copy") ~ "IPR/Copyright and Counterfeit",
    stri_detect_fixed(V1, "Malware/Sca") ~ "Malware/Scareware/Virus",
    stri_detect_fixed(V1, "Harassment/T") ~ "Harassment/Threats of Violence",
    stri_detect_fixed(V1, "Ransomware") ~ "Ransomware",
    stri_detect_fixed(V1, "Denial of Service") ~ "Denial of Service/TDoS",
    stri_detect_fixed(V1, "Re-Shipping") ~ "Re-shipping",
    TRUE ~ V1
  )) %>% 
  set_names(c("crime", "loss")) %>% 
  head(-2) %>% 
  left_join(victims, "crime") %>% 
  mutate(loss = loss / 1000000) -> crime_types

write_csv(crime_types, here::here("data/2018-fbi-ic3-loss-by-crime-type.csv"))

ggplot() +
  geom_point(
    data = mutate(crime_types, color = case_when(
      (loss >= 100) | (victim_count >= 20000) ~ "#E85E26",
      TRUE ~ "#31739C"
    )), 
    aes(victim_count, loss, color = I(color))
  ) +
  geom_label_repel(
    data = filter(crime_types, (loss >= 100) | (victim_count >= 20000)),
    aes(victim_count, loss, label = crime),
    size = 3, family = font_rc
  ) +
  scale_x_comma() +
  scale_y_continuous(label = scales::dollar) +
  labs(
    x = "Victim count", y = "Loss (USD, millions)",
    title = "[Business] E-mail Account Compromise was the Most Profitable\nIC3 Crime in 2018 with over $1.2 billion (USD) in Losses",
    subtitle = "Markers only on IC3 crimes with ≥$100m (USD) losses or ≥20,000 victims ",
    caption = "Source: 2018 FBI IC3; Pages 19-20 '2018 Crime Types'"
  ) +
  theme_ipsum_rc(grid="XY")

arrange(crime_types, desc(loss)) %>% 
  select(`Crime` = 1, `Loss (USD, millions)` = 2, `Victim Count` = 3) %>% 
  gt::gt() %>% 
  gt::fmt_number("Victim Count", decimals = 0) %>% 
  gt::fmt_currency("Loss (USD, millions)", decimals = 2)
<style>html { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans', 'Helvetica Neue', Arial, sans-serif; } #jemiiwsncq .gt_table { border-collapse: collapse; margin-left: auto; margin-right: auto; color: #000000; font-size: 16px; background-color: #FFFFFF; /* table.background.color */ width: auto; /* table.width */ border-top-style: solid; /* table.border.top.style */ border-top-width: 2px; /* table.border.top.width */ border-top-color: #A8A8A8; /* table.border.top.color */ } #jemiiwsncq .gt_heading { background-color: #FFFFFF; /* heading.background.color */ border-bottom-color: #FFFFFF; } #jemiiwsncq .gt_title { color: #000000; font-size: 125%; /* heading.title.font.size */ padding-top: 4px; /* heading.top.padding */ padding-bottom: 1px; border-bottom-color: #FFFFFF; border-bottom-width: 0; } #jemiiwsncq .gt_subtitle { color: #000000; font-size: 85%; /* heading.subtitle.font.size */ padding-top: 1px; padding-bottom: 4px; /* heading.bottom.padding */ border-top-color: #FFFFFF; border-top-width: 0; } #jemiiwsncq .gt_bottom_border { border-bottom-style: solid; /* heading.border.bottom.style */ border-bottom-width: 2px; /* heading.border.bottom.width */ border-bottom-color: #A8A8A8; /* heading.border.bottom.color */ } #jemiiwsncq .gt_column_spanner { border-bottom-style: solid; border-bottom-width: 2px; border-bottom-color: #A8A8A8; padding-top: 4px; padding-bottom: 4px; } #jemiiwsncq .gt_col_heading { color: #000000; background-color: #FFFFFF; /* column_labels.background.color */ font-size: 16px; /* column_labels.font.size */ font-weight: initial; /* column_labels.font.weight */ padding: 10px; margin: 10px; } #jemiiwsncq .gt_sep_right { border-right: 5px solid #FFFFFF; } #jemiiwsncq .gt_group_heading { padding: 8px; color: #000000; background-color: #FFFFFF; /* stub_group.background.color */ font-size: 16px; /* stub_group.font.size */ font-weight: initial; /* stub_group.font.weight */ border-top-style: solid; /* stub_group.border.top.style */ border-top-width: 2px; /* stub_group.border.top.width */ border-top-color: #A8A8A8; /* stub_group.border.top.color */ border-bottom-style: solid; /* stub_group.border.bottom .style */ border-bottom-width: 2px; /* stub_group.border.bottom .width */ border-bottom-color: #A8A8A8; /* stub_group.border.bottom .color */ } #jemiiwsncq .gt_empty_group_heading { padding: 0.5px; color: #000000; background-color: #FFFFFF; /* stub_group.background.color */ font-size: 16px; /* stub_group.font.size */ font-weight: initial; /* stub_group.font.weight */ border-top-style: solid; /* stub_group.border.top.style */ border-top-width: 2px; /* stub_group.border.top.width */ border-top-color: #A8A8A8; /* stub_group.border.top.color */ border-bottom-style: solid; /* stub_group.border.bottom .style */ border-bottom-width: 2px; /* stub_group.border.bottom .width */ border-bottom-color: #A8A8A8; /* stub_group.border.bottom .color */ } #jemiiwsncq .gt_striped tr:nth-child(even) { background-color: #f2f2f2; } #jemiiwsncq .gt_row { padding: 10px; /* row.padding */ margin: 10px; } #jemiiwsncq .gt_stub { border-right-style: solid; border-right-width: 2px; border-right-color: #A8A8A8; text-indent: 5px; } #jemiiwsncq .gt_stub.gt_row { background-color: #FFFFFF; } #jemiiwsncq .gt_summary_row { background-color: #FFFFFF; /* summary_row.background.color */ padding: 6px; /* summary_row.padding */ text-transform: inherit; /* summary_row.text_transform */ } #jemiiwsncq .gt_first_summary_row { border-top-style: solid; border-top-width: 2px; border-top-color: #A8A8A8; } #jemiiwsncq .gt_table_body { border-top-style: solid; /* field.border.top.style */ border-top-width: 2px; /* field.border.top.width */ border-top-color: #A8A8A8; /* field.border.top.color */ border-bottom-style: solid; /* field.border.bottom.style */ border-bottom-width: 2px; /* field.border.bottom.width */ border-bottom-color: #A8A8A8; /* field.border.bottom.color */ } #jemiiwsncq .gt_footnote { font-size: 90%; /* footnote.font.size */ padding: 4px; /* footnote.padding */ } #jemiiwsncq .gt_sourcenote { font-size: 90%; /* sourcenote.font.size */ padding: 4px; /* sourcenote.padding */ } #jemiiwsncq .gt_center { text-align: center; } #jemiiwsncq .gt_left { text-align: left; } #jemiiwsncq .gt_right { text-align: right; font-variant-numeric: tabular-nums; } #jemiiwsncq .gt_font_normal { font-weight: normal; } #jemiiwsncq .gt_font_bold { font-weight: bold; } #jemiiwsncq .gt_font_italic { font-style: italic; } #jemiiwsncq .gt_super { font-size: 65%; } #jemiiwsncq .gt_footnote_glyph { font-style: italic; font-size: 65%; } </style>

Crime

Loss (USD, millions)

Victim Count

BEC/EAC

$1,297.80

20,373

Confidence Fraud/Romance

$362.50

18,493

Investment

$252.96

3,693

Non-Payment/Non-Delivery

$183.83

65,116

Real Estate/Rental

$149.46

11,300

Personal Data Breach

$148.89

50,642

Corporate Data Breach

$117.71

2,480

Identity Theft

$100.43

16,128

Advanced Fee

$92.27

16,362

Credit Card Fraud

$88.99

15,210

Extortion

$83.36

51,146

Spoofing

$70.00

15,569

Government Impersonation

$64.21

10,978

Other

$63.13

10,826

Lottery/Sweepstakes

$60.21

7,146

Overpayment

$53.23

15,512

Phishing/Vishing/Smishing/Pharming

$48.24

26,379

Employment

$45.49

14,979

Tech Support

$38.70

14,408

Harassment/Threats of Violence

$21.90

18,415

Misrepresentation

$20.00

5,959

IPR/Copyright and Counterfeit

$15.80

2,249

Civil Matter

$15.17

768

Malware/Scareware/Virus

$7.41

2,811

Health Care Related

$4.47

337

Ransomware

$3.62

1,493

Denial of Service/TDoS

$2.05

1,799

Re-shipping

$1.68

907

Charity

$1.01

493

Gambling

$0.93

181

Crimes Against Children

$0.27

1,394

Hacktivist

$0.08

77

Terrorism

$0.01

120

No Lead Value

$0.00

36,936

ic3[[21]] %>%
  stri_split_lines() %>% 
  unlist() %>% 
  keep(stri_detect_regex, "^[[:digit:]]") %>% 
  stri_replace_first_regex("([[:digit:],]+)[[:space:]]+([[:digit:]])", "$1\t$2") %>% 
  stri_split_fixed("\t") %>% 
  unlist() %>% 
  stri_replace_first_regex("^[[:digit:] ]+", "") %>% 
  stri_replace_first_regex("[[:space:]]+([[:digit:]])", "\t$1") %>% 
  stri_split_fixed("\t", simplify = TRUE) %>% 
  as.data.frame(stringsAsFactors=FALSE) %>% 
  as_tibble() %>% 
  mutate_all(.funs = stri_trim_both) %>% 
  mutate(V1 = case_when(
    stri_detect_fixed(V1, "Northern Marina") ~ "Northern Mariana Islands",
    TRUE ~ V1
  )) %>% 
  type_convert(
    col_types = cols(
      V1 = col_character(),
      V2 = col_number()
    )
  ) %>% 
  set_names(c("state", "victim_count")) -> state_vics

ic3[[23]] %>%
  stri_split_lines() %>% 
  unlist() %>% 
  keep(stri_detect_regex, "^[[:space:]]+[[:digit:]]") %>% 
  stri_replace_first_regex("([[:digit:],]+)[[:space:]]+([[:digit:]])", "$1\t$2") %>% 
  stri_split_fixed("\t") %>% 
  unlist() %>% 
  stri_replace_first_regex("^[[:digit:] ]+", "") %>% 
  stri_replace_first_regex("[[:space:]]+([[:digit:]])", "\t$1") %>% 
  stri_split_fixed("\t", simplify = TRUE) %>% 
  as.data.frame(stringsAsFactors=FALSE) %>% 
  as_tibble() %>% 
  mutate_all(.funs = stri_trim_both) %>% 
  mutate(V1 = case_when(
    stri_detect_fixed(V1, "Northern Marina") ~ "Northern Mariana Islands",
    TRUE ~ V1
  )) %>% 
  type_convert(
    col_types = cols(
      V1 = col_character(),
      V2 = col_number()
    )
  ) %>% 
  set_names(c("state", "subject_earnings")) -> subj_earnings

ic3[[22]] %>%
  stri_split_lines() %>% 
  unlist() %>% 
  keep(stri_detect_regex, "^    [[:digit:]]") %>% 
  stri_trim_both() %>% 
  stri_replace_first_regex("([[:digit:],]+)[[:space:]]+([[:digit:]])", "$1\t$2") %>% 
  stri_split_fixed("\t") %>% 
  unlist() %>% 
  stri_replace_first_regex("^[[:digit:] ]+", "") %>% 
  stri_replace_first_regex("[[:space:]]+\\$([[:digit:]])", "\t$1") %>% 
  stri_split_fixed("\t", simplify = TRUE) %>% 
  as.data.frame(stringsAsFactors=FALSE) %>% 
  as_tibble() %>%
  mutate_all(.funs = stri_trim_both) %>% 
  type_convert(
    col_types = cols(
      V1 = col_character(),
      V2 = col_number()
    )
  ) %>%  
  set_names(c("state", "loss")) %>% 
  left_join(subj_earnings, "state") %>% 
  left_join(state_vics, "state") %>% 
  mutate(subject_earnings = subject_earnings / 1000000) %>% 
  mutate(loss = loss / 1000000) -> by_state

if (!file.exists(here::here("raw/2018-pop-est.xlsx"))) {
  download.file(
    url = "https://www2.census.gov/programs-surveys/popest/tables/2010-2018/national/totals/nst-est2018-01.xlsx", 
    destfile = here::here("raw/2018-pop-est.xlsx")
  )
}

read_excel(here::here("raw/2018-pop-est.xlsx"), skip=9, col_names = FALSE) %>% 
  select(state = 1, pop_2018 = 12) %>% 
  mutate(state = stri_replace_first_fixed(state, ".", "")) %>%
  filter(!is.na(state), !is.na(pop_2018)) %>% 
  add_row(state = "U.S. Virgin Islands", pop_2018 = 104914) %>% 
  add_row(state = "Guam", pop_2018 = 165718) %>% 
  add_row(state = "U.S. Minor Outlying Islands", pop_2018 = 270) %>% 
  add_row(state = "American Samoa", pop_2018 = 55679) %>% 
  add_row(state = "Northern Mariana Islands", pop_2018 = 55194) -> pops

left_join(by_state, pops, "state")  %>% 
  mutate(
    loss_per_vic = loss/victim_count,
    frac = victim_count / pop_2018
  ) -> by_state

write_csv(by_state, here::here("data/2018-fbi-ic3-loss-by-state.csv"))
arrange(by_state, desc(frac)) %>%
  mutate(loss_per_vic = loss_per_vic * 1000000) %>% 
  select(
    `State` = 1, 
    `Victim Count` = 4, 
    `Loss (USD, millions)` = 3, 
    `Loss per Victim (USD)` = 6, 
    `% Population Impacted` = 7,
    `Subject Earnings (USD, millions)` = 2
  ) %>% 
  gt::gt() %>% 
  gt::fmt_number("Victim Count", decimals = 0) %>% 
  gt::fmt_currency("Loss (USD, millions)", decimals = 2) %>% 
  gt::fmt_currency("Loss per Victim (USD)", decimals = 2) %>% 
  gt::fmt_percent("% Population Impacted", decimals = 4) %>% 
  gt::fmt_currency("Subject Earnings (USD, millions)", decimals = 2) 
<style>html { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans', 'Helvetica Neue', Arial, sans-serif; } #pvgcvpomyy .gt_table { border-collapse: collapse; margin-left: auto; margin-right: auto; color: #000000; font-size: 16px; background-color: #FFFFFF; /* table.background.color */ width: auto; /* table.width */ border-top-style: solid; /* table.border.top.style */ border-top-width: 2px; /* table.border.top.width */ border-top-color: #A8A8A8; /* table.border.top.color */ } #pvgcvpomyy .gt_heading { background-color: #FFFFFF; /* heading.background.color */ border-bottom-color: #FFFFFF; } #pvgcvpomyy .gt_title { color: #000000; font-size: 125%; /* heading.title.font.size */ padding-top: 4px; /* heading.top.padding */ padding-bottom: 1px; border-bottom-color: #FFFFFF; border-bottom-width: 0; } #pvgcvpomyy .gt_subtitle { color: #000000; font-size: 85%; /* heading.subtitle.font.size */ padding-top: 1px; padding-bottom: 4px; /* heading.bottom.padding */ border-top-color: #FFFFFF; border-top-width: 0; } #pvgcvpomyy .gt_bottom_border { border-bottom-style: solid; /* heading.border.bottom.style */ border-bottom-width: 2px; /* heading.border.bottom.width */ border-bottom-color: #A8A8A8; /* heading.border.bottom.color */ } #pvgcvpomyy .gt_column_spanner { border-bottom-style: solid; border-bottom-width: 2px; border-bottom-color: #A8A8A8; padding-top: 4px; padding-bottom: 4px; } #pvgcvpomyy .gt_col_heading { color: #000000; background-color: #FFFFFF; /* column_labels.background.color */ font-size: 16px; /* column_labels.font.size */ font-weight: initial; /* column_labels.font.weight */ padding: 10px; margin: 10px; } #pvgcvpomyy .gt_sep_right { border-right: 5px solid #FFFFFF; } #pvgcvpomyy .gt_group_heading { padding: 8px; color: #000000; background-color: #FFFFFF; /* stub_group.background.color */ font-size: 16px; /* stub_group.font.size */ font-weight: initial; /* stub_group.font.weight */ border-top-style: solid; /* stub_group.border.top.style */ border-top-width: 2px; /* stub_group.border.top.width */ border-top-color: #A8A8A8; /* stub_group.border.top.color */ border-bottom-style: solid; /* stub_group.border.bottom .style */ border-bottom-width: 2px; /* stub_group.border.bottom .width */ border-bottom-color: #A8A8A8; /* stub_group.border.bottom .color */ } #pvgcvpomyy .gt_empty_group_heading { padding: 0.5px; color: #000000; background-color: #FFFFFF; /* stub_group.background.color */ font-size: 16px; /* stub_group.font.size */ font-weight: initial; /* stub_group.font.weight */ border-top-style: solid; /* stub_group.border.top.style */ border-top-width: 2px; /* stub_group.border.top.width */ border-top-color: #A8A8A8; /* stub_group.border.top.color */ border-bottom-style: solid; /* stub_group.border.bottom .style */ border-bottom-width: 2px; /* stub_group.border.bottom .width */ border-bottom-color: #A8A8A8; /* stub_group.border.bottom .color */ } #pvgcvpomyy .gt_striped tr:nth-child(even) { background-color: #f2f2f2; } #pvgcvpomyy .gt_row { padding: 10px; /* row.padding */ margin: 10px; } #pvgcvpomyy .gt_stub { border-right-style: solid; border-right-width: 2px; border-right-color: #A8A8A8; text-indent: 5px; } #pvgcvpomyy .gt_stub.gt_row { background-color: #FFFFFF; } #pvgcvpomyy .gt_summary_row { background-color: #FFFFFF; /* summary_row.background.color */ padding: 6px; /* summary_row.padding */ text-transform: inherit; /* summary_row.text_transform */ } #pvgcvpomyy .gt_first_summary_row { border-top-style: solid; border-top-width: 2px; border-top-color: #A8A8A8; } #pvgcvpomyy .gt_table_body { border-top-style: solid; /* field.border.top.style */ border-top-width: 2px; /* field.border.top.width */ border-top-color: #A8A8A8; /* field.border.top.color */ border-bottom-style: solid; /* field.border.bottom.style */ border-bottom-width: 2px; /* field.border.bottom.width */ border-bottom-color: #A8A8A8; /* field.border.bottom.color */ } #pvgcvpomyy .gt_footnote { font-size: 90%; /* footnote.font.size */ padding: 4px; /* footnote.padding */ } #pvgcvpomyy .gt_sourcenote { font-size: 90%; /* sourcenote.font.size */ padding: 4px; /* sourcenote.padding */ } #pvgcvpomyy .gt_center { text-align: center; } #pvgcvpomyy .gt_left { text-align: left; } #pvgcvpomyy .gt_right { text-align: right; font-variant-numeric: tabular-nums; } #pvgcvpomyy .gt_font_normal { font-weight: normal; } #pvgcvpomyy .gt_font_bold { font-weight: bold; } #pvgcvpomyy .gt_font_italic { font-style: italic; } #pvgcvpomyy .gt_super { font-size: 65%; } #pvgcvpomyy .gt_footnote_glyph { font-style: italic; font-size: 65%; } </style>

State

Victim Count

Loss (USD, millions)

Loss per Victim (USD)

% Population Impacted

Subject Earnings (USD, millions)

U.S. Minor Outlying Islands

47

$0.00

$2,049.91

17.4074%

$0.10

Alaska

1,603

$0.00

$2,256.30

0.2174%

$3.62

District of Columbia

1,364

$0.00

$6,524.80

0.1942%

$8.90

Virginia

14,800

$0.01

$2,958.95

0.1738%

$43.79

Nevada

5,228

$0.00

$5,531.93

0.1723%

$28.92

Colorado

9,328

$0.00

$3,653.82

0.1638%

$34.08

Maryland

8,777

$0.00

$5,375.44

0.1452%

$47.18

Washington

10,775

$0.00

$5,616.07

0.1430%

$60.51

California

49,031

$0.02

$9,187.70

0.1240%

$450.48

Wisconsin

6,621

$0.00

$3,722.89

0.1139%

$24.65

Florida

23,984

$0.01

$7,427.51

0.1126%

$178.14

Arizona

8,027

$0.00

$5,626.77

0.1119%

$45.17

Oregon

4,511

$0.00

$6,340.05

0.1076%

$28.60

New Mexico

2,127

$0.00

$4,051.61

0.1015%

$8.62

Rhode Island

1,028

$0.00

$6,740.27

0.0972%

$6.93

Utah

3,041

$0.00

$6,779.82

0.0962%

$20.62

New Jersey

8,440

$0.00

$9,444.52

0.0947%

$79.71

Alabama

4,585

$0.00

$3,688.35

0.0938%

$16.91

Delaware

897

$0.00

$3,502.11

0.0927%

$3.14

New York

18,124

$0.01

$11,095.24

0.0927%

$201.09

Missouri

5,508

$0.00

$4,643.74

0.0899%

$25.58

Massachusetts

6,173

$0.00

$11,054.95

0.0894%

$68.24

Texas

25,589

$0.01

$7,644.34

0.0892%

$195.61

Connecticut

3,134

$0.00

$12,080.38

0.0877%

$37.86

Georgia

9,095

$0.00

$6,758.33

0.0865%

$61.47

Idaho

1,513

$0.00

$4,529.54

0.0862%

$6.85

Wyoming

497

$0.00

$9,088.79

0.0860%

$4.52

Vermont

525

$0.00

$4,052.03

0.0838%

$2.13

Tennessee

5,584

$0.00

$5,120.06

0.0825%

$28.59

Pennsylvania

10,554

$0.00

$5,940.19

0.0824%

$62.69

Illinois

10,087

$0.00

$8,213.52

0.0792%

$82.85

New Hampshire

1,056

$0.00

$5,761.96

0.0778%

$6.08

Hawaii

1,100

$0.00

$5,873.44

0.0774%

$6.46

Minnesota

4,304

$0.00

$11,341.56

0.0767%

$48.81

Michigan

7,533

$0.00

$10,743.37

0.0754%

$80.93

Louisiana

3,469

$0.00

$4,726.51

0.0744%

$16.40

Montana

787

$0.00

$8,401.60

0.0741%

$6.61

North Carolina

7,523

$0.00

$18,241.52

0.0725%

$137.23

Kansas

2,098

$0.00

$8,329.25

0.0721%

$17.47

South Carolina

3,575

$0.00

$5,473.54

0.0703%

$19.57

Indiana

4,676

$0.00

$6,325.43

0.0699%

$29.58

Oklahoma

2,644

$0.00

$4,382.72

0.0671%

$11.59

Ohio

7,812

$0.00

$12,510.25

0.0668%

$97.73

Mississippi

1,882

$0.00

$3,041.99

0.0630%

$5.73

Kentucky

2,813

$0.00

$3,324.84

0.0630%

$9.35

Iowa

1,983

$0.00

$7,734.73

0.0628%

$15.34

Nebraska

1,205

$0.00

$7,822.97

0.0625%

$9.43

Maine

832

$0.00

$3,244.89

0.0622%

$2.70

U.S. Virgin Islands

65

$0.00

$41,904.46

0.0620%

$2.72

West Virginia

1,109

$0.00

$7,483.10

0.0614%

$8.30

Arkansas

1,849

$0.00

$3,770.43

0.0614%

$6.97

North Dakota

459

$0.00

$5,003.90

0.0604%

$2.30

South Dakota

465

$0.00

$3,728.66

0.0527%

$1.73

Guam

52

$0.00

$2,981.83

0.0314%

$0.16

American Samoa

16

$0.00

$1,158.56

0.0287%

$0.02

Northern Mariana Islands

15

$0.00

$924.33

0.0272%

$0.01

Puerto Rico

704

$0.00

$7,413.48

0.0220%

$5.22

by_state <- mutate(by_state, loss_per_vic = loss_per_vic * 1000000)

ggplot(by_state) +
  geom_quasirandom(aes(x="", loss_per_vic)) -> gg

gb <- ggplot_build(gg)

as_tibble(gb$data[[1]]) %>% 
  select(x, y) %>% 
  left_join(by_state, c("y" = "loss_per_vic")) %>% 
  rename(loss_per_vic = y) -> gd

ggplot() +
  geom_blank(data = gd, aes(x, loss_per_vic)) +
  geom_hline(
    yintercept = round(median(gd$loss_per_vic)),
    linetype = "dotted", color = "#3B454A"
  ) +
  geom_label(
    data = data.frame(),
    aes(
      x = 0.5, y = round(median(gd$loss_per_vic)),
      label = sprintf(
        "2018 IC3 Median\nPer In-State\nVictim Loss\n($%s USD)", 
        scales::comma(round(median(gd$loss_per_vic)))
      )
    ), size = 3, family = font_rc, hjust = 0, vjust = 0,
    label.size = 0, lineheight = 0.875
  ) +
  geom_point(
    data = mutate(gd, color = case_when(
      (loss_per_vic >= 10000) ~ "#E85E26",
      TRUE ~ "#31739C"
    )), 
    aes(x=x, loss_per_vic, color = I(color))
  ) +
  geom_label_repel(nudge_y = 2500,
                   data = filter(gd, loss_per_vic >= 10000),
                   aes(x=x, loss_per_vic, label = state),
                   size = 3, family = font_rc
  ) +
  scale_x_continuous(expand = c(0,0.125)) +
  scale_y_continuous(label = scales::dollar) +
  labs(
    x = "Victim count", y = "Loss (USD, millions)",
    title = "U.S. Virgin Islands Residents Were Hit Hardest\nin IC3 2018 Catalogued Incidents",
    subtitle = "Markers only on IC3 states with ≥$10K (USD) losses per in-state victim",
    caption = "Source: 2018 FBI IC3; Pages 21-22 '2018 Overall State Statistics'"
  ) +
  theme_ipsum_rc(grid="XY") 

About

Data wrangling the 2018 FBI IC3 report

Topics

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published