-
Notifications
You must be signed in to change notification settings - Fork 16
/
scrape_colours.R
68 lines (61 loc) · 1.8 KB
/
scrape_colours.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
##
## scrape_players: players in squads
## scrape_flag: national team flags
## scrape_colours: national team kits
## scrape_comp: competition teams and logos
##
library(tidyverse)
library(rvest)
library(countrycode)
# Team lookup: read the competition table and keep each distinct
# (nat_team_alpha3, url_team) combination once.
d <- distinct(
  select(read_csv("./data/wiki_comp.csv"), nat_team_alpha3, url_team)
)
# Scrape the kit colours shown on a team's English Wikipedia page.
#
# u: page path; it is appended verbatim to "https://en.wikipedia.org/".
#    NOTE(review): if the stored paths already begin with "/", the request
#    URL contains a double slash -- confirm against the input data.
#
# Returns a one-row tibble with character columns `shirt`, `away`, `shorts`
# and `socks`. Each value is the text that follows "background-color:" in
# the inline style of the selected <div>, or NA when the page has no such
# element or the element carries no background colour.
get_kit_colours <- function(u) {
  h <- paste0("https://en.wikipedia.org/", u) %>%
    read_html()

  # Background colour of the first node matched by `css`, as one string.
  #
  # Always yields exactly one value so every column of the tibble below has
  # length one. Previously a selector with no match failed on `[[1]]`
  # ("subscript out of bounds"), a node without a background colour gave a
  # zero-length column (hence a zero-row tibble), and several matching
  # declarations gave more than one row for the same team.
  bg_colour <- function(css) {
    styles <- h %>%
      html_nodes(css) %>%
      html_attr("style")
    if (length(styles) == 0 || is.na(styles[[1]])) {
      return(NA_character_)
    }

    # Inline styles are ";"-separated declarations; keep the colour ones.
    declarations <- str_split(styles[[1]], ";")[[1]]
    found <- declarations %>%
      str_subset("background-color") %>%
      str_remove("background-color:") %>%
      str_trim()

    if (length(found) == 0) NA_character_ else found[[1]]
  }

  # kit colour
  # Cell 1 of the .toccolours table supplies shirt/shorts/socks, cell 2 the
  # away value (presumably the home and away kits -- confirm on a live page).
  tibble(
    shirt  = bg_colour(".toccolours td:nth-child(1) div:nth-child(3)"),
    away   = bg_colour(".toccolours td:nth-child(2) div:nth-child(3)"),
    shorts = bg_colour(".toccolours td:nth-child(1) div:nth-child(7)"),
    socks  = bg_colour(".toccolours td:nth-child(1) div:nth-child(9)")
  )
}
# Scrape every team page. `kit` is a list-column holding the tibble that
# get_kit_colours() returns for each url_team.
d0 <- mutate(d, kit = map(url_team, get_kit_colours))

# Spread the kit tibbles out into ordinary columns.
d1 <- unnest(d0, kit)

# Any shirt value that is not exactly 7 characters long gets a "0" appended.
# NOTE(review): presumably this completes a truncated "#RRGGBB" code; only
# the shirt column is treated this way -- confirm that is intended.
d1 <- mutate(
  d1,
  shirt = ifelse(str_length(shirt) != 7, paste0(shirt, "0"), shirt)
)

# Prefix the colour columns with "kit_" for the output file.
d1 <- rename(
  d1,
  kit_shirt = shirt,
  kit_away = away,
  kit_shorts = shorts,
  kit_socks = socks
)

write_excel_csv(d1, "./data/wiki_colours.csv")