-
Notifications
You must be signed in to change notification settings - Fork 2
/
1_cmd_bible_cleaned.R
78 lines (69 loc) · 3.64 KB
/
1_cmd_bible_cleaned.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
##-----------------------------------------------------------------------------
# LOAD DATA
##-----------------------------------------------------------------------------
# Per-book metadata table, read from a CSV in the current working directory.
meta <- read_csv(file = "2_kjv_meta.csv")

# Download the text for every Gutenberg ID listed in the metadata, then
# collapse the downloaded rows into a single row per title.
bible <- gutenberg_download(
  select(meta, gutenberg_id),
  meta_fields = c("title", "gutenberg_id")
) %>%
  group_by(title) %>%
  summarise(
    # Glue all rows of a book into one string, separated by ", ".
    text = paste(text, collapse = ", "),
    # Reduce the per-row IDs to one numeric value per title
    # (assumes each title maps to a single ID -- TODO confirm).
    gutenberg_id = mean(as.numeric(gutenberg_id))
  ) %>%
  # No `by` is given, so the join uses every column the two tables share.
  left_join(meta)
##-----------------------------------------------------------------------------
# Prepare TEXT DATA
##-----------------------------------------------------------------------------
# Clean each book's text, split it into one row per word, and count words
# per book. The `text` column is rewritten step by step inside one mutate();
# each assignment sees the result of the previous one.
# replace_white(), strip() and replace_number() are not defined in this file
# (presumably from qdap/textclean -- confirm which package is attached).
# NOTE(review): if strip() removes digits by default, replace_number() and
# the digit filter below may never match anything -- confirm before changing.
unnest.bible <- bible %>%
  mutate(
    text = str_replace_all(text, "[^[:ascii:]]", " "),  # non-ASCII -> space
    text = str_replace_all(text, "[[:punct:]]", " "),   # punctuation -> space
    text = replace_white(text),
    text = strip(text, apostrophe.remove = TRUE),
    text = replace_number(text)
  ) %>%
  # One lower-cased token per row, stored in `word`.
  unnest_tokens(word, text, to_lower = TRUE) %>%
  # Drop every row that matches the `stop_words` table (join columns implicit).
  anti_join(stop_words) %>%
  # Discard tokens that still contain a digit.
  filter(!str_detect(word, "[0-9]+")) %>%
  # Word frequency per book; `short_title` is not created in this script
  # (presumably a column of the metadata CSV -- confirm).
  count(short_title, word) %>%
  bind_tf_idf(word, short_title, n)
##-----------------------------------------------------------------------------
# Build DTM, Run CMD Function, MERGE with Metadata
##-----------------------------------------------------------------------------
# Turn the per-book word counts into a document-term matrix, score each book
# against the concept word "introspection", and attach the book metadata.
bible.close <- unnest.bible %>%
  cast_dtm(
    document = short_title,
    term = word,
    value = n,
    weighting = tm::weightTf
  ) %>%
  # Prune terms using a sparsity threshold of 0.999.
  removeSparseTerms(.999) %>%
  # CMDist() is not defined in this file; its result is expected to carry a
  # `docs` column (renamed just below) and, presumably, one score column
  # named after the concept word -- confirm against the CMDist package.
  CMDist(cw = "introspection") %>%
  rename(short_title = "docs") %>%
  # No `by` is given, so the join uses every column shared with `bible`.
  left_join(bible) %>%
  # Factor whose levels follow descending `year`; used to order the plot.
  mutate(
    name = factor(
      short_title,
      levels = short_title[order(-year)]
    )
  ) %>%
  as_tibble()
## ----------------------------------------------------------------------------
## ----------------------------------------------------------------------------
# FLIPPED Bar Chart
# Horizontal bar chart: one facet row per book (ordered by the `name` factor),
# bar length = the `introspection` score, fill colour taken from `jaynes`.
bars <- ggplot(
  bible.close,
  aes(x = introspection, y = name, fill = jaynes)
) +
  # Bars are drawn from the values as given (identity stat and position).
  geom_barh(stat = "identity", position = "identity", width = 9) +
  # Date annotations, only for rows flagged with labels == 1, pinned at x = -2.5.
  geom_text(
    data = subset(bible.close, labels == 1),
    aes(x = -2.5, label = circa),
    family = "Arial",
    size = 3,
    nudge_y = 1
  ) +
  xlim(-3, 2.2) +
  labs(
    x = "Closeness to 'Introspection'",
    y = "Books of the KJV Bible, Arranged by Approx. Year"
  ) +
  # One panel per book; free y scale/space so each panel holds only its own bar.
  facet_grid(name ~ ., scales = "free_y", space = "free") +
  theme(
    # Facet strips carry the book names, so the y-axis text is hidden.
    strip.text = element_text(size = 6, family = "Arial"),
    strip.text.y = element_text(angle = 360),
    strip.background = element_rect(fill = "gray94"),
    axis.text.y = element_blank(),
    axis.line = element_line(colour = "black"),
    panel.spacing = unit(.1, "lines"),
    panel.grid.major = element_blank(),
    legend.position = "none"
  )
# Write the chart to a 6 x 8.5 inch PNG at 1300 dpi.
png("Figure_barchart_introspect_kjv_bible.png",
    width = 6, height = 8.5, units = "in", res = 1300)
# A ggplot object is only drawn when it is printed. A bare `bars` relies on
# top-level auto-printing, which does not happen under source() with default
# settings or inside a function/loop, and would leave the PNG blank.
# Printing explicitly makes the export work in every execution context.
print(bars)
# Close the device so the file is flushed to disk.
dev.off()
#------------------------------------------------------------------------------
### END ###