-
Notifications
You must be signed in to change notification settings - Fork 292
/
demo_log_odds_ratio_prior.py
32 lines (30 loc) · 1.06 KB
/
demo_log_odds_ratio_prior.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from scattertext.termcompaction.CompactTerms import CompactTerms
import scattertext as st
from scattertext import LogOddsRatioInformativeDirichletPrior
# Name of the HTML file this demo writes (relative to the working directory).
fn = 'demo_log_odds_ratio_prior.html'

# Load the Rotten Tomatoes sample data set that ships with scattertext.
df = st.SampleCorpora.RottenTomatoes.get_data()

# Configure the corpus builder: documents are read from the 'text' column,
# their labels from the 'category' column, and parsing is delegated to
# scattertext's whitespace_nlp_with_sentences callable.
corpus_builder = st.CorpusFromPandas(
    df,
    category_col='category',
    text_col='text',
    nlp=st.whitespace_nlp_with_sentences,
)
corpus = corpus_builder.build()
# Set up a prior factory for the 'fresh' vs. 'rotten' comparison, with a
# starting count of 1.
prior_factory = st.PriorFactory(
    corpus,
    category='fresh',
    not_categories=['rotten'],
    starting_count=1,
)
# The prior is drawn from all categories of the corpus. An alternative that
# was tried here and left disabled is `.use_general_term_frequencies()`.
priors = prior_factory.use_all_categories().get_priors()
# Render the frequency explorer for 'fresh' vs. 'rotten', scoring terms with
# LogOddsRatioInformativeDirichletPrior built from the priors computed above.
# The result is encoded to UTF-8 below, so it is expected to be a str.
html = st.produce_frequency_explorer(
    corpus,
    category='fresh',
    category_name='fresh',
    not_categories=['rotten'],
    metadata=df['movie_name'],
    term_scorer=LogOddsRatioInformativeDirichletPrior(priors, 1),
    horizontal_line_y_position=0,
)

# Write the page as UTF-8 bytes. The context manager guarantees the file is
# flushed and closed even if the write fails; the previous bare
# `open(...).write(...)` left closing to garbage collection.
with open(fn, 'wb') as out_file:
    out_file.write(html.encode('utf-8'))

# Tell the user which file was written.
print(fn)