-
Notifications
You must be signed in to change notification settings - Fork 2
/
tag_topics.py
57 lines (50 loc) · 1.81 KB
/
tag_topics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import requests
from operator import itemgetter
# Topics: each list holds the keywords that are matched (case-insensitively)
# against a bill's portfolio to assign that topic.
citizens = [
    "Education",
    "Skills",
    "Employment",
    "Health",
    "Veterans",
    "youth",
    "sports",
    "Social",
    "Indigenous",
    "Disability",
]
nature = [
    "Agriculture",
    "water",
    "Environment",
]
national_development = [
    "Industry",
    "Science",
    "Resources",
    "Energy",
    "Infrastructure",
    "Transport",
    "Regional Development",
    "Industrial",
]
borders = [
    "Trade",
    "Home",
    "Defence",
    "Foreign",
    "Immigration",
    "Citizenship",
    "Migrant",
]
economy = [
    "Treasury",
    "Finance",
]
communications = [
    "Communications",
]

# Topic name -> keyword list, in the order topics are checked and reported.
topics = dict(
    citizens=citizens,
    nature=nature,
    national_development=national_development,
    borders=borders,
    economy=economy,
    communications=communications,
)
# Related terms: related_terms.csv has a header row of headings; every row
# below it supplies (at most) one term per heading column.
# The context manager guarantees the file handle is closed after reading.
with open("related_terms.csv", "r") as rt:
    topic_lines = rt.readlines()
headings = topic_lines.pop(0)
headings_list = headings.replace("\n", "").lower().split(",")

# Split every data row once, instead of re-splitting it for each column.
_term_rows = [tl.replace("\n", "").split(",") for tl in topic_lines]

# heading (lower-cased) -> list of its non-empty, lower-cased terms.
topic_models = {}
for i, heading in enumerate(headings_list):
    # Rows shorter than the header (ragged or blank lines) simply contribute
    # no term for the missing columns rather than raising IndexError.
    topic_models[heading] = [
        row[i].lower()
        for row in _term_rows
        if i < len(row) and row[i] != ""
    ]
def tag_bill(bill):
    """Return the list of topic tags that apply to ``bill``.

    The bill's ``"portfolio"`` is checked first: every topic in the
    module-level ``topics`` mapping that has at least one keyword occurring
    (case-insensitively) in the portfolio text is returned, in ``topics``
    order.

    If the portfolio is missing, empty, or matches no topic, the function
    falls back to the module-level ``topic_models``: every heading in
    ``headings_list`` that has at least one term occurring in the lower-cased
    ``"short_title"`` or ``"summary"`` of the bill is returned, in
    ``headings_list`` order and without duplicates.

    Args:
        bill: Mapping describing a bill. The keys ``"portfolio"``,
            ``"short_title"`` and ``"summary"`` are read; a missing key or a
            ``None`` value is treated as an empty string.

    Returns:
        A list of topic / heading names; empty when nothing matches.
    """
    # Missing, empty and None portfolios all mean "no portfolio information".
    portfolio = (bill.get("portfolio") or "").lower()
    if portfolio:
        tagged = [
            topic
            for topic, domains in topics.items()
            if any(domain.lower() in portfolio for domain in domains)
        ]
        if tagged:
            return tagged

    # Fallback: match related terms against the title and summary text.
    # Lower-case once here rather than on every term comparison.
    short_title = (bill.get("short_title") or "").lower()
    summary = (bill.get("summary") or "").lower()

    matched = []
    for heading in headings_list:
        if heading in matched:
            # headings_list may repeat a heading; report it only once.
            continue
        if any(
            term in short_title or term in summary
            for term in topic_models[heading]
        ):
            matched.append(heading)
    return matched