-
Notifications
You must be signed in to change notification settings - Fork 0
/
Delimeter_checker.py
81 lines (78 loc) · 2.38 KB
/
Delimeter_checker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# ---------------------------------------------------------------------------
# Settings and accumulators for the keyword scan.
# ---------------------------------------------------------------------------

# Per-file results, appended to while the cleaned text files are scanned.
ls = []     # keyword hits recorded for each file
rs = []     # running file index; plotted as 'Day_number'
lines = []  # number of "."-separated pieces counted in each file

# Running state of the scan.
counter = 0     # hits accumulated for the file currently being scanned
i = 0           # how many links have been taken from the link list so far
number_pdf = 0  # index that will be assigned to the next opened file

# Upper bound on the number of links processed.
# Change this to the number of files that have been parsed.
max1 = 100

# Single search term; only the multi-keyword list below is used by the
# active code.
guess = " trump "

# Keywords to look for. Each entry is matched as a plain substring, so the
# surrounding spaces act as word boundaries.
gs = [
    " covid19 ",
    " corona ",
    " pandemic ",
    " coronawarriors ",
    " mask ",
    " sanitation ",
    " covid ",
    " ppe ",
    " incubation period ",
    " community spread ",
    " n95 ",
    " quarantine ",
    " isolation ",
    " epidemic ",
    " flattening the curve ",
    " comorbidity ",
    " social distancing ",
    " hydroxychloroquine ",
    " aarogya setu app ",
    " lockdown ",
    # NOTE(review): no trailing space, unlike every other entry - confirm
    # whether that is intentional.
    " lockdown extension",
    " virus ",
    " infection ",
    " airborne ",
    " cough ",
    " fever ",
    " positive ",
]

# Whitespace-separated punctuation tokens that are deleted from a sentence
# before the keywords are searched for.
replace = ", ‚ . - ' ; : / ™ [ ] { } ( ) * - + & ! @ # $ % ^ _ = ` ‘ “ ~"
import sys


def _count_keyword_hits(sentence, keywords, strip_tokens):
    """Return how many entries of ``keywords`` occur in ``sentence``.

    Every token in ``strip_tokens`` is deleted from the sentence first, then
    the sentence is lower-cased and each keyword is tested as a plain
    substring. A keyword contributes at most 1, however often it occurs.
    """
    for token in strip_tokens:
        sentence = sentence.replace(token, "")
    # The keywords carry their own leading/trailing space as a word boundary.
    # Pad the stripped sentence with one space on each side so a keyword that
    # is the first or last word of the sentence still matches.
    padded = " " + sentence.lower().strip() + " "
    return sum(1 for keyword in keywords if keyword in padded)


def _scan_file(path, keywords, strip_tokens):
    """Scan one UTF-8 text file and return ``(hits, sentence_count)``.

    Each line is split on "."; every resulting piece is counted, but only
    pieces with more than two whitespace-separated words are searched for
    keywords.

    Raises:
        OSError: the file cannot be opened or read.
        UnicodeDecodeError: the file is not valid UTF-8.
    """
    hits = 0
    sentence_count = 0
    # newline="\n" makes "\n" the only line terminator and disables newline
    # translation.
    with open(path, encoding="utf-8", newline="\n") as handle:
        for raw_line in handle:
            for sentence in raw_line.split("."):
                sentence_count += 1
                if len(sentence.split()) > 2:
                    hits += _count_keyword_hits(sentence, keywords, strip_tokens)
    return hits, sentence_count


# Tokenise the punctuation list once instead of once per sentence.
punctuation_tokens = replace.split()

with open("./To_download_links.txt", 'r') as t:
    links = t.read().split()

for link in links:
    # Stop as soon as the configured number of links has been consumed.
    if i >= max1:
        break
    i += 1
    # Only the last path component of the link names the cleaned text file.
    link = link.split("/")[-1]
    print(link)
    try:
        # The cleaned files are produced by the Text_cleaner program.
        counter, size = _scan_file("./Better_cleaned/" + link + '.txt', gs, punctuation_tokens)
    except (OSError, UnicodeDecodeError) as err:
        # Best effort: a missing or undecodable file is skipped, and nothing
        # is recorded for it, so rs, ls and lines stay the same length.
        print("Skipping", link, "-", err, file=sys.stderr)
        continue
    rs.append(number_pdf)
    number_pdf += 1
    print(size)
    print(counter)
    # An empty file has size 0; report 0% instead of dividing by zero.
    percentage = counter / size * 100 if size else 0.0
    print("Percentage = ", percentage)
    print()
    ls.append(counter)
    lines.append(size)
    counter = 0
print(ls)
print(lines)
ls.reverse()
print(ls, rs)
# Graph plotting section: draw the recorded keyword counts (ls) against the
# running file index (rs) as a line chart and show it in a window.
from pandas import DataFrame
import matplotlib.pyplot as plt

plot_columns = ['Day_number', 'Occurences']
Data = dict(zip(plot_columns, (rs, ls)))
df = DataFrame(Data, columns=plot_columns)
df.plot(kind='line', x=plot_columns[0], y=plot_columns[1])
plt.show()