# myfile.py
# from docx import Document
# doc = Document("./tdoc.docx")
# print(("tdoc.docx").split(".")[-1])
# print the list of paragraph objects in the document
# print('List of paragraph objects:->>>')
# print(doc.paragraphs)
# print the list of the runs in a specified paragraph
# print('\nList of runs objects in 1st paragraph:->>>')
# print(doc.paragraphs[0].runs)
# print the text in a paragraph
# print('\nText in the 1st paragraph:->>>')
# print(doc.paragraphs[0].text)
# for printing the complete document
# print('\nThe whole content of the document:->>>\n')
# i = 8
# paras = doc.paragraphs
# N = len(paras)
# while i < N and paras[i].text != "Transcript":
#     # print(paras[i].text)
#     i += 1
# i += 2
# while i < N:
#     print(paras[i].text)
#     i += 1
# res = ""
# while i < N:
#     print(paras[i])
#     print(paras[i].text.split(" - ")[0] + ": " + paras[i+1].text)
#     res +=
#     i += 3
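# A hedged sketch of how the abandoned docx-extraction experiment above could be
# completed. It assumes the layout implied by the loops: after a "Transcript"
# heading, paragraphs come in groups of three where the first holds
# "<speaker> - <timestamp>" and the second holds the spoken text. The function
# name read_transcript and the stride of 3 are assumptions, not confirmed by the
# original code.
#
# def read_transcript(path="./tdoc.docx"):
#     from docx import Document
#     paras = Document(path).paragraphs
#     n = len(paras)
#     i = 8
#     # skip the preamble until the "Transcript" heading
#     while i < n and paras[i].text != "Transcript":
#         i += 1
#     i += 2
#     res = ""
#     while i + 1 < n:
#         # "<speaker> - <timestamp>" followed by the spoken text
#         res += paras[i].text.split(" - ")[0] + ": " + paras[i + 1].text + "\n"
#         i += 3
#     return res
#
# print(read_transcript())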
s = """Ganesh Utla: Hello. Hello.
Ganesh Utla: Welcome back, Kunal. This is the second part of the interview and I want to get these as quickly as possible because I know you somewhere between. Thank you.
Kunal Agrawal: Thank you. It's my pleasure.
Ganesh Utla: It says on the website you worked doing graphic design for a local branding agency before branching out and starting your own business.
Kunal Agrawal: Yes.
Ganesh Utla: Was that a conscious choice?
Kunal Agrawal: Yes.
Ganesh Utla: Sorry, one sec. We can hear the fan.
Aman Tiwari: [inaudible 00:27]
Ganesh Utla: No, no, That's perfect. Thanks, Aman. Good. Was starting your own company international, or did you just sort of fall into it?
Kunal Agrawal: Actually, Sort of both. I started out doing it as a favour for a friend. I didn't really know what I was doing at the time, but at some point, I found out I was having some success with that, and so I started doing it for local businesses and restaurants. Then it kind of took off from there, and I figured, well, if I'm going to be taking on all these new clients I might as well get a website going and make something out of this, you know?
Ganesh Utla: Sure. What kind of challenges did you experience when you were starting out, that you weren't expecting?
Kunal Agrawal: Hmm, challenges I wasn't expecting.
Ganesh Utla: Hahaha. I keep putting on the spot. I don't think I put that one in the questions either. We're just ad-libbing here.
Kunal Agrawal: No, it's fine. So, challenges.
Ganesh Utla: Okay. I understood. Time to say bye now!
Kunal Agrawal: Okay, once again Thank you for your time.
Ganesh Utla: Same goes for you."""
# MODEL 1: MeaningCloud summarization API
# import requests
# url = "https://api.meaningcloud.com/summarization-1.0"
# payload = {
#     'key': '26974ec503b66a278e800edbbbfee2f9',
#     'txt': s,
#     'sentences': 10
# }
# response = requests.post(url, data=payload)
# print('Status code:', response.status_code)
# res = response.json()
# print(res["summary"])

# MODEL 2: RapidAPI summarization endpoint
# import requests
# url = "https://enelyou-enelyou-summarization--index--summary--topic--part-of-s.p.rapidapi.com/sumpagejson/"
# payload = "url=http%3A%2F%2Fen.wikipedia.org%2Fwiki%2FNatural_language_processing&length=.1"
# headers = {
#     "content-type": "application/x-www-form-urlencoded",
#     "X-RapidAPI-Key": "80a948574fmsh5676ccf6ef9c454p17718cjsneff8135f3b7d",
#     "X-RapidAPI-Host": "enelyou-enelyou-summarization--index--summary--topic--part-of-s.p.rapidapi.com"
# }
# response = requests.request("POST", url, data=payload, headers=headers)
# print(response.text)
import re
import nltk
import heapq


def nlp_model(data):
    # MODEL 3: simple frequency-based extractive summarizer built with NLTK.
    # The tokenizers need the 'punkt' data package and the stopword list needs
    # 'stopwords'; download both (no-ops if already present).
    nltk.download("punkt")
    nltk.download("stopwords")

    # Drop citation-style markers such as [1] and collapse whitespace.
    article_text = re.sub(r'\[[0-9]*\]', ' ', data)
    article_text = re.sub(r'\s+', ' ', article_text)

    # Letters-only copy used for word-frequency counting.
    formatted_article_text = re.sub('[^a-zA-Z]', ' ', article_text)
    formatted_article_text = re.sub(r'\s+', ' ', formatted_article_text)

    sentence_list = nltk.sent_tokenize(article_text)
    stopwords = nltk.corpus.stopwords.words('english')

    # Count how often each non-stopword occurs.
    word_frequencies = {}
    for word in nltk.word_tokenize(formatted_article_text):
        if word not in stopwords:
            if word not in word_frequencies:
                word_frequencies[word] = 1
            else:
                word_frequencies[word] += 1

    # Normalize the counts by the most frequent word.
    maximum_frequency = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] = word_frequencies[word] / maximum_frequency

    # Score each sentence shorter than 30 words by summing its word weights.
    sentence_scores = {}
    for sent in sentence_list:
        for word in nltk.word_tokenize(sent.lower()):
            if word in word_frequencies:
                if len(sent.split(' ')) < 30:
                    if sent not in sentence_scores:
                        sentence_scores[sent] = word_frequencies[word]
                    else:
                        sentence_scores[sent] += word_frequencies[word]

    # Keep the five highest-scoring sentences as the extractive summary.
    summary_sentences = heapq.nlargest(5, sentence_scores, key=sentence_scores.get)
    # summary = ' '.join(summary_sentences)
    return summary_sentences


print(nlp_model(s))
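# A small hedged usage sketch: join the returned sentences into one summary
# string, mirroring the commented-out join inside nlp_model.
# summary = " ".join(nlp_model(s))
# print(summary)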
"""
import speech_recognition as sr
# initialize recognizer class
r = sr.Recognizer()
//
# For Reading Microphone as source
with sr.Microphone() as source:
print("Talk")
audio = r.listen(source)
# OR
# For reading audio
filename = "audio_file.wav"
with sr.AudioFile(filename) as source:
audio_text = r.record(source)
//
# using google to recognize audio
try:
print("The audio file contains: " + r.recognize_google(audio_text))
except sr.UnknownValueError:
print("Google Speech Recognition could not understand audio")
except sr.RequestError as e:
print("Could not request results from Google Speech Recognition service; {0}".format(e))
"""