-
Notifications
You must be signed in to change notification settings - Fork 1
/
lambda_function.py
357 lines (296 loc) · 15.9 KB
/
lambda_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
import boto3
import s3fs
import json
import logging
import re
import random
from constants import VALID_OPTIONS, SAFE_4_WORK
from utilities import contains_curse
import boto3
# Low-level boto3 S3 client, created once at import time.
# NOTE(review): `s3` is not referenced anywhere else in the visible code.
s3 = boto3.client("s3")
# s3fs filesystem handle used by the functions below to list and open objects.
my_s3fs = s3fs.S3FileSystem()
# Root prefix under which each category (artist / quote topic) has its own folder.
toplevel_dir = "s3://bars-api/just-lyrics/"
# Listing of the category folders, fetched once when the module is imported.
# Later code splits these entries on '/' to get bare folder names, so they are
# treated as full paths rather than bare names.
ALL_FOLDERS_OF_DATA = my_s3fs.ls(toplevel_dir)
def respond(err, res=None):
    """Build the response dictionary returned to API Gateway.

    Args:
        err: An exception describing the failure, or a falsy value on success.
        res: JSON-serialisable payload used as the body when ``err`` is falsy.

    Returns:
        A dict with ``statusCode`` ('400' when ``err`` is truthy, else '200'),
        ``body`` (the error text or the JSON-encoded ``res``) and CORS headers.
    """
    if err:
        status_code = '400'
        # Python 3 exceptions have no ``.message`` attribute, so fall back to
        # ``str(err)``; a custom ``message`` attribute is still honoured.
        body = getattr(err, 'message', str(err))
    else:
        status_code = '200'
        body = json.dumps(res)

    return {
        'statusCode': status_code,
        'body': body,
        'headers': {
            'Content-Type': 'application/json',
            "Access-Control-Allow-Origin": "*",  # Required for CORS support to work
            "Access-Control-Allow-Credentials": True,
        },
    }
def _success_payload(lyric, song, artist):
    """Wrap a lyric/quote result in the API's success envelope."""
    return {'meta': {'code': 200}, 'data': {'lyric': lyric, 'song': song, 'author': artist}}


def _error_payload(error_msg):
    """Log ``error_msg`` and wrap it in the API's error envelope."""
    logging.error(error_msg)
    return {'meta': {'code': 400}, 'error': {'code': 400, 'message': error_msg}}


def lambda_handler(event, context):
    """Entry point: serve a random lyric or quote over HTTP.

    * ``GET`` returns a random lyric/quote from any category.
    * ``POST`` expects a JSON object body with ``method`` and ``category``
      keys; ``category`` is a list of requested categories (an empty list
      means "anything").
    * Any other HTTP method yields a 400 response.

    Args:
        event: The request event; ``event['httpMethod']`` and (for POST)
            ``event['body']`` are read.
        context: Lambda context object (unused).

    Returns:
        The response dict built by ``respond``. Application-level errors for
        POST requests are reported inside the payload (``meta.code`` 400)
        while the HTTP status stays '200', as before.
    """
    operation = event['httpMethod']

    if operation == 'GET':
        lyric, song, artist = get_random_lyric()
        logging.debug('lyric: {}\nsong: {}\nartist: {}\n'.format(lyric, song, artist))
        return respond(err=None, res=_success_payload(lyric, song, artist))

    if operation == 'POST':
        print("Event Body {}".format(event['body']))
        try:
            json_data = json.loads(event['body'])
        except (TypeError, ValueError):
            # Missing or malformed body: answer with the usage message below
            # instead of letting the invocation fail.
            json_data = None

        if isinstance(json_data, dict) and 'method' in json_data and 'category' in json_data:
            # json_data['method'] is required but currently unused.
            # "category" is the kind of quote: an artist, a genre, inspirational...
            category_list = json_data['category']
            print('arguments passed: {}'.format(category_list))
            if not category_list:
                # An empty (or null) category means "pick anything". Passing
                # None here made get_random_lyric fail on its membership test.
                category_list = []
            lyric, song, artist = get_random_lyric(category_list)
            if lyric == song == artist:
                # get_random_lyric signals invalid categories with three equal
                # (empty) strings.
                payload = _error_payload(
                    'You passed an invalid argument. Use one of the following {}'.format(VALID_OPTIONS))
            else:
                payload = _success_payload(lyric, song, artist)
        else:
            error_msg = 'Please include a category key in your JSON with an array specifying the type of random quote you would like.'
            error_msg += ' Your options are as follows: \n{}'.format(VALID_OPTIONS)
            error_msg += ' Or, use a GET request with no parameters.'
            payload = _error_payload(error_msg)
        return respond(err=None, res=payload)

    return respond(ValueError('Unsupported method "{}"'.format(operation)))
def _author_from_folder(cat_folder):
    """Derive a display author from a category folder path.

    The folder listing yields full paths, so only the last path segment is
    used. Under the older 'artist_name_lyrics' / 'topic_quotes' naming the
    trailing token is dropped and the rest joined with spaces; if that leaves
    nothing, the bare folder name is used.
    """
    folder_name = cat_folder.rstrip('/').split('/')[-1]
    author = ' '.join(folder_name.split('_')[:-1])
    if not author.strip():
        author = folder_name
    return author


def _find_category_folder(chosen_option):
    """Return the data folder name matching ``chosen_option``, or None.

    Candidates are tried in this order: '<option>_lyrics', '<option>',
    '<option>_quotes', and the option with underscores replaced by hyphens.
    """
    # The listing holds whole paths; compare on the last segment only.
    folder_names = {path.split('/')[-1] for path in ALL_FOLDERS_OF_DATA}
    candidates = (
        chosen_option + '_lyrics',
        chosen_option,
        chosen_option + '_quotes',
        chosen_option.replace('_', '-'),
    )
    for candidate in candidates:
        if candidate in folder_names:
            return candidate
    print("{}, {}, {}, {} were not found in available folders".format(*candidates))
    print("all_options_folder_names_only: {}".format(sorted(folder_names)))
    return None


def get_random_lyric(category_array=None):
    """Pick a random lyric (four lines) or quote, optionally by category.

    Args:
        category_array: Requested categories. ``None``, an empty list, or a
            list holding only ``SAFE_4_WORK`` means "any category".
            Including ``SAFE_4_WORK`` asks for text without curse words.

    Returns:
        A ``(quote_or_lyric, song, author)`` tuple. ``('', '', '')`` is
        returned when none of the requested categories is a valid option or
        when no data folder exists for the chosen option; callers treat three
        equal values as "invalid argument".
    """
    categories = category_array if category_array else []
    logging.debug('Category Array: ' + str(categories))

    # The user wants curses unless the safe-for-work flag was passed.
    wants_curses = SAFE_4_WORK not in categories

    if not categories or (len(categories) == 1 and not wants_curses):
        # No real category requested: take any song from any folder.
        file_lines, song, cat_folder = drill_down_and_get_file_and_song()
    else:
        # Intersection of the available options and the options posted.
        valid_options_passed_in = set(VALID_OPTIONS) & set(categories)
        # The safe-for-work flag is a modifier, not a category, so it must not
        # take part in the random choice. discard() is a no-op if it is absent.
        valid_options_passed_in.discard(SAFE_4_WORK)
        if not valid_options_passed_in:
            error_msg = 'You passed an invalid argument. Use one of the following: {}'.format(VALID_OPTIONS)
            logging.error("Passed Invalid Args Message: {}".format(error_msg))
            return '', '', ''

        chosen_option = random.choice(list(valid_options_passed_in))
        print('Chosen option: ' + chosen_option)
        print('valid options passed in: ' + str(valid_options_passed_in))

        folder_name = _find_category_folder(chosen_option)
        if folder_name is None:
            # Nothing to read from; report it the same way as an invalid
            # argument rather than failing on unbound variables.
            return '', '', ''
        file_lines, song, cat_folder = drill_down_and_get_file_and_song(toplevel_dir + folder_name)

    quote_or_lyric, author = piece_necessary_info_together(file_lines, song, wants_curses)
    if not author:
        # Songs carry no author line; the category folder names the artist.
        author = _author_from_folder(cat_folder)
    # The song title is built from a full path, so keep only the last segment.
    song = song.split('/')[-1]

    logging.debug('Returning author: ' + author)
    logging.debug('Cat folder was: ' + cat_folder)
    logging.debug('Returning quote or lyric: ' + quote_or_lyric)
    logging.debug('Returning song: ' + song)
    return quote_or_lyric, song, author
def drill_down_and_get_file_and_song(category_file_name_arg=None, wants_curses=True):
    """Pick a random ``.txt`` file from a category folder and read it.

    Args:
        category_file_name_arg: Path of the category folder to read from.
            When falsy, a random entry of ``ALL_FOLDERS_OF_DATA`` is used.
        wants_curses: When False, prefer files whose path does not trigger
            ``contains_curse``. If every file triggers it, the filter is
            skipped (with a warning) rather than looping forever.

    Returns:
        ``(lines, song, category_folder)``: the file's lines, a song title
        derived from the file path ('' for folders whose name has a 'quotes'
        token), and the category folder path that was used.

    Raises:
        IndexError: If the folder contains no ``.txt`` files.
    """
    if category_file_name_arg:
        category_file_name = category_file_name_arg
    else:
        category_file_name = random.choice(ALL_FOLDERS_OF_DATA)

    # List the folder once and keep only the text files.
    file_choices = [f for f in my_s3fs.ls(category_file_name) if f.endswith('.txt')]
    if not file_choices:
        raise IndexError('No .txt files found in category folder {}'.format(category_file_name))

    if not wants_curses:
        # Previously this re-picked until a file WITH a curse was found
        # (inverted test) and drew from the unfiltered listing.
        clean_choices = [f for f in file_choices if not contains_curse(f)]
        if clean_choices:
            file_choices = clean_choices
        else:
            logging.warning('Every file in %s has a curse in its path; ignoring the title filter',
                            category_file_name)

    last_file_name = random.choice(file_choices)
    print("Reached full path: {}".format(last_file_name))

    with my_s3fs.open(last_file_name, 'r', errors='ignore') as fl:
        my_file_lines = fl.readlines()

    if 'quotes' in category_file_name.split('_'):
        # Quote folders have no song; an empty title marks the content as quotes.
        potential_song = ''
    else:
        # Cut off the '.txt' extension and turn underscores into spaces.
        potential_song = last_file_name[:-4].replace('_', " ")

    return my_file_lines, potential_song, category_file_name
def are_bars_valid(bars_list, cursing_allowed=True):
    """Check that the chosen lines form a meaningful set of bars.

    A line is rejected when it contains square or round brackets (artist
    tags, '[2x]' markers and the like) or when it is blank.

    Args:
        bars_list: The candidate lines.
        cursing_allowed: When False, the joined lines must also pass
            ``contains_curse``.

    Returns:
        True when every line is acceptable (and, if required, curse-free).
    """
    def is_bad(bar):
        # len <= 1 covers '' and a lone '\n'; strip() additionally catches
        # '\r\n' and whitespace-only lines.
        if len(bar) <= 1 or not bar.strip():
            return True
        return any(symbol in bar for symbol in '[]()')

    if any(is_bad(bar) for bar in bars_list):
        return False

    if not cursing_allowed:
        curse = contains_curse(''.join(bars_list))
        logging.debug('Contained a curse? {}'.format(curse))
        return not curse

    return True
def is_valid_quote_author_combo(combo_list_quote_first_author_second):
    """Check that a pair of lines is a quote line followed by an author line.

    The text before the first ':' of the first line must contain 'QUOTE' and
    that of the second line must contain 'AUTHOR' (case-insensitive). A line
    with no ':' is checked as a whole.

    Args:
        combo_list_quote_first_author_second: Sequence whose first item is the
            candidate quote line and whose second is the candidate author line.

    Returns:
        True when both labels are present; False otherwise, including when
        fewer than two lines are supplied.
    """
    combo = combo_list_quote_first_author_second
    if len(combo) < 2:
        # Not enough lines to form a pair; previously this raised IndexError.
        return False

    quote_label = combo[0].split(':')[0].upper()
    author_label = combo[1].split(':')[0].upper()
    return 'QUOTE' in quote_label and 'AUTHOR' in author_label
def _random_order(count):
    """Return the indices 0..count-1 in random order (empty if count <= 0)."""
    if count <= 0:
        return []
    return random.sample(range(count), count)


def _pick_bar(txt_file_lines, wants_curses):
    """Choose four consecutive valid lines from a song and join them."""
    if len(txt_file_lines) == 1:
        # Some files hold the whole song on one line with literal '\n'
        # sequences; split on those.
        txt_file_lines = txt_file_lines[0].split("\\n")
        print("Number of lines in song after splitting: {}".format(len(txt_file_lines)))

    # Lines from the first "ALBUM INFO" marker onwards are excluded; not every
    # file has one (mixtapes and such), in which case all lines are usable.
    num_useful_lines = next(
        (idx for idx, line in enumerate(txt_file_lines) if "ALBUM INFO" in line),
        len(txt_file_lines),
    )

    # A window of four lines starting at `ind` needs ind + 3 < num_useful_lines.
    # Visiting each start once, in random order, stays uniform over the valid
    # windows and is guaranteed to terminate.
    for ind in _random_order(num_useful_lines - 3):
        bars_all = txt_file_lines[ind:ind + 4]
        logging.info(bars_all)
        if are_bars_valid(bars_all, cursing_allowed=wants_curses):
            break
    else:
        raise ValueError('No valid run of four lines could be found in the song')

    cleaned = []
    for half_bar in bars_all:
        # Add a line break if it does not already exist.
        line = half_bar if half_bar.endswith("\n") else half_bar + "\n"
        # Drop carriage returns (real and escaped) and unescape apostrophes.
        line = line.replace("\r", "").replace("\\'", "'").replace("\\r", "")
        cleaned.append(line)
    bar = "".join(cleaned)
    logging.info("Valid bar composed: {}".format(bar))
    return bar


def _pick_quote(txt_file_lines):
    """Choose a random (quote, author) pair of adjacent lines."""
    # Try every adjacent pair once, in random order, until one looks like a
    # quote line followed by an author line.
    for ind in _random_order(len(txt_file_lines) - 1):
        hopefully_quote = txt_file_lines[ind]
        hopefully_author = txt_file_lines[ind + 1]
        if is_valid_quote_author_combo([hopefully_quote, hopefully_author]):
            break
    else:
        raise ValueError('No quote/author pair could be found in the file')

    # Cut off the 6-character quote label and the 7-character author label.
    quote = hopefully_quote[6:]
    author = hopefully_author[7:]
    logging.info("Valid quote and author found: {} - {}".format(quote, author))
    return quote, author


def piece_necessary_info_together(txt_file_lines, song, wants_curses=True):
    """Extract a random bar or quote from the lines of a data file.

    Args:
        txt_file_lines: Lines of the chosen text file.
        song: Song title; a non-empty title means the file is a song, an
            empty one means it holds quote/author line pairs.
        wants_curses: Passed to ``are_bars_valid`` as ``cursing_allowed`` when
            picking song lines.

    Returns:
        ``(bar, None)`` for songs, where the caller derives the author from
        the folder name, or ``(quote, author)`` for quote files.

    Raises:
        ValueError: If the file contains no valid bar or quote/author pair.
            Previously this looped forever or raised an opaque IndexError.
    """
    if len(song) > 0:
        # Author is left blank because it is a song; the author is in the
        # parent directory name.
        return _pick_bar(txt_file_lines, wants_curses), None
    return _pick_quote(txt_file_lines)