diff --git a/hqtrivia_bot.py b/hqtrivia_bot.py
index ca91180..9c1482e 100755
--- a/hqtrivia_bot.py
+++ b/hqtrivia_bot.py
@@ -124,7 +124,7 @@ def prediction_time(self, data):
         confidence = {'A': 0, 'B': 0, 'C': 0}
         for solver in self.solvers:
             responses[solver] = solver.fetch_responses(
-                solver.build_urls(data.get('question'), data.get('answers')), session
+                solver.build_urls(data.get('question'), data.get('answers'), data.get('category')), session
             )
         for solver, responses in responses.items():
             (prediction, confidence) = solver.run(
@@ -384,7 +384,7 @@ def cache_prune(session, solvers):
         for filename in sorted(glob('games/*.json')):
             game = load(open(filename))
             for turn in game.get('questions'):
-                urls.extend(solver.build_urls(turn.get('question'), turn.get('answers')))
+                urls.extend(solver.build_urls(turn.get('question'), turn.get('answers'), turn.get('category')))
     stale_entries = []
     for key, (resp, _) in session.cache.responses.items():
         if resp.url not in urls and not any(step.url in urls for step in resp.history):
@@ -402,7 +402,7 @@ def cache_refresh(session, solvers):
         for filename in sorted(glob('games/*.json')):
             game = load(open(filename))
             for turn in game.get('questions'):
-                urls.extend(solver.build_urls(turn.get('question'), turn.get('answers')))
+                urls.extend(solver.build_urls(turn.get('question'), turn.get('answers'), turn.get('category')))
     cache_misses = [
         url for url in urls if not session.cache.create_key(
             session.prepare_request(Request('GET', url))
@@ -444,7 +444,7 @@ def cache_export(session, solvers):
         urls = []
         for solver in solvers:
             for turn in game.get('questions'):
-                urls.extend(solver.build_urls(turn.get('question'), turn.get('answers')))
+                urls.extend(solver.build_urls(turn.get('question'), turn.get('answers'), turn.get('category')))
         url_keys = [session.cache.create_key(session.prepare_request(Request('GET', url))) for url in urls]
         conn = connect(':memory:')
         cur = conn.cursor()
diff --git a/solvers.py b/solvers.py
index 159bbe6..14499bd 100644
--- a/solvers.py
+++ b/solvers.py
@@ -1,7 +1,7 @@
 """ Solvers for the HQ Trivia bot project """
 import re
 import sys
-from urllib.parse import quote_plus
+from urllib.parse import quote_plus, unquote_plus
 
 from bs4 import BeautifulSoup
 from utils import Colours, get_raw_words, get_significant_words
@@ -13,13 +13,13 @@ class BaseSolver(object):
     service_url = None
 
     @staticmethod
-    def build_queries(question_text, answers):
+    def build_queries(question_text, answers, category=None):
         """ build queries with question text and answers """
         raise NotImplementedError()
 
-    def build_urls(self, question_text, answers):
+    def build_urls(self, question_text, answers, category=None):
         """ build URLs with search queries """
-        queries = self.build_queries(question_text.replace(' NOT ', ' ').replace(' NEVER ', ' '), answers)
+        queries = self.build_queries(question_text.replace(' NOT ', ' ').replace(' NEVER ', ' '), answers, category)
         return [self.service_url.format(quote_plus(query)) for query in queries]
 
     @staticmethod
@@ -73,7 +73,7 @@ class GoogleAnswerWordsSolver(BaseSolver):
     service_url = 'https://www.google.co.uk/search?pws=0&q={}'
 
     @staticmethod
-    def build_queries(question_text, answers):
+    def build_queries(question_text, answers, category=None):
         """ build queries with question text and answers """
         return [question_text]
 
@@ -112,7 +112,7 @@ class GoogleResultsCountSolver(BaseSolver):
     service_url = 'https://www.google.co.uk/search?pws=0&q={}'
 
     @staticmethod
-    def build_queries(question_text, answers):
+    def build_queries(question_text, answers, category=None):
         """ build queries with question text and answers """
         return ['%s "%s"' % (question_text, answer) for answer in answers.values()]
 
@@ -138,21 +138,34 @@ class WolframAlphaAnswerWordsSolver(BaseSolver):
     service_url = 'http://api.wolframalpha.com/v1/result?appid=4H762W-PQ7735Q7T6&timeout=2&i={}'
 
     @staticmethod
-    def build_queries(question_text, answers):
+    def build_queries(question_text, answers, category=None):
         """ build queries with question text and answers """
-        return ['{} {}'.format(question_text, ', '.join(answers.values()))]
+        # Only "Which of these ..." questions in the listed categories become
+        # one yes/no query per answer; everything else yields no queries.
+        wanted = (category or '').strip() in ('Geography', 'Literature')
+        if not wanted or 'Which of these' not in question_text:
+            return []
+        pattern = re.compile(r'Which of these( [^ ]*)( is)?( NOT)?')
+        # A callable replacement inserts each answer verbatim, so braces in the
+        # question text can no longer break (or be consumed by) str.format().
+        return [
+            pattern.sub(lambda found, text=answer: 'Is {}{}'.format(text, found.group(1)), question_text)
+            for answer in answers.values()
+        ]
 
     @staticmethod
     def get_answer_matches(response, _index, answers, matches):
         """ get answer occurences for response """
         result = BeautifulSoup(response.text, "html5lib").text
-        print(response.url)
-        print('Response: {}'.format(result))
+        # Show the decoded query (the URL text after "&i=") next to the raw result;
+        # partition() yields '' instead of raising when the marker is absent.
+        query = unquote_plus(response.url.partition('&i=')[2])
+        print('{}: {}{}{}'.format(query, Colours.BOLD.value, result, Colours.ENDC.value))
         if result != 'Wolfram|Alpha did not understand your input':
             results_words = get_raw_words(result)
             for index, answer in answers.items():
                 answer_words = get_raw_words(answer)
                 matches[index] += results_words.count(answer_words)
         for index, count in matches.items():
-            print('{}: {}'.format(index, Colours.BOLD.value + str(count) + Colours.ENDC.value))
+            print('{}: {} '.format(index, Colours.BOLD.value + str(count) + Colours.ENDC.value), end='', flush=True)
+        print('\n')
         return matches