From d2bf35abf6d9aa170daa741e71bdc0c5accacf75 Mon Sep 17 00:00:00 2001 From: Zachary Date: Sat, 21 Jul 2018 22:31:12 +0100 Subject: [PATCH] Preserve quotes in question search strings --- solvers.py | 7 +++---- utils.py | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/solvers.py b/solvers.py index 72a9e3b..63076e7 100644 --- a/solvers.py +++ b/solvers.py @@ -19,10 +19,9 @@ def build_queries(question_text, answers): def build_urls(self, question_text, answers): """ build URLs with search queries """ - parsed_question_text = get_raw_words( - question_text.replace(' NOT ', ' ').replace(' NEVER ', ' ') - , lowercase=False) - queries = self.build_queries('{}?'.format(parsed_question_text), answers) + parsed_question_text = question_text.replace(' NOT ', ' ').replace(' NEVER ', ' ') + parsed_question_text = re.sub(r'[^A-Za-z0-9\“\”\? ]', '', parsed_question_text).replace(' ', ' ') + queries = self.build_queries(parsed_question_text, answers) return [self.service_url.format(quote_plus(query)) for query in queries] @staticmethod diff --git a/utils.py b/utils.py index bd07e69..0600c98 100644 --- a/utils.py +++ b/utils.py @@ -32,8 +32,8 @@ def get_significant_words(question_words): return list(filter(lambda word: word not in our_stopwords, question_words.split(' '))) -def get_raw_words(data, lowercase=True): +def get_raw_words(data): """ Extract raw words from data """ data = re.sub(r'[^A-Za-z0-9 ]', '', data).replace(' and ', ' ').strip() - words = data.replace(' ', ' ') - return words.lower() if lowercase else words + words = data.replace(' ', ' ').lower() + return words