Skip to content

Commit

Permalink
Strip non-word characters from question search strings
Browse files Browse the repository at this point in the history
  • Loading branch information
zachd committed Jul 25, 2018
1 parent d557c06 commit cadb5bf
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
5 changes: 4 additions & 1 deletion solvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ def build_queries(question_text, answers):

     def build_urls(self, question_text, answers):
         """ build URLs with search queries """
-        queries = self.build_queries(question_text.replace(' NOT ', ' ').replace(' NEVER ', ' '), answers)
+        parsed_question_text = get_raw_words(
+            question_text.replace(' NOT ', ' ').replace(' NEVER ', ' ')
+        , lowercase=False)
+        queries = self.build_queries('{}?'.format(parsed_question_text), answers)
         return [self.service_url.format(quote_plus(query)) for query in queries]

     @staticmethod
Expand Down
8 changes: 4 additions & 4 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def get_significant_words(question_words):
     return list(filter(lambda word: word not in our_stopwords, question_words.split(' ')))


-def get_raw_words(data):
+def get_raw_words(data, lowercase=True):
     """ Extract raw words from data """
-    data = re.sub(r'[^\w ]', '', data).replace(' and ', ' ').strip()
-    words = data.replace('  ', ' ').lower()
-    return words
+    data = re.sub(r'[^A-Za-z0-9 ]', '', data).replace(' and ', ' ').strip()
+    words = data.replace('  ', ' ')
+    return words.lower() if lowercase else words

0 comments on commit cadb5bf

Please sign in to comment.