diff --git a/dateparser/languages/locale.py b/dateparser/languages/locale.py index 0cb578552..6c37531e8 100644 --- a/dateparser/languages/locale.py +++ b/dateparser/languages/locale.py @@ -17,15 +17,12 @@ class Locale: """ Class that deals with applicability and translation from a locale. - :param shortname: A locale code, e.g. 'fr-PF', 'qu-EC', 'af-NA'. :type shortname: str - :param language_info: Language info (translation data) of the language the locale belongs to. :type language_info: dict - :return: A Locale instance """ @@ -50,15 +47,12 @@ def __init__(self, shortname, language_info): def is_applicable(self, date_string, strip_timezone=False, settings=None): """ Check if the locale is applicable to translate date string. - :param date_string: A string representing date and/or time in a recognizably valid format. :type date_string: str - :param strip_timezone: If True, timezone is stripped from date string. :type strip_timezone: bool - :return: boolean value representing if the locale is applicable for the date string or not. """ if strip_timezone: @@ -110,15 +104,12 @@ def clean_dictionary(dictionary, threshold=2): def translate(self, date_string, keep_formatting=False, settings=None): """ Translate the date string to its English equivalent. - :param date_string: A string representing date and/or time in a recognizably valid format. :type date_string: str - :param keep_formatting: If True, retain formatting of the date string after translation. :type keep_formatting: bool - :return: translated date string. """ date_string = self._translate_numerals(date_string) @@ -268,12 +259,20 @@ def _sentence_split(self, string, settings): 4: r'[。…‥\.!??!;\r\n]+(?:\s|$)+', # Japanese and Chinese 5: r'[\r\n]+', # Thai 6: r'[\r\n؟!\.…]+(?:\s|$)+'} # Arabic and Farsi + + sentences = [] + re_dot_date = r'(\d+\.\d+\.\d+)' + for dot_date_object in reversed(list(re.finditer(re_dot_date, string))): + start_index, end_index = dot_date_object.span() + string = string[:start_index] + string[end_index:] + sentences.append(dot_date_object.group()) + if 'sentence_splitter_group' not in self.info: split_reg = abbreviation_string + splitters_dict[1] - sentences = re.split(split_reg, string) + sentences.extend(re.split(split_reg, string)) else: split_reg = abbreviation_string + splitters_dict[self.info['sentence_splitter_group']] - sentences = re.split(split_reg, string) + sentences.extend(re.split(split_reg, string)) for i in sentences: if not i: diff --git a/dateparser_scripts/update_supported_languages_and_locales.py b/dateparser_scripts/update_supported_languages_and_locales.py old mode 100755 new mode 100644 diff --git a/docs/conf.py b/docs/conf.py old mode 100755 new mode 100644 diff --git a/tests/test_search.py b/tests/test_search.py index 1ea7b7bff..92334dc8a 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -744,6 +744,36 @@ def test_detection(self, shortname, text): languages=['en'], settings=None, expected=[('9/3/2017', datetime.datetime(2017, 9, 3, 0, 0))]), + + # Test dates with period. i.e "." + param(text="12.12.2000", + languages=None, + settings=None, + expected=[('12.12.2000', datetime.datetime(2000, 12, 12, 0, 0))]), + param(text="1973.02.16", + languages=None, + settings=None, + expected=[('1973.02.16', datetime.datetime(1973, 2, 16, 0, 0))]), + param(text="26.09.2019", + languages=None, + settings=None, + expected=[('26.09.2019', datetime.datetime(2019, 9, 26, 0, 0))]), + param(text="test 13.07.2016 test", + languages=None, + settings=None, + expected=[('13.07.2016', datetime.datetime(2016, 7, 13, 0, 0))]), + param(text="Date:22.06.2020", + languages=["de"], + settings={'DATE_ORDER': 'DMY'}, + expected=[('22.06.2020', datetime.datetime(2020, 6, 22, 0, 0))]), + param(text="Date :22.06.2020", + languages=["de"], + settings={'DATE_ORDER': 'DMY'}, + expected=[('22.06.2020', datetime.datetime(2020, 6, 22, 0, 0))]), + param(text="Hello-Date 26.09.2019", + languages=["de", "fr"], + settings={'DATE_ORDER': 'DMY'}, + expected=[('26.09.2019', datetime.datetime(2019, 9, 26, 0, 0))]), ]) def test_date_search_function(self, text, languages, settings, expected): result = search_dates(text, languages=languages, settings=settings)