From dc089a585ecdacfae6340497f83ee18e8599e22c Mon Sep 17 00:00:00 2001 From: "ilan.schvartzman" Date: Wed, 14 Oct 2020 11:29:54 +0300 Subject: [PATCH 1/2] Added words in Chinese related to hours ago, days ago, weeks ago, months ago, years ago --- .../data/date_translation_data/zh-Hans.py | 88 +++++++++++++------ dateparser/data/date_translation_data/zh.py | 42 +++++++-- 2 files changed, 96 insertions(+), 34 deletions(-) diff --git a/dateparser/data/date_translation_data/zh-Hans.py b/dateparser/data/date_translation_data/zh-Hans.py index c5add38a5..1f7798e64 100644 --- a/dateparser/data/date_translation_data/zh-Hans.py +++ b/dateparser/data/date_translation_data/zh-Hans.py @@ -1,5 +1,5 @@ info = { - "name": "zh-Hans", + "name": "zh", "date_order": "YMD", "january": [ "一月", @@ -51,31 +51,40 @@ ], "monday": [ "星期一", - "周一" + "周一", + "礼拜一" ], "tuesday": [ "星期二", - "周二" + "周二", + "礼拜二" ], "wednesday": [ "星期三", - "周三" + "周三", + "礼拜三" ], "thursday": [ "星期四", - "周四" + "周四", + "礼拜四" ], "friday": [ "星期五", - "周五" + "周五", + "礼拜五" ], "saturday": [ "星期六", - "周六" + "周六", + "礼拜六" ], "sunday": [ "星期日", - "周日" + "周日", + "星期天", + "礼拜日", + "礼拜天" ], "am": [ "上午" @@ -87,13 +96,17 @@ "年" ], "month": [ - "月" + "月", + "个月", + "個月" ], "week": [ - "周" + "周", + "星期" ], "day": [ - "日" + "日", + "天" ], "hour": [ "小时" @@ -150,6 +163,9 @@ ], "0 second ago": [ "现在" + ], + "2 day ago": [ + "前天" ] }, "relative-type-regex": { @@ -198,23 +214,11 @@ "(\\d+)秒前" ] }, - "locale_specific": { - "zh-Hans-SG": { - "name": "zh-Hans-SG", - "date_order": "DMY" - }, - "zh-Hans-HK": { - "name": "zh-Hans-HK", - "date_order": "DMY" - }, - "zh-Hans-MO": { - "name": "zh-Hans-MO", - "date_order": "DMY" - } - }, + "locale_specific": {}, "no_word_spacing": "True", "sentence_splitter_group": 4, "skip": [ + "约", " ", ".", ",", @@ -227,5 +231,37 @@ "[", "]", "," + ], + "ago": [ + "前" + ], + "in": [ + "在" + ], + "simplifications": [ + { + "半小时前": "30分前" + }, + { + "(?:中午|下午|(?:晚上?))(?:\\s*)(\\d+)(?:\\s*):(?:\\s+|:)?(\\d+)": 
"\\1:\\2 pm" + }, + { + "(?:上午|早上|凌晨)(?:\\s*)(\\d+)(?:\\s*):(?:\\s+|:)?(\\d+)": "\\1:\\2 am" + }, + { + "中午": "12:00" + }, + { + "(\\d+)年(?:\\s+)?(\\d+)月(?:\\s+)?(\\d+)日(?:\\s+)?(\\d+)时(?:\\s+)?(\\d+)分": "\\1-\\2-\\3 \\4:\\5" + }, + { + "(\\d+)年(?:\\s+)?(\\d+)月(?:\\s+)?(\\d{1,2})(?:日)?(?:\\s+)?(\\d{1,2})(?:点|:)(\\d{1,2})": "\\1-\\2-\\3 \\4:\\5" + }, + { + "(\\d+)年(?:\\s+)?(\\d+)月(?:\\s+)?(\\d{1,2})(?:日)?": "\\1-\\2-\\3" + }, + { + "(\\d+)月(?=.*[前后])": "\\1 月" + } ] -} +} \ No newline at end of file diff --git a/dateparser/data/date_translation_data/zh.py b/dateparser/data/date_translation_data/zh.py index a9ef5501b..0539124b1 100644 --- a/dateparser/data/date_translation_data/zh.py +++ b/dateparser/data/date_translation_data/zh.py @@ -93,7 +93,8 @@ "下午" ], "year": [ - "年" + "年", + "年度" ], "month": [ "月", @@ -102,7 +103,8 @@ ], "week": [ "周", - "星期" + "星期", + "礼拜" ], "day": [ "日", @@ -173,31 +175,55 @@ "(\\d+)年后" ], "\\1 year ago": [ - "(\\d+)年前" + "(\\d+)年前", + "(\\d+)年度前", + "(\\d+)个年前", + "(\\d+)个年度前", ], "in \\1 month": [ "(\\d+)个月后" ], "\\1 month ago": [ - "(\\d+)个月前" + "(\\d+)个月前", + "(\\d+)月前", + "(\\d+)月份前" + "(\\d+)个月份前" ], "in \\1 week": [ - "(\\d+)周后" + "(\\d+)周后", ], "\\1 week ago": [ - "(\\d+)周前" + "(\\d+)周前", + "(\\d+)个周前", + "(\\d+)星期前", + "(\\d+)个星期前", + "(\\d+)礼拜前", + "(\\d+)个礼拜前" ], "in \\1 day": [ "(\\d+)天后" ], "\\1 day ago": [ - "(\\d+)天前" + "(\\d+)天前", + "(\\d+)日前", + "(\\d+)日子前", + "(\\d+)昼前", + "(\\d+)个天前", + "(\\d+)个日前", + "(\\d+)个日子前", + "(\\d+)个昼前", + ], "in \\1 hour": [ "(\\d+)小时后" ], "\\1 hour ago": [ - "(\\d+)小时前" + "(\\d+)小时前", + "(\\d+)时刻前", + "(\\d+)钟头前", + "(\\d+)个小时前", + "(\\d+)个时刻前", + "(\\d+)个钟头前" ], "in \\1 minute": [ "(\\d+)分钟后" From 1becaad06e48f3e53c66f40af9cb75fd2fe9b740 Mon Sep 17 00:00:00 2001 From: "ilan.schvartzman" Date: Thu, 15 Oct 2020 09:30:23 +0300 Subject: [PATCH 2/2] more synonims for more relative dates --- dateparser/data/date_translation_data/zh.py | 35 ++++++++++++--------- 1 file changed, 20 insertions(+), 15 
deletions(-) diff --git a/dateparser/data/date_translation_data/zh.py b/dateparser/data/date_translation_data/zh.py index 0539124b1..80e75edf6 100644 --- a/dateparser/data/date_translation_data/zh.py +++ b/dateparser/data/date_translation_data/zh.py @@ -176,9 +176,9 @@ ], "\\1 year ago": [ "(\\d+)年前", - "(\\d+)年度前", - "(\\d+)个年前", - "(\\d+)个年度前", + "(\\d+)载前", + "前一(\\d+)年", + "前一(\\d+)载", ], "in \\1 month": [ "(\\d+)个月后" @@ -186,8 +186,10 @@ "\\1 month ago": [ "(\\d+)个月前", "(\\d+)月前", - "(\\d+)月份前" - "(\\d+)个月份前" + "(\\d+)月前", + "(\\d+)个月前", + "前一(\\d+)月", + "前一(\\d+)个月" ], "in \\1 week": [ "(\\d+)周后", @@ -198,7 +200,12 @@ "(\\d+)星期前", "(\\d+)个星期前", "(\\d+)礼拜前", - "(\\d+)个礼拜前" + "(\\d+)个礼拜前", + "前一(\\d+)周", + "前一(\\d+)星期", + "前一(\\d+)个星期", + "前一(\\d+)礼拜", + "前一(\\d+)个礼拜" ], "in \\1 day": [ "(\\d+)天后" @@ -206,12 +213,8 @@ "\\1 day ago": [ "(\\d+)天前", "(\\d+)日前", - "(\\d+)日子前", - "(\\d+)昼前", - "(\\d+)个天前", - "(\\d+)个日前", - "(\\d+)个日子前", - "(\\d+)个昼前", + "前一(\\d+)天", + "前一(\\d+)日", ], "in \\1 hour": [ @@ -219,11 +222,13 @@ ], "\\1 hour ago": [ "(\\d+)小时前", - "(\\d+)时刻前", "(\\d+)钟头前", "(\\d+)个小时前", - "(\\d+)个时刻前", - "(\\d+)个钟头前" + "(\\d+)个钟头前", + "前一(\\d+)小时", + "前一(\\d+)个小时", + "前一(\\d+)钟头", + "前一(\\d+)个钟头" ], "in \\1 minute": [ "(\\d+)分钟后"