From 18d4d52da3e33c100f6a8c4fef629383fb59b935 Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Mon, 3 Feb 2014 15:42:30 +0000 Subject: [PATCH 01/14] Separate out parsers and tests --- __init__.py | 0 isodatetime.py | 1043 +----------------------------------------------- isoparsers.py | 572 ++++++++++++++++++++++++++ isotests.py | 509 +++++++++++++++++++++++ 4 files changed, 1082 insertions(+), 1042 deletions(-) create mode 100644 __init__.py create mode 100644 isoparsers.py create mode 100644 isotests.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/isodatetime.py b/isodatetime.py index f98f6c6..bfef1a2 100644 --- a/isodatetime.py +++ b/isodatetime.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- #----------------------------------------------------------------------------- -# (C) British Crown Copyright 2013 Met Office. +# (C) British Crown Copyright 2013-2014 Met Office. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by @@ -30,922 +30,6 @@ "ordinal": (2000, 3)} -class TimeRecurrenceParser(object): - - """Parser for ISO 8601 recurrence expressions. - - Keyword arguments: - timepoint_parser (default None) should be an instance of - TimePointParser, or None to use a normal TimePointParser instance. - timeinterval_parser (default None) should be an instance of - TimeIntervalParser, or None to generate a normal - TimeIntervalParser. - - Callable (via self.parse method) with an ISO 8601-compliant - recurrence pattern - this returns a TimeRecurrence instance. - - """ - - RECURRENCE_REGEXES = [ - re.compile(r"^R(?P\d+)/(?P[^P][^/]*)/(?P[^P].*)$"), - re.compile(r"^R(?P\d+)?/(?P[^P][^/]*)/(?PP.+)$"), - re.compile(r"^R(?P\d+)?/(?PP.+)/(?P[^P].*)$")] - - def __init__(self, timepoint_parser=None, timeinterval_parser=None): - if timepoint_parser is None: - self.timepoint_parser = TimePointParser() - else: - self.timepoint_parser = timepoint_parser - if timeinterval_parser is None: - self.timeinterval_parser = TimeIntervalParser() - else: - self.timepoint_parser = timeinterval_parser - - def parse(self, expression): - """Parse a recurrence string into a TimeRecurrence instance.""" - for regex in self.RECURRENCE_REGEXES: - result = regex.search(expression) - if not result: - continue - result_map = result.groupdict() - repetitions = None - start_point = None - end_point = None - interval = None - if "reps" in result_map and result_map["reps"] is not None: - repetitions = int(result_map["reps"]) - if "start" in result_map: - start_point = self.timepoint_parser.parse(result_map["start"]) - if "end" in result_map: - end_point = self.timepoint_parser.parse(result_map["end"]) - if "intv" in result_map: - interval = self.timeinterval_parser.parse( - result_map["intv"]) - return TimeRecurrence(repetitions=repetitions, - start_point=start_point, - end_point=end_point, - interval=interval) - raise TimeSyntaxError( - "Not a supported ISO 8601 recurrence pattern: %s" % - expression) - - def get_tests(self): - """Run a series of self-tests. - - The amount of parsing in this class is quite small, so not many - tests are needed for this part. - - """ - test_points = ["-100024-02-10T17:00:00-12:30", - "+000001-W45-7T06Z", "1001W011", - "1955W051T06,5Z", "1999-06-01", - "1967-056", "+5002000830T235902,345", - "1765-W04"] - for reps in [None, 1, 2, 3, 10]: - if reps is None: - reps_string = "" - else: - reps_string = str(reps) - point_parser = TimePointParser() - interval_parser = TimeIntervalParser() - for point_expr in test_points: - interval_tests = interval_parser.get_tests() - start_point = point_parser.parse(point_expr) - for interval_expr, interval_result in interval_tests: - interval = interval_parser.parse(interval_expr) - end_point = start_point + interval - if reps is not None: - expr_1 = ("R" + reps_string + "/" + str(start_point) + - "/" + str(end_point)) - yield expr_1, {"repetitions": reps, - "start_point": start_point, - "end_point": end_point} - expr_3 = ("R" + reps_string + "/" + str(start_point) + - "/" + str(interval)) - yield expr_3, {"repetitions": reps, - "start_point": start_point, - "interval": interval} - expr_4 = ("R" + reps_string + "/" + str(interval) + "/" + - str(end_point)) - yield expr_4, {"repetitions": reps, "interval": interval, - "end_point": end_point} - - __call__ = parse - - -class TimePointParser(object): - - """Container for ISO 8601 date/time expressions. - - Keyword arguments: - num_expanded_year_digits (default 2) specifies the extra year - digits allowed by the ISO standard - for example, 1995 can be - written as +001995 with 2 extra year digits. - - allow_truncated (default False) specifies that ISO 8601:2000 - truncations are allowed (not allowed in the ISO 8601:2004 - standard which supersedes it). - - allow_only_basic (default False) specifies that only the basic - forms of date and time in the ISO standard are allowed (no - extraneous punctuation). This means that "2000-01-02T01:14:02" - is not allowed, and must be written as "20000102T011402". - - assume_utc (default False) specifies that dates and times without - timezone information should be assumed UTC (Z). Otherwise, these - will be converted to the local timezone. - - format_function (default None) should be a callable that takes a - TimePoint instance created by this parser and returns a custom - string representation such as "20150304T0103". This is called on - str(timepoint_instance). If None, the default TimePoint - formatting will be applied. - - """ - - DATE_EXPRESSIONS = {"basic": {"complete": u""" -ccYYMMDD -±ΫccYYMMDD -ccYYDDD -±ΫccYYDDD -ccYYWwwD -±ΫccYYWwwD""", - "reduced": u""" -ccYY-MM # Deviation? Not clear if "basic" or "extended" in standard. -ccYY -cc -±ΫccYY-MM # Deviation? Not clear if "basic" or "extended" in standard. -±ΫccYY -±Ϋcc -ccYYWww -±ΫccYYWww""", - "truncated": u""" --YYMM --YY ---MMDD ---MM ----DD -YYMMDD -YYDDD --DDD -YYWwwD -YYWww --ỵWwwD --ỵWww --WwwD --Www --W-D -"""}, - "extended": {"complete": u""" -ccYY-MM-DD -±ΫccYY-MM-DD -ccYY-DDD -±ΫccYY-DDD -ccYY-Www-D -±ΫccYY-Www-D""", - "reduced": u""" -ccYY-MM -±ΫccYY-MM -ccYY-Www -±ΫccYY-Www""", - "truncated": u""" --YY-MM ---MM-DD -YY-MM-DD -YY-DDD --DDD # Deviation from standard ? -YY-Www-D -YY-Www --ỵ-WwwD --ỵ-Www --Www-D -"""}} - - TIME_EXPRESSIONS = {"basic": {"complete": u""" -# No Time Zone -hhmmss - -# No Time Zone - decimals -hhmmss,sṡ -hhmm,mṁ -hh,hḣ -""", - "reduced": u""" -# No Time Zone -hhmm -hh - -# No Time Zone - decimals -""", - "truncated": u""" -# No Time Zone --mmss --mm ---ss - -# No Time Zone - decimals --mmss,sṡ --mm,mṁ ---ss,sṡ -"""}, - "extended": {"complete": u""" -# No Time Zone -hh:mm:ss - -# No Time Zone - decimals -hh:mm:ss,sṡ -hh:mm,mṁ -hh,hḣ # Deviation? Not allowed in standard ? -""", - "reduced": u""" -# No Time Zone -hh:mm -hh # Deviation? Not allowed in standard ? -""", - "truncated": u""" -# No Time Zone --mm:ss --mm # Deviation? Not allowed in standard ? ---ss # Deviation? Not allowed in standard ? - -# No Time Zone - decimals --mm:ss,sṡ --mm,mṁ # Deviation? Not allowed in standard ? ---ss,sṡ # Deviation? Not allowed in standard ? -"""}} - - TIMEZONE_EXPRESSIONS = {"basic": u""" -Z -±hh -±hhmm -""", - "extended": u""" -Z -±hh # Deviation? Not allowed in standard? -±hh:mm -"""} - - DATE_CHAR_REGEXES = [(u"±", "(?P[+-])"), - (u"cc", "(?P\d\d)"), - (u"YY", "(?P\d\d)"), - (u"MM", "(?P\d\d)"), - (u"DDD", "(?P\d\d\d)"), - (u"DD", "(?P\d\d)"), - (u"Www", "W(?P\d\d)"), - (u"D", "(?P\d)"), - (u"ỵ", "(?P\d)"), - (u"^---", "(?P---)"), - (u"^--", "(?P--)"), - (u"^-", "(?P-)"), - (u"^~", "(?P)")] - TIME_CHAR_REGEXES = [(u"(?<=^hh)mm", "(?P\d\d)"), - (u"(?<=^hh:)mm", "(?P\d\d)"), - (u"(?<=^-)mm", "(?P\d\d)"), - (u"^hh", "(?P\d\d)"), - (u",hḣ", "[,.](?P\d+)"), - (u",mṁ", "[,.](?P\d+)"), - (u"ss", "(?P\d\d)"), - (u",sṡ", "[,.](?P\d+)"), - (u"^--", "(?P--)"), - (u"^-", "(?P-)")] - TIMEZONE_CHAR_REGEXES = [ - (u"(?<=±hh)mm", "(?P\d\d)"), - (u"(?<=±hh:)mm", "(?P\d\d)"), - (u"(?<=±)hh", "(?P\d\d)"), - (u"±", "(?P[+-])"), - (u"Z", "(?PZ)") - ] - TIME_DESIGNATOR = "T" - - # Note: test dates assume 2 expanded year digits. - TEST_DATE_EXPRESSIONS = { - "basic": { - "complete": { - "00440104": {"year": 44, "month_of_year": 1, - "day_of_month": 4}, - "+5002000830": {"year": 500200, "month_of_year": 8, - "day_of_month": 30, "expanded_year_digits": 2}, - "-0000561113": {"year": -56, "month_of_year": 11, - "day_of_month": 13, "expanded_year_digits": 2}, - "-1000240210": {"year": -100024, "month_of_year": 2, - "day_of_month": 10, "expanded_year_digits": 2}, - "1967056": {"year": 1967, "day_of_year": 56}, - "+123456078": {"year": 123456, "day_of_year": 78, - "expanded_year_digits": 2}, - "-004560134": {"year": -4560, "day_of_year": 134, - "expanded_year_digits": 2}, - "1001W011": {"year": 1001, "week_of_year": 1, - "day_of_week": 1}, - "+000001W457": {"year": 1, "week_of_year": 45, - "day_of_week": 7, - "expanded_year_digits": 2}, - "-010001W053": {"year": -10001, "week_of_year": 5, - "day_of_week": 3, "expanded_year_digits": 2} - }, - "reduced": { - "4401-03": {"year": 4401, "month_of_year": 3}, - "1982": {"year": 1982}, - "19": {"year": 1900}, - "+056789-01": {"year": 56789, "month_of_year": 1, - "expanded_year_digits": 2}, - "-000001-12": {"year": -1, "month_of_year": 12, - "expanded_year_digits": 2}, - "-789123": {"year": -789123, "expanded_year_digits": 2}, - "+450001": {"year": 450001, "expanded_year_digits": 2}, - # The following cannot be parsed - looks like truncated -YYMM. - # "-0023": {"year": -2300, "expanded_year_digits": 2}, - "+5678": {"year": 567800, "expanded_year_digits": 2}, - "1765W04": {"year": 1765, "week_of_year": 4}, - "+001765W44": {"year": 1765, "week_of_year": 44, - "expanded_year_digits": 2}, - "-123321W50": {"year": -123321, "week_of_year": 50, - "expanded_year_digits": 2} - }, - "truncated": { - "-9001": {"year": 90, "month_of_year": 1, - "truncated": True, - "truncated_property": "year_of_century"}, - "960328": {"year": 96, "month_of_year": 3, - "day_of_month": 28, - "truncated": True, - "truncated_property": "year_of_century"}, - "-90": {"year": 90, "truncated": True, - "truncated_property": "year_of_century"}, - "--0501": {"month_of_year": 5, "day_of_month": 1, - "truncated": True}, - "--12": {"month_of_year": 12, "truncated": True}, - "---30": {"day_of_month": 30, "truncated": True}, - "98354": {"year": 98, "day_of_year": 354, "truncated": True, - "truncated_property": "year_of_century"}, - "-034": {"day_of_year": 34, "truncated": True}, - "00W031": {"year": 0, "week_of_year": 3, "day_of_week": 1, - "truncated": True, - "truncated_property": "year_of_century"}, - "99W34": {"year": 99, "week_of_year": 34, "truncated": True, - "truncated_property": "year_of_century"}, - "-1W02": {"year": 1, "week_of_year": 2, - "truncated": True, - "truncated_property": "year_of_decade"}, - "-W031": {"week_of_year": 3, "day_of_week": 1, - "truncated": True}, - "-W32": {"week_of_year": 32, "truncated": True}, - "-W-1": {"day_of_week": 1, "truncated": True} - } - }, - "extended": { - "complete": { - "0044-01-04": {"year": 44, "month_of_year": 1, - "day_of_month": 4}, - "+500200-08-30": {"year": 500200, "month_of_year": 8, - "day_of_month": 30, - "expanded_year_digits": 2}, - "-000056-11-13": {"year": -56, "month_of_year": 11, - "day_of_month": 13, - "expanded_year_digits": 2}, - "-100024-02-10": {"year": -100024, "month_of_year": 2, - "day_of_month": 10, - "expanded_year_digits": 2}, - "1967-056": {"year": 1967, "day_of_year": 56}, - "+123456-078": {"year": 123456, "day_of_year": 78, - "expanded_year_digits": 2}, - "-004560-134": {"year": -4560, "day_of_year": 134, - "expanded_year_digits": 2}, - "1001-W01-1": {"year": 1001, "week_of_year": 1, - "day_of_week": 1}, - "+000001-W45-7": {"year": 1, "week_of_year": 45, - "day_of_week": 7, - "expanded_year_digits": 2}, - "-010001-W05-3": {"year": -10001, "week_of_year": 5, - "day_of_week": 3, - "expanded_year_digits": 2} - }, - "reduced": { - "4401-03": {"year": 4401, "month_of_year": 3}, - "1982": {"year": 1982}, - "19": {"year": 1900}, - "+056789-01": {"year": 56789, "month_of_year": 1, - "expanded_year_digits": 2}, - "-000001-12": {"year": -1, "month_of_year": 12, - "expanded_year_digits": 2}, - "-789123": {"year": -789123, "expanded_year_digits": 2}, - "+450001": {"year": 450001, "expanded_year_digits": 2}, - # The following cannot be parsed - looks like truncated -YYMM. - # "-0023": {"year": -2300, "expanded_year_digits": 2}, - "+5678": {"year": 567800, "expanded_year_digits": 2}, - "1765-W04": {"year": 1765, "week_of_year": 4}, - "+001765-W44": {"year": 1765, "week_of_year": 44, - "expanded_year_digits": 2}, - "-123321-W50": {"year": -123321, "week_of_year": 50, - "expanded_year_digits": 2} - }, - "truncated": { - "-9001": {"year": 90, "month_of_year": 1, - "truncated": True, - "truncated_property": "year_of_century"}, - "96-03-28": {"year": 96, "month_of_year": 3, - "day_of_month": 28, - "truncated": True, - "truncated_property": "year_of_century"}, - "-90": {"year": 90, "truncated": True, - "truncated_property": "year_of_century"}, - "--05-01": {"month_of_year": 5, "day_of_month": 1, - "truncated": True}, - "--12": {"month_of_year": 12, "truncated": True}, - "---30": {"day_of_month": 30, "truncated": True}, - "98-354": {"year": 98, "day_of_year": 354, "truncated": True, - "truncated_property": "year_of_century"}, - "-034": {"day_of_year": 34, "truncated": True}, - "00-W03-1": {"year": 0, "week_of_year": 3, "day_of_week": 1, - "truncated": True, - "truncated_property": "year_of_century"}, - "99-W34": {"year": 99, "week_of_year": 34, "truncated": True, - "truncated_property": "year_of_century"}, - "-1-W02": {"year": 1, "week_of_year": 2, - "truncated": True, - "truncated_property": "year_of_decade"}, - "-W03-1": {"week_of_year": 3, "day_of_week": 1, - "truncated": True}, - "-W32": {"week_of_year": 32, "truncated": True}, - "-W-1": {"day_of_week": 1, "truncated": True} - } - } - } - TEST_TIME_EXPRESSIONS = { - "basic": { - "complete": { - "050102": {"hour_of_day": 5, "minute_of_hour": 1, - "second_of_minute": 2}, - "235902,345": {"hour_of_day": 23, "minute_of_hour": 59, - "second_of_minute": 2.345}, - "235902.345": {"hour_of_day": 23, "minute_of_hour": 59, - "second_of_minute": 2.345}, - "1201,4": {"hour_of_day": 12, "minute_of_hour": 1.4}, - "1201.4": {"hour_of_day": 12, "minute_of_hour": 1.4}, - "00,4356": {"hour_of_day": 0.4356}, - "00.4356": {"hour_of_day": 0.4356} - }, - "reduced": { - "0203": {"hour_of_day": 2, "minute_of_hour": 3}, - "17": {"hour_of_day": 17} - }, - "truncated": { - "-5612": {"minute_of_hour": 56, "second_of_minute": 12, - "truncated": True}, - "-12": {"minute_of_hour": 12, "truncated": True}, - "--45": {"second_of_minute": 45, "truncated": True}, - "-1234,45": {"minute_of_hour": 12, "second_of_minute": 34.45, - "truncated": True}, - "-1234.45": {"minute_of_hour": 12, "second_of_minute": 34.45, - "truncated": True}, - "-34,2": {"minute_of_hour": 34.2, "truncated": True}, - "-34.2": {"minute_of_hour": 34.2, "truncated": True}, - "--59,99": {"second_of_minute": 59.99, "truncated": True}, - "--59.99": {"second_of_minute": 59.99, "truncated": True} - } - }, - "extended": { - "complete": { - "05:01:02": {"hour_of_day": 5, "minute_of_hour": 1, - "second_of_minute": 2}, - "23:59:02,345": {"hour_of_day": 23, "minute_of_hour": 59, - "second_of_minute": 2.345}, - "23:59:02.345": {"hour_of_day": 23, "minute_of_hour": 59, - "second_of_minute": 2.345}, - "12:01,4": {"hour_of_day": 12, "minute_of_hour": 1.4}, - "12:01.4": {"hour_of_day": 12, "minute_of_hour": 1.4}, - "00,4356": {"hour_of_day": 0.4356}, - "00.4356": {"hour_of_day": 0.4356} - }, - "reduced": { - "02:03": {"hour_of_day": 2, "minute_of_hour": 3}, - "17": {"hour_of_day": 17} - }, - "truncated": { - "-56:12": {"minute_of_hour": 56, "second_of_minute": 12, - "truncated": True}, - "-12": {"minute_of_hour": 12, "truncated": True}, - "--45": {"second_of_minute": 45, "truncated": True}, - "-12:34,45": {"minute_of_hour": 12, "second_of_minute": 34.45, - "truncated": True}, - "-12:34.45": {"minute_of_hour": 12, "second_of_minute": 34.45, - "truncated": True}, - "-34,2": {"minute_of_hour": 34.2, "truncated": True}, - "-34.2": {"minute_of_hour": 34.2, "truncated": True}, - "--59,99": {"second_of_minute": 59.99, "truncated": True}, - "--59.99": {"second_of_minute": 59.99, "truncated": True} - } - } - } - TEST_TIMEZONE_EXPRESSIONS = { - "basic": { - "Z": {"time_zone_utc": True}, - "+01": {"time_zone_hour": 1}, - "-05": {"time_zone_hour": -5}, - "+2301": {"time_zone_hour": 23, "time_zone_minute": 1}, - "-1230": {"time_zone_hour": -12, "time_zone_minute": 30} - }, - "extended": { - "Z": {"time_zone_utc": True}, - "+01": {"time_zone_hour": 1}, - "-05": {"time_zone_hour": -5}, - "+23:01": {"time_zone_hour": 23, "time_zone_minute": 1}, - "-12:30": {"time_zone_hour": -12, "time_zone_minute": 30} - } - } - - def __init__(self, num_expanded_year_digits=2, - allow_truncated=False, - allow_only_basic=False, - assume_utc=False, - format_function=None): - expanded_year_digit_regex = "\d" * num_expanded_year_digits - self.expanded_year_digits = num_expanded_year_digits - self.DATE_CHAR_REGEXES.append( - (u"Ϋ", - "(?P" + expanded_year_digit_regex + ")") - ) - self.allow_truncated = allow_truncated - self.allow_only_basic = allow_only_basic - self.format_function = format_function - self._generate_regexes() - - def _generate_regexes(self): - """Generate combined date time strings.""" - date_map = self.DATE_EXPRESSIONS - time_map = self.TIME_EXPRESSIONS - timezone_map = self.TIMEZONE_EXPRESSIONS - self._date_regex_map = {} - self._time_regex_map = {} - self._timezone_regex_map = {} - format_ok_keys = ["basic", "extended"] - if self.allow_only_basic: - format_ok_keys = ["basic"] - for format_type in format_ok_keys: - self._date_regex_map.setdefault(format_type, {}) - self._time_regex_map.setdefault(format_type, {}) - self._timezone_regex_map.setdefault(format_type, []) - for date_key in date_map[format_type].keys(): - self._date_regex_map[format_type].setdefault(date_key, []) - regex_list = self._date_regex_map[format_type][date_key] - for date_expr in self.get_expressions( - date_map[format_type][date_key]): - date_regex = self.parse_date_expression_to_regex( - date_expr) - regex_list.append([re.compile(date_regex), date_expr]) - for time_key in time_map[format_type].keys(): - self._time_regex_map[format_type].setdefault(time_key, []) - regex_list = self._time_regex_map[format_type][time_key] - for time_expr in self.get_expressions( - time_map[format_type][time_key]): - time_regex = self.parse_time_expression_to_regex( - time_expr) - regex_list.append([re.compile(time_regex), time_expr]) - for timezone_expr in self.get_expressions( - timezone_map[format_type]): - timezone_regex = self.parse_timezone_expression_to_regex( - timezone_expr) - self._timezone_regex_map[format_type].append( - [re.compile(timezone_regex), timezone_expr]) - - def get_expressions(self, text): - """Yield valid expressions from text.""" - for line in text.splitlines(): - line_text = line.strip() - if not line_text or line_text.startswith("#"): - continue - expr_text = line_text.split("#", 1)[0].strip() - yield expr_text - - def parse_date_expression_to_regex(self, expression): - """Construct regular expressions for the date.""" - for expr_regex, substitute in self.DATE_CHAR_REGEXES: - expression = re.sub(expr_regex, substitute, expression) - expression = "^" + expression + "$" - return expression - - def parse_time_expression_to_regex(self, expression): - """Construct regular expressions for the time.""" - for expr_regex, substitute in self.TIME_CHAR_REGEXES: - expression = re.sub(expr_regex, substitute, expression) - expression = "^" + expression + "$" - return expression - - def parse_timezone_expression_to_regex(self, expression): - """Construct regular expressions for the timezone.""" - for expr_regex, substitute in self.TIMEZONE_CHAR_REGEXES: - expression = re.sub(expr_regex, substitute, expression) - expression = "^" + expression + "$" - return expression - - def parse(self, timepoint_string): - """Parse a user-supplied timepoint string.""" - date_time_timezone = timepoint_string.split(self.TIME_DESIGNATOR) - if len(date_time_timezone) == 1: - date = date_time_timezone[0] - keys, date_info = self.get_date_info(date) - time_info = {} - else: - date, time_timezone = date_time_timezone - if not date and self.allow_truncated: - keys = (None, "truncated") - date_info = {"truncated": True} - else: - keys, date_info = self.get_date_info(date, - bad_types=["reduced"]) - format_key, type_key = keys - bad_formats = [] - if format_key == "basic": - bad_formats = ["extended"] - if format_key == "extended": - bad_formats = ["basic"] - if type_key == "truncated": - # Do not force basic/extended formatting for truncated dates. - bad_formats = [] - bad_types = ["truncated"] - if date_info.get("truncated"): - bad_types = [] - if time_timezone.endswith("Z"): - time, timezone = time_timezone[:-1], "Z" - else: - if "+" in time_timezone: - time, timezone = time_timezone.split("+") - timezone = "+" + timezone - elif "-" in time_timezone: - time, timezone = time_timezone.rsplit("-", 1) - timezone = "-" + timezone - # Make sure this isn't just a truncated time. - try: - time_info = self.get_time_info( - time, - bad_formats=bad_formats, - bad_types=bad_types - ) - timezone_info = self.get_timezone_info( - timezone, - bad_formats=bad_formats - ) - except TimeSyntaxError: - time = time_timezone - timezone = None - else: - time = time_timezone - timezone = None - if timezone is None: - timezone_info = {} - else: - timezone_info = self.get_timezone_info( - timezone, - bad_formats=bad_formats - ) - if timezone_info.pop("time_zone_sign", "+") == "-": - timezone_info["time_zone_hour"] = ( - int(timezone_info["time_zone_hour"]) * -1) - if "time_zone_minute" in timezone_info: - timezone_info["time_zone_minute"] = ( - int(timezone_info["time_zone_minute"]) * -1) - time_info = self.get_time_info(time, bad_formats=bad_formats, - bad_types=bad_types) - time_info.update(timezone_info) - info = {} - truncated_property = None - if date_info.get("truncated"): - if "year_of_decade" in date_info: - truncated_property = "year_of_decade" - if "year_of_century" in date_info: - truncated_property = "year_of_century" - elif ("century" not in date_info and - "year_of_century" in date_info): - truncated_property = "year_of_century" - date_info["truncated"] = True - year = int(date_info.get("year", 0)) - if "year_of_decade" in date_info: - year += int(date_info.pop("year_of_decade")) - truncated_property = "year_of_decade" - year += int(date_info.pop("year_of_century", 0)) - year += 100 * int(date_info.pop("century", 0)) - expanded_year = date_info.pop("expanded_year", 0) - if expanded_year: - date_info["expanded_year_digits"] = self.expanded_year_digits - year += 10000 * int(expanded_year) - if date_info.pop("year_sign", "+") == "-": - year *= -1 - date_info["year"] = year - for key, value in date_info.items(): - try: - date_info[key] = int(value) - except (TypeError, ValueError): - pass - info.update(date_info) - for key, value in time_info.items(): - if key.endswith("_decimal"): - value = "0." + value - try: - value = float(value) - except (IOError, ValueError) as e: - pass - if key == "time_zone_utc" and value == "Z": - value = True - if key == "year_sign": - if value == "+": - value = 1 - else: - value = -1 - time_info[key] = value - info.update(time_info) - if info.pop("truncated", False): - info["truncated"] = True - if truncated_property is not None: - info["truncated_property"] = truncated_property - if self.format_function is not None: - info.update({"format_function": self.format_function}) - return TimePoint(**info) - - def get_date_info(self, date_string, bad_types=None): - """Return the format and properties from a date string.""" - type_keys = ["complete", "truncated", "reduced"] - if bad_types is not None: - for type_key in bad_types: - type_keys.remove(type_key) - if not self.allow_truncated and "truncated" in type_keys: - type_keys.remove("truncated") - for format_key, type_regex_map in self._date_regex_map.items(): - for type_key in type_keys: - regex_list = type_regex_map[type_key] - for regex, expr in regex_list: - result = regex.match(date_string) - if result: - return (format_key, type_key), result.groupdict() - raise TimeSyntaxError( - "Not a valid ISO 8601 date representation: %s" % date_string) - - def get_time_info(self, time_string, bad_formats=None, bad_types=None): - """Return the properties from a time string.""" - if bad_formats is None: - bad_formats = [] - if bad_types is None: - bad_types = [] - for format_key, type_regex_map in self._time_regex_map.items(): - if format_key in bad_formats: - continue - for type_key, regex_list in type_regex_map.items(): - if type_key in bad_types: - continue - for regex, expr in regex_list: - result = regex.match(time_string) - if result: - return result.groupdict() - raise TimeSyntaxError( - "Not a valid ISO 8601 time representation: %s" % time_string) - - def get_timezone_info(self, timezone_string, bad_formats=None): - """Return the properties from a timezone string.""" - if bad_formats is None: - bad_formats = [] - for format_key, regex_list in self._timezone_regex_map.items(): - if format_key in bad_formats: - continue - for regex, expr in regex_list: - result = regex.match(timezone_string) - if result: - return result.groupdict() - raise TimeSyntaxError( - "Not a valid ISO 8601 timezone representation: %s" % - timezone_string - ) - - def get_tests(self): - """Return self-tests as (str, TimePoint kwargs) tuples.""" - format_ok_keys = ["basic", "extended"] - if self.allow_only_basic: - format_ok_keys = ["basic"] - date_combo_ok_keys = ["complete"] - if self.allow_truncated: - date_combo_ok_keys = ["complete", "truncated"] - time_combo_ok_keys = ["complete", "reduced"] - test_date_map = self.TEST_DATE_EXPRESSIONS - test_time_map = self.TEST_TIME_EXPRESSIONS - test_timezone_map = self.TEST_TIMEZONE_EXPRESSIONS - for format_type in format_ok_keys: - date_format_tests = test_date_map[format_type] - time_format_tests = test_time_map[format_type] - timezone_format_tests = test_timezone_map[format_type] - for date_key in date_format_tests: - if not self.allow_truncated and date_key == "truncated": - continue - for date_expr, info in date_format_tests[date_key].items(): - yield date_expr, info - for date_key in date_combo_ok_keys: - date_tests = copy.deepcopy(date_format_tests[date_key]) - # Add a blank date for time-only testing. - for date_expr, info in date_tests.items(): - for time_key in time_combo_ok_keys: - time_items = time_format_tests[time_key].items() - for time_expr, time_info in time_items: - combo_expr = (date_expr + self.TIME_DESIGNATOR + - time_expr) - combo_info = {} - for key, value in info.items() + time_info.items(): - combo_info[key] = value - yield combo_expr, combo_info - timezone_items = timezone_format_tests.items() - for timezone_expr, timezone_info in timezone_items: - tz_expr = combo_expr + timezone_expr - tz_info = {} - for key, value in (combo_info.items() + - timezone_info.items()): - tz_info[key] = value - yield tz_expr, tz_info - if not self.allow_truncated: - continue - for time_key in time_format_tests: - time_tests = time_format_tests[time_key] - for time_expr, time_info in time_tests.items(): - combo_expr = self.TIME_DESIGNATOR + time_expr - # Add truncated (no date). - combo_info = {"truncated": True} - for key, value in time_info.items(): - combo_info[key] = value - yield combo_expr, combo_info - timezone_items = timezone_format_tests.items() - for timezone_expr, timezone_info in timezone_items: - tz_expr = combo_expr + timezone_expr - tz_info = {} - for key, value in (combo_info.items() + - timezone_info.items()): - tz_info[key] = value - yield tz_expr, tz_info - - -class TimeIntervalParser(object): - - """Parser for ISO 8601 Durations (time intervals).""" - - INTERVAL_REGEXES = [ - re.compile(r"""^P(?:(?P\d+)Y)? - (?:(?P\d+)M)? - (?:(?P\d+)D)?$""", re.X), - re.compile(r"""^P(?:(?P\d+)Y)? - (?:(?P\d+)M)? - (?:(?P\d+)D)? - T(?:(?P\d.*)H)? - (?:(?P\d.*)M)? - (?:(?P\d.*)S)?$""", re.X), - re.compile(r"""^P(?P\d+)W$""", re.X) - ] - - def parse(self, expression): - """Parse an ISO duration expression into a TimeInterval instance.""" - for rec_regex in self.INTERVAL_REGEXES: - result = rec_regex.search(expression) - if not result: - continue - result_map = result.groupdict() - for key, value in result_map.items(): - if value is None: - result_map.pop(key) - continue - if key in ["years", "months", "days", "weeks"]: - value = int(value) - else: - if "," in value: - value = value.replace(",", ".") - value = float(value) - result_map[key] = value - return TimeInterval(**result_map) - raise TimeSyntaxError("Not an ISO 8601 duration representation: %s" % - expression) - - def get_tests(self): - """Yield self-tests as (input_string, output_string) tuples.""" - - self.TEST_EXPRESSIONS = { - "P3Y": str(TimeInterval(years=3)), - "P90Y": str(TimeInterval(years=90)), - "P1Y2M": str(TimeInterval(years=1, months=2)), - "P20Y2M": str(TimeInterval(years=20, months=2)), - "P2M": str(TimeInterval(months=2)), - "P52M": str(TimeInterval(months=52)), - "P20Y10M2D": str(TimeInterval(years=20, months=10, days=2)), - "P1Y3D": str(TimeInterval(years=1, days=3)), - "P4M1D": str(TimeInterval(months=4, days=1)), - "P3Y404D": str(TimeInterval(years=3, days=404)), - "P30Y2D": str(TimeInterval(years=30, days=2)), - "PT6H": str(TimeInterval(hours=6)), - "PT1034H": str(TimeInterval(hours=1034)), - "P3YT4H2M": str(TimeInterval(years=3, hours=4, minutes=2)), - "P30Y2DT10S": str(TimeInterval(years=30, days=2, seconds=10)), - "PT2S": str(TimeInterval(seconds=2)), - "PT2.5S": str(TimeInterval(seconds=2.5)), - "PT2,5S": str(TimeInterval(seconds=2.5)), - "PT5.5023H": str(TimeInterval(hours=5.5023)), - "PT5,5023H": str(TimeInterval(hours=5.5023)), - "P5W": str(TimeInterval(weeks=5)), - "P100W": str(TimeInterval(weeks=100)) - } - for expression, ctrl_result in self.TEST_EXPRESSIONS.items(): - yield expression, ctrl_result - - class TimeSyntaxError(ValueError): """An error denoting invalid input syntax.""" @@ -2385,131 +1469,6 @@ def iter_months_days(year, month_of_year=None, day_of_month=None, yield i + 1, day -class TestSuite(unittest.TestCase): - - """Test the functionality of parsers and data model manipulation.""" - - def assertEqual(self, test, control, source=None): - """Override the assertEqual method to provide more information.""" - if source is None: - info = None - else: - info = ("Source %s produced\n%s, should be\n%s" % - (source, test, control)) - super(TestSuite, self).assertEqual(test, control, info) - - def test_timeinterval_parser(self): - """Test the time interval parsing.""" - parser = TimeIntervalParser() - for expression, ctrl_result in parser.get_tests(): - try: - test_result = str(parser.parse(expression)) - except TimeSyntaxError: - raise ValueError( - "TimeIntervalParser test failed to parse '%s'" % - expression - ) - self.assertEqual(test_result, ctrl_result, expression) - - def test_timepoint(self): - """Test the manipulation of dates and times (takes a while).""" - import datetime - import random - my_date = datetime.datetime(1801, 1, 1) - while my_date <= datetime.datetime(2401, 2, 1): - ctrl_data = my_date.isocalendar() - test_date = TimePoint(year=my_date.year, - month_of_year=my_date.month, - day_of_month=my_date.day) - test_data = test_date.get_week_date() - self.assertEqual(test_data, ctrl_data) - ctrl_data = (my_date.year, my_date.month, my_date.day) - test_data = test_date.to_week_date().get_calendar_date() - self.assertEqual(test_data, ctrl_data) - ctrl_data = my_date.toordinal() - year, day_of_year = test_date.get_ordinal_date() - test_data = day_of_year - test_data += get_days_since_1_ad(year - 1) - self.assertEqual(test_data, ctrl_data) - for attribute, attr_max in [("weeks", 110), - ("days", 770), - ("hours", 770*24), - ("minutes", 770 * 24 * 60), - ("seconds", 770 * 24 * 60 * 60)]: - delta_attr = random.randrange(0, attr_max) - kwargs = {attribute: delta_attr} - ctrl_data = my_date + datetime.timedelta(**kwargs) - ctrl_data = (ctrl_data.year, ctrl_data.month, ctrl_data.day) - test_data = ( - test_date + TimeInterval(**kwargs)).get_calendar_date() - self.assertEqual(test_data, ctrl_data) - ctrl_data = (my_date - datetime.timedelta(**kwargs)) - ctrl_data = (ctrl_data.year, ctrl_data.month, ctrl_data.day) - test_data = ( - test_date - TimeInterval(**kwargs)).get_calendar_date() - self.assertEqual(test_data, ctrl_data) - ctrl_data = (my_date + datetime.timedelta(minutes=450) + - datetime.timedelta(hours=5) - - datetime.timedelta(seconds=500, weeks=5)) - ctrl_data = [(ctrl_data.year, ctrl_data.month, ctrl_data.day), - (ctrl_data.hour, ctrl_data.minute, ctrl_data.second)] - test_data = ( - test_date + TimeInterval(minutes=450) + - TimeInterval(hours=5) - TimeInterval(weeks=5, seconds=500)) - test_data = [test_data.get_calendar_date(), - test_data.get_hour_minute_second()] - self.assertEqual(test_data, ctrl_data) - timedelta = datetime.timedelta(days=1) - my_date += timedelta - - def test_timepoint_parser(self): - """Test the parsing of date/time expressions.""" - parser = TimePointParser(allow_truncated=True) - for expression, timepoint_kwargs in parser.get_tests(): - timepoint_kwargs = copy.deepcopy(timepoint_kwargs) - try: - test_data = str(parser.parse(expression)) - except TimeSyntaxError: - raise ValueError("Parsing failed for %s" % expression) - ctrl_data = str(TimePoint(**timepoint_kwargs)) - self.assertEqual(test_data, ctrl_data, expression) - - def test_timerecurrence(self): - """Test the recurring date/time series data model.""" - parser = TimeRecurrenceParser() - for expression, ctrl_results in TimeRecurrence.TEST_EXPRESSIONS: - try: - test_recurrence = parser.parse(expression) - except TimeSyntaxError: - raise ValueError( - "TimeRecurrenceParser test failed to parse '%s'" % - expression - ) - test_results = [] - for i, time_point in enumerate(test_recurrence): - if i > 2: - break - test_results.append(str(time_point)) - self.assertEqual(test_results, ctrl_results, expression) - - def test_timerecurrence_parser(self): - """Test the recurring date/time series parsing.""" - parser = TimeRecurrenceParser() - for expression, test_info in parser.get_tests(): - try: - test_data = str(parser.parse(expression)) - except TimeSyntaxError: - raise ValueError("Parsing failed for %s" % expression) - ctrl_data = str(TimeRecurrence(**test_info)) - self.assertEqual(test_data, ctrl_data, expression) - - -def parse_timepoint_expression(timepoint_expression, **kwargs): - """Return a data model that represents timepoint_expression.""" - parser = TimePointParser(**kwargs) - return parser.parse(timepoint_expression) - - if __name__ == "__main__": suite = unittest.TestLoader().loadTestsFromTestCase(TestSuite) unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/isoparsers.py b/isoparsers.py new file mode 100644 index 0000000..90fdf42 --- /dev/null +++ b/isoparsers.py @@ -0,0 +1,572 @@ +# -*- coding: utf-8 -*- +#----------------------------------------------------------------------------- +# (C) British Crown Copyright 2013 Met Office. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +#----------------------------------------------------------------------------- + +"""This provides ISO 8601 parsing functionality.""" + + +class TimeRecurrenceParser(object): + + """Parser for ISO 8601 recurrence expressions. + + Keyword arguments: + timepoint_parser (default None) should be an instance of + TimePointParser, or None to use a normal TimePointParser instance. + timeinterval_parser (default None) should be an instance of + TimeIntervalParser, or None to generate a normal + TimeIntervalParser. + + Callable (via self.parse method) with an ISO 8601-compliant + recurrence pattern - this returns a TimeRecurrence instance. + + """ + + RECURRENCE_REGEXES = [ + re.compile(r"^R(?P\d+)/(?P[^P][^/]*)/(?P[^P].*)$"), + re.compile(r"^R(?P\d+)?/(?P[^P][^/]*)/(?PP.+)$"), + re.compile(r"^R(?P\d+)?/(?PP.+)/(?P[^P].*)$")] + + def __init__(self, timepoint_parser=None, timeinterval_parser=None): + if timepoint_parser is None: + self.timepoint_parser = TimePointParser() + else: + self.timepoint_parser = timepoint_parser + if timeinterval_parser is None: + self.timeinterval_parser = TimeIntervalParser() + else: + self.timepoint_parser = timeinterval_parser + + def parse(self, expression): + """Parse a recurrence string into a TimeRecurrence instance.""" + for regex in self.RECURRENCE_REGEXES: + result = regex.search(expression) + if not result: + continue + result_map = result.groupdict() + repetitions = None + start_point = None + end_point = None + interval = None + if "reps" in result_map and result_map["reps"] is not None: + repetitions = int(result_map["reps"]) + if "start" in result_map: + start_point = self.timepoint_parser.parse(result_map["start"]) + if "end" in result_map: + end_point = self.timepoint_parser.parse(result_map["end"]) + if "intv" in result_map: + interval = self.timeinterval_parser.parse( + result_map["intv"]) + return TimeRecurrence(repetitions=repetitions, + start_point=start_point, + end_point=end_point, + interval=interval) + raise TimeSyntaxError( + "Not a supported ISO 8601 recurrence pattern: %s" % + expression) + + __call__ = parse + + +class TimePointParser(object): + + """Container for ISO 8601 date/time expressions. + + Keyword arguments: + num_expanded_year_digits (default 2) specifies the extra year + digits allowed by the ISO standard - for example, 1995 can be + written as +001995 with 2 extra year digits. + + allow_truncated (default False) specifies that ISO 8601:2000 + truncations are allowed (not allowed in the ISO 8601:2004 + standard which supersedes it). + + allow_only_basic (default False) specifies that only the basic + forms of date and time in the ISO standard are allowed (no + extraneous punctuation). This means that "2000-01-02T01:14:02" + is not allowed, and must be written as "20000102T011402". + + assume_utc (default False) specifies that dates and times without + timezone information should be assumed UTC (Z). Otherwise, these + will be converted to the local timezone. + + format_function (default None) should be a callable that takes a + TimePoint instance created by this parser and returns a custom + string representation such as "20150304T0103". This is called on + str(timepoint_instance). If None, the default TimePoint + formatting will be applied. + + """ + + DATE_EXPRESSIONS = {"basic": {"complete": u""" +ccYYMMDD +±ΫccYYMMDD +ccYYDDD +±ΫccYYDDD +ccYYWwwD +±ΫccYYWwwD""", + "reduced": u""" +ccYY-MM # Deviation? Not clear if "basic" or "extended" in standard. +ccYY +cc +±ΫccYY-MM # Deviation? Not clear if "basic" or "extended" in standard. +±ΫccYY +±Ϋcc +ccYYWww +±ΫccYYWww""", + "truncated": u""" +-YYMM +-YY +--MMDD +--MM +---DD +YYMMDD +YYDDD +-DDD +YYWwwD +YYWww +-ỵWwwD +-ỵWww +-WwwD +-Www +-W-D +"""}, + "extended": {"complete": u""" +ccYY-MM-DD +±ΫccYY-MM-DD +ccYY-DDD +±ΫccYY-DDD +ccYY-Www-D +±ΫccYY-Www-D""", + "reduced": u""" +ccYY-MM +±ΫccYY-MM +ccYY-Www +±ΫccYY-Www""", + "truncated": u""" +-YY-MM +--MM-DD +YY-MM-DD +YY-DDD +-DDD # Deviation from standard ? +YY-Www-D +YY-Www +-ỵ-WwwD +-ỵ-Www +-Www-D +"""}} + + TIME_EXPRESSIONS = {"basic": {"complete": u""" +# No Time Zone +hhmmss + +# No Time Zone - decimals +hhmmss,sṡ +hhmm,mṁ +hh,hḣ +""", + "reduced": u""" +# No Time Zone +hhmm +hh + +# No Time Zone - decimals +""", + "truncated": u""" +# No Time Zone +-mmss +-mm +--ss + +# No Time Zone - decimals +-mmss,sṡ +-mm,mṁ +--ss,sṡ +"""}, + "extended": {"complete": u""" +# No Time Zone +hh:mm:ss + +# No Time Zone - decimals +hh:mm:ss,sṡ +hh:mm,mṁ +hh,hḣ # Deviation? Not allowed in standard ? +""", + "reduced": u""" +# No Time Zone +hh:mm +hh # Deviation? Not allowed in standard ? +""", + "truncated": u""" +# No Time Zone +-mm:ss +-mm # Deviation? Not allowed in standard ? +--ss # Deviation? Not allowed in standard ? + +# No Time Zone - decimals +-mm:ss,sṡ +-mm,mṁ # Deviation? Not allowed in standard ? +--ss,sṡ # Deviation? Not allowed in standard ? +"""}} + + TIMEZONE_EXPRESSIONS = {"basic": u""" +Z +±hh +±hhmm +""", + "extended": u""" +Z +±hh # Deviation? Not allowed in standard? +±hh:mm +"""} + + DATE_CHAR_REGEXES = [(u"±", "(?P[+-])"), + (u"cc", "(?P\d\d)"), + (u"YY", "(?P\d\d)"), + (u"MM", "(?P\d\d)"), + (u"DDD", "(?P\d\d\d)"), + (u"DD", "(?P\d\d)"), + (u"Www", "W(?P\d\d)"), + (u"D", "(?P\d)"), + (u"ỵ", "(?P\d)"), + (u"^---", "(?P---)"), + (u"^--", "(?P--)"), + (u"^-", "(?P-)"), + (u"^~", "(?P)")] + TIME_CHAR_REGEXES = [(u"(?<=^hh)mm", "(?P\d\d)"), + (u"(?<=^hh:)mm", "(?P\d\d)"), + (u"(?<=^-)mm", "(?P\d\d)"), + (u"^hh", "(?P\d\d)"), + (u",hḣ", "[,.](?P\d+)"), + (u",mṁ", "[,.](?P\d+)"), + (u"ss", "(?P\d\d)"), + (u",sṡ", "[,.](?P\d+)"), + (u"^--", "(?P--)"), + (u"^-", "(?P-)")] + TIMEZONE_CHAR_REGEXES = [ + (u"(?<=±hh)mm", "(?P\d\d)"), + (u"(?<=±hh:)mm", "(?P\d\d)"), + (u"(?<=±)hh", "(?P\d\d)"), + (u"±", "(?P[+-])"), + (u"Z", "(?PZ)") + ] + TIME_DESIGNATOR = "T" + + def __init__(self, num_expanded_year_digits=2, + allow_truncated=False, + allow_only_basic=False, + assume_utc=False, + format_function=None): + expanded_year_digit_regex = "\d" * num_expanded_year_digits + self.expanded_year_digits = num_expanded_year_digits + self.DATE_CHAR_REGEXES.append( + (u"Ϋ", + "(?P" + expanded_year_digit_regex + ")") + ) + self.allow_truncated = allow_truncated + self.allow_only_basic = allow_only_basic + self.format_function = format_function + self._generate_regexes() + + def _generate_regexes(self): + """Generate combined date time strings.""" + date_map = self.DATE_EXPRESSIONS + time_map = self.TIME_EXPRESSIONS + timezone_map = self.TIMEZONE_EXPRESSIONS + self._date_regex_map = {} + self._time_regex_map = {} + self._timezone_regex_map = {} + format_ok_keys = ["basic", "extended"] + if self.allow_only_basic: + format_ok_keys = ["basic"] + for format_type in format_ok_keys: + self._date_regex_map.setdefault(format_type, {}) + self._time_regex_map.setdefault(format_type, {}) + self._timezone_regex_map.setdefault(format_type, []) + for date_key in date_map[format_type].keys(): + self._date_regex_map[format_type].setdefault(date_key, []) + regex_list = self._date_regex_map[format_type][date_key] + for date_expr in self.get_expressions( + date_map[format_type][date_key]): + date_regex = self.parse_date_expression_to_regex( + date_expr) + regex_list.append([re.compile(date_regex), date_expr]) + for time_key in time_map[format_type].keys(): + self._time_regex_map[format_type].setdefault(time_key, []) + regex_list = self._time_regex_map[format_type][time_key] + for time_expr in self.get_expressions( + time_map[format_type][time_key]): + time_regex = self.parse_time_expression_to_regex( + time_expr) + regex_list.append([re.compile(time_regex), time_expr]) + for timezone_expr in self.get_expressions( + timezone_map[format_type]): + timezone_regex = self.parse_timezone_expression_to_regex( + timezone_expr) + self._timezone_regex_map[format_type].append( + [re.compile(timezone_regex), timezone_expr]) + + def get_expressions(self, text): + """Yield valid expressions from text.""" + for line in text.splitlines(): + line_text = line.strip() + if not line_text or line_text.startswith("#"): + continue + expr_text = line_text.split("#", 1)[0].strip() + yield expr_text + + def parse_date_expression_to_regex(self, expression): + """Construct regular expressions for the date.""" + for expr_regex, substitute in self.DATE_CHAR_REGEXES: + expression = re.sub(expr_regex, substitute, expression) + expression = "^" + expression + "$" + return expression + + def parse_time_expression_to_regex(self, expression): + """Construct regular expressions for the time.""" + for expr_regex, substitute in self.TIME_CHAR_REGEXES: + expression = re.sub(expr_regex, substitute, expression) + expression = "^" + expression + "$" + return expression + + def parse_timezone_expression_to_regex(self, expression): + """Construct regular expressions for the timezone.""" + for expr_regex, substitute in self.TIMEZONE_CHAR_REGEXES: + expression = re.sub(expr_regex, substitute, expression) + expression = "^" + expression + "$" + return expression + + def parse(self, timepoint_string): + """Parse a user-supplied timepoint string.""" + date_time_timezone = timepoint_string.split(self.TIME_DESIGNATOR) + if len(date_time_timezone) == 1: + date = date_time_timezone[0] + keys, date_info = self.get_date_info(date) + time_info = {} + else: + date, time_timezone = date_time_timezone + if not date and self.allow_truncated: + keys = (None, "truncated") + date_info = {"truncated": True} + else: + keys, date_info = self.get_date_info(date, + bad_types=["reduced"]) + format_key, type_key = keys + bad_formats = [] + if format_key == "basic": + bad_formats = ["extended"] + if format_key == "extended": + bad_formats = ["basic"] + if type_key == "truncated": + # Do not force basic/extended formatting for truncated dates. + bad_formats = [] + bad_types = ["truncated"] + if date_info.get("truncated"): + bad_types = [] + if time_timezone.endswith("Z"): + time, timezone = time_timezone[:-1], "Z" + else: + if "+" in time_timezone: + time, timezone = time_timezone.split("+") + timezone = "+" + timezone + elif "-" in time_timezone: + time, timezone = time_timezone.rsplit("-", 1) + timezone = "-" + timezone + # Make sure this isn't just a truncated time. + try: + time_info = self.get_time_info( + time, + bad_formats=bad_formats, + bad_types=bad_types + ) + timezone_info = self.get_timezone_info( + timezone, + bad_formats=bad_formats + ) + except TimeSyntaxError: + time = time_timezone + timezone = None + else: + time = time_timezone + timezone = None + if timezone is None: + timezone_info = {} + else: + timezone_info = self.get_timezone_info( + timezone, + bad_formats=bad_formats + ) + if timezone_info.pop("time_zone_sign", "+") == "-": + timezone_info["time_zone_hour"] = ( + int(timezone_info["time_zone_hour"]) * -1) + if "time_zone_minute" in timezone_info: + timezone_info["time_zone_minute"] = ( + int(timezone_info["time_zone_minute"]) * -1) + time_info = self.get_time_info(time, bad_formats=bad_formats, + bad_types=bad_types) + time_info.update(timezone_info) + info = {} + truncated_property = None + if date_info.get("truncated"): + if "year_of_decade" in date_info: + truncated_property = "year_of_decade" + if "year_of_century" in date_info: + truncated_property = "year_of_century" + elif ("century" not in date_info and + "year_of_century" in date_info): + truncated_property = "year_of_century" + date_info["truncated"] = True + year = int(date_info.get("year", 0)) + if "year_of_decade" in date_info: + year += int(date_info.pop("year_of_decade")) + truncated_property = "year_of_decade" + year += int(date_info.pop("year_of_century", 0)) + year += 100 * int(date_info.pop("century", 0)) + expanded_year = date_info.pop("expanded_year", 0) + if expanded_year: + date_info["expanded_year_digits"] = self.expanded_year_digits + year += 10000 * int(expanded_year) + if date_info.pop("year_sign", "+") == "-": + year *= -1 + date_info["year"] = year + for key, value in date_info.items(): + try: + date_info[key] = int(value) + except (TypeError, ValueError): + pass + info.update(date_info) + for key, value in time_info.items(): + if key.endswith("_decimal"): + value = "0." + value + try: + value = float(value) + except (IOError, ValueError) as e: + pass + if key == "time_zone_utc" and value == "Z": + value = True + if key == "year_sign": + if value == "+": + value = 1 + else: + value = -1 + time_info[key] = value + info.update(time_info) + if info.pop("truncated", False): + info["truncated"] = True + if truncated_property is not None: + info["truncated_property"] = truncated_property + if self.format_function is not None: + info.update({"format_function": self.format_function}) + return TimePoint(**info) + + def get_date_info(self, date_string, bad_types=None): + """Return the format and properties from a date string.""" + type_keys = ["complete", "truncated", "reduced"] + if bad_types is not None: + for type_key in bad_types: + type_keys.remove(type_key) + if not self.allow_truncated and "truncated" in type_keys: + type_keys.remove("truncated") + for format_key, type_regex_map in self._date_regex_map.items(): + for type_key in type_keys: + regex_list = type_regex_map[type_key] + for regex, expr in regex_list: + result = regex.match(date_string) + if result: + return (format_key, type_key), result.groupdict() + raise TimeSyntaxError( + "Not a valid ISO 8601 date representation: %s" % date_string) + + def get_time_info(self, time_string, bad_formats=None, bad_types=None): + """Return the properties from a time string.""" + if bad_formats is None: + bad_formats = [] + if bad_types is None: + bad_types = [] + for format_key, type_regex_map in self._time_regex_map.items(): + if format_key in bad_formats: + continue + for type_key, regex_list in type_regex_map.items(): + if type_key in bad_types: + continue + for regex, expr in regex_list: + result = regex.match(time_string) + if result: + return result.groupdict() + raise TimeSyntaxError( + "Not a valid ISO 8601 time representation: %s" % time_string) + + def get_timezone_info(self, timezone_string, bad_formats=None): + """Return the properties from a timezone string.""" + if bad_formats is None: + bad_formats = [] + for format_key, regex_list in self._timezone_regex_map.items(): + if format_key in bad_formats: + continue + for regex, expr in regex_list: + result = regex.match(timezone_string) + if result: + return result.groupdict() + raise TimeSyntaxError( + "Not a valid ISO 8601 timezone representation: %s" % + timezone_string + ) + + +class TimeIntervalParser(object): + + """Parser for ISO 8601 Durations (time intervals).""" + + INTERVAL_REGEXES = [ + re.compile(r"""^P(?:(?P\d+)Y)? + (?:(?P\d+)M)? + (?:(?P\d+)D)?$""", re.X), + re.compile(r"""^P(?:(?P\d+)Y)? + (?:(?P\d+)M)? + (?:(?P\d+)D)? + T(?:(?P\d.*)H)? + (?:(?P\d.*)M)? + (?:(?P\d.*)S)?$""", re.X), + re.compile(r"""^P(?P\d+)W$""", re.X) + ] + + def parse(self, expression): + """Parse an ISO duration expression into a TimeInterval instance.""" + for rec_regex in self.INTERVAL_REGEXES: + result = rec_regex.search(expression) + if not result: + continue + result_map = result.groupdict() + for key, value in result_map.items(): + if value is None: + result_map.pop(key) + continue + if key in ["years", "months", "days", "weeks"]: + value = int(value) + else: + if "," in value: + value = value.replace(",", ".") + value = float(value) + result_map[key] = value + return TimeInterval(**result_map) + raise TimeSyntaxError("Not an ISO 8601 duration representation: %s" % + expression) + + +def parse_timepoint_expression(timepoint_expression, **kwargs): + """Return a data model that represents timepoint_expression.""" + parser = TimePointParser(**kwargs) + return parser.parse(timepoint_expression) diff --git a/isotests.py b/isotests.py new file mode 100644 index 0000000..d84db32 --- /dev/null +++ b/isotests.py @@ -0,0 +1,509 @@ +# -*- coding: utf-8 -*- +#----------------------------------------------------------------------------- +# (C) British Crown Copyright 2013 Met Office. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +#----------------------------------------------------------------------------- + +"""This provides ISO 8601 parsing functionality.""" + +import isodata +import isoparsers + + +def get_timeintervalparser_tests(self): + """Yield tests for the time interval parser.""" + test_expresssions = { + "P3Y": str(isodata.TimeInterval(years=3)), + "P90Y": str(isodata.TimeInterval(years=90)), + "P1Y2M": str(isodata.TimeInterval(years=1, months=2)), + "P20Y2M": str(isodata.TimeInterval(years=20, months=2)), + "P2M": str(isodata.TimeInterval(months=2)), + "P52M": str(isodata.TimeInterval(months=52)), + "P20Y10M2D": str(isodata.TimeInterval(years=20, months=10, days=2)), + "P1Y3D": str(isodata.TimeInterval(years=1, days=3)), + "P4M1D": str(isodata.TimeInterval(months=4, days=1)), + "P3Y404D": str(isodata.TimeInterval(years=3, days=404)), + "P30Y2D": str(isodata.TimeInterval(years=30, days=2)), + "PT6H": str(isodata.TimeInterval(hours=6)), + "PT1034H": str(isodata.TimeInterval(hours=1034)), + "P3YT4H2M": str(isodata.TimeInterval(years=3, hours=4, minutes=2)), + "P30Y2DT10S": str(isodata.TimeInterval(years=30, days=2, seconds=10)), + "PT2S": str(isodata.TimeInterval(seconds=2)), + "PT2.5S": str(isodata.TimeInterval(seconds=2.5)), + "PT2,5S": str(isodata.TimeInterval(seconds=2.5)), + "PT5.5023H": str(isodata.TimeInterval(hours=5.5023)), + "PT5,5023H": str(isodata.TimeInterval(hours=5.5023)), + "P5W": str(isodata.TimeInterval(weeks=5)), + "P100W": str(isodata.TimeInterval(weeks=100)) + } + for expression, ctrl_result in test_expressions.items(): + yield expression, ctrl_result + + +def get_timepointparser_tests(self): + """Yield tests for the time point parser.""" + # Note: test dates assume 2 expanded year digits. + test_date_expressions = { + "basic": { + "complete": { + "00440104": {"year": 44, "month_of_year": 1, + "day_of_month": 4}, + "+5002000830": {"year": 500200, "month_of_year": 8, + "day_of_month": 30, + "expanded_year_digits": 2}, + "-0000561113": {"year": -56, "month_of_year": 11, + "day_of_month": 13, + "expanded_year_digits": 2}, + "-1000240210": {"year": -100024, "month_of_year": 2, + "day_of_month": 10, + "expanded_year_digits": 2}, + "1967056": {"year": 1967, "day_of_year": 56}, + "+123456078": {"year": 123456, "day_of_year": 78, + "expanded_year_digits": 2}, + "-004560134": {"year": -4560, "day_of_year": 134, + "expanded_year_digits": 2}, + "1001W011": {"year": 1001, "week_of_year": 1, + "day_of_week": 1}, + "+000001W457": {"year": 1, "week_of_year": 45, + "day_of_week": 7, + "expanded_year_digits": 2}, + "-010001W053": {"year": -10001, "week_of_year": 5, + "day_of_week": 3, "expanded_year_digits": 2} + }, + "reduced": { + "4401-03": {"year": 4401, "month_of_year": 3}, + "1982": {"year": 1982}, + "19": {"year": 1900}, + "+056789-01": {"year": 56789, "month_of_year": 1, + "expanded_year_digits": 2}, + "-000001-12": {"year": -1, "month_of_year": 12, + "expanded_year_digits": 2}, + "-789123": {"year": -789123, "expanded_year_digits": 2}, + "+450001": {"year": 450001, "expanded_year_digits": 2}, + # The following cannot be parsed - looks like truncated -YYMM. + # "-0023": {"year": -2300, "expanded_year_digits": 2}, + "+5678": {"year": 567800, "expanded_year_digits": 2}, + "1765W04": {"year": 1765, "week_of_year": 4}, + "+001765W44": {"year": 1765, "week_of_year": 44, + "expanded_year_digits": 2}, + "-123321W50": {"year": -123321, "week_of_year": 50, + "expanded_year_digits": 2} + }, + "truncated": { + "-9001": {"year": 90, "month_of_year": 1, + "truncated": True, + "truncated_property": "year_of_century"}, + "960328": {"year": 96, "month_of_year": 3, + "day_of_month": 28, + "truncated": True, + "truncated_property": "year_of_century"}, + "-90": {"year": 90, "truncated": True, + "truncated_property": "year_of_century"}, + "--0501": {"month_of_year": 5, "day_of_month": 1, + "truncated": True}, + "--12": {"month_of_year": 12, "truncated": True}, + "---30": {"day_of_month": 30, "truncated": True}, + "98354": {"year": 98, "day_of_year": 354, "truncated": True, + "truncated_property": "year_of_century"}, + "-034": {"day_of_year": 34, "truncated": True}, + "00W031": {"year": 0, "week_of_year": 3, "day_of_week": 1, + "truncated": True, + "truncated_property": "year_of_century"}, + "99W34": {"year": 99, "week_of_year": 34, "truncated": True, + "truncated_property": "year_of_century"}, + "-1W02": {"year": 1, "week_of_year": 2, + "truncated": True, + "truncated_property": "year_of_decade"}, + "-W031": {"week_of_year": 3, "day_of_week": 1, + "truncated": True}, + "-W32": {"week_of_year": 32, "truncated": True}, + "-W-1": {"day_of_week": 1, "truncated": True} + } + }, + "extended": { + "complete": { + "0044-01-04": {"year": 44, "month_of_year": 1, + "day_of_month": 4}, + "+500200-08-30": {"year": 500200, "month_of_year": 8, + "day_of_month": 30, + "expanded_year_digits": 2}, + "-000056-11-13": {"year": -56, "month_of_year": 11, + "day_of_month": 13, + "expanded_year_digits": 2}, + "-100024-02-10": {"year": -100024, "month_of_year": 2, + "day_of_month": 10, + "expanded_year_digits": 2}, + "1967-056": {"year": 1967, "day_of_year": 56}, + "+123456-078": {"year": 123456, "day_of_year": 78, + "expanded_year_digits": 2}, + "-004560-134": {"year": -4560, "day_of_year": 134, + "expanded_year_digits": 2}, + "1001-W01-1": {"year": 1001, "week_of_year": 1, + "day_of_week": 1}, + "+000001-W45-7": {"year": 1, "week_of_year": 45, + "day_of_week": 7, + "expanded_year_digits": 2}, + "-010001-W05-3": {"year": -10001, "week_of_year": 5, + "day_of_week": 3, + "expanded_year_digits": 2} + }, + "reduced": { + "4401-03": {"year": 4401, "month_of_year": 3}, + "1982": {"year": 1982}, + "19": {"year": 1900}, + "+056789-01": {"year": 56789, "month_of_year": 1, + "expanded_year_digits": 2}, + "-000001-12": {"year": -1, "month_of_year": 12, + "expanded_year_digits": 2}, + "-789123": {"year": -789123, "expanded_year_digits": 2}, + "+450001": {"year": 450001, "expanded_year_digits": 2}, + # The following cannot be parsed - looks like truncated -YYMM. + # "-0023": {"year": -2300, "expanded_year_digits": 2}, + "+5678": {"year": 567800, "expanded_year_digits": 2}, + "1765-W04": {"year": 1765, "week_of_year": 4}, + "+001765-W44": {"year": 1765, "week_of_year": 44, + "expanded_year_digits": 2}, + "-123321-W50": {"year": -123321, "week_of_year": 50, + "expanded_year_digits": 2} + }, + "truncated": { + "-9001": {"year": 90, "month_of_year": 1, + "truncated": True, + "truncated_property": "year_of_century"}, + "96-03-28": {"year": 96, "month_of_year": 3, + "day_of_month": 28, + "truncated": True, + "truncated_property": "year_of_century"}, + "-90": {"year": 90, "truncated": True, + "truncated_property": "year_of_century"}, + "--05-01": {"month_of_year": 5, "day_of_month": 1, + "truncated": True}, + "--12": {"month_of_year": 12, "truncated": True}, + "---30": {"day_of_month": 30, "truncated": True}, + "98-354": {"year": 98, "day_of_year": 354, "truncated": True, + "truncated_property": "year_of_century"}, + "-034": {"day_of_year": 34, "truncated": True}, + "00-W03-1": {"year": 0, "week_of_year": 3, "day_of_week": 1, + "truncated": True, + "truncated_property": "year_of_century"}, + "99-W34": {"year": 99, "week_of_year": 34, "truncated": True, + "truncated_property": "year_of_century"}, + "-1-W02": {"year": 1, "week_of_year": 2, + "truncated": True, + "truncated_property": "year_of_decade"}, + "-W03-1": {"week_of_year": 3, "day_of_week": 1, + "truncated": True}, + "-W32": {"week_of_year": 32, "truncated": True}, + "-W-1": {"day_of_week": 1, "truncated": True} + } + } + } + test_time_expresssions = { + "basic": { + "complete": { + "050102": {"hour_of_day": 5, "minute_of_hour": 1, + "second_of_minute": 2}, + "235902,345": {"hour_of_day": 23, "minute_of_hour": 59, + "second_of_minute": 2.345}, + "235902.345": {"hour_of_day": 23, "minute_of_hour": 59, + "second_of_minute": 2.345}, + "1201,4": {"hour_of_day": 12, "minute_of_hour": 1.4}, + "1201.4": {"hour_of_day": 12, "minute_of_hour": 1.4}, + "00,4356": {"hour_of_day": 0.4356}, + "00.4356": {"hour_of_day": 0.4356} + }, + "reduced": { + "0203": {"hour_of_day": 2, "minute_of_hour": 3}, + "17": {"hour_of_day": 17} + }, + "truncated": { + "-5612": {"minute_of_hour": 56, "second_of_minute": 12, + "truncated": True}, + "-12": {"minute_of_hour": 12, "truncated": True}, + "--45": {"second_of_minute": 45, "truncated": True}, + "-1234,45": {"minute_of_hour": 12, "second_of_minute": 34.45, + "truncated": True}, + "-1234.45": {"minute_of_hour": 12, "second_of_minute": 34.45, + "truncated": True}, + "-34,2": {"minute_of_hour": 34.2, "truncated": True}, + "-34.2": {"minute_of_hour": 34.2, "truncated": True}, + "--59,99": {"second_of_minute": 59.99, "truncated": True}, + "--59.99": {"second_of_minute": 59.99, "truncated": True} + } + }, + "extended": { + "complete": { + "05:01:02": {"hour_of_day": 5, "minute_of_hour": 1, + "second_of_minute": 2}, + "23:59:02,345": {"hour_of_day": 23, "minute_of_hour": 59, + "second_of_minute": 2.345}, + "23:59:02.345": {"hour_of_day": 23, "minute_of_hour": 59, + "second_of_minute": 2.345}, + "12:01,4": {"hour_of_day": 12, "minute_of_hour": 1.4}, + "12:01.4": {"hour_of_day": 12, "minute_of_hour": 1.4}, + "00,4356": {"hour_of_day": 0.4356}, + "00.4356": {"hour_of_day": 0.4356} + }, + "reduced": { + "02:03": {"hour_of_day": 2, "minute_of_hour": 3}, + "17": {"hour_of_day": 17} + }, + "truncated": { + "-56:12": {"minute_of_hour": 56, "second_of_minute": 12, + "truncated": True}, + "-12": {"minute_of_hour": 12, "truncated": True}, + "--45": {"second_of_minute": 45, "truncated": True}, + "-12:34,45": {"minute_of_hour": 12, "second_of_minute": 34.45, + "truncated": True}, + "-12:34.45": {"minute_of_hour": 12, "second_of_minute": 34.45, + "truncated": True}, + "-34,2": {"minute_of_hour": 34.2, "truncated": True}, + "-34.2": {"minute_of_hour": 34.2, "truncated": True}, + "--59,99": {"second_of_minute": 59.99, "truncated": True}, + "--59.99": {"second_of_minute": 59.99, "truncated": True} + } + } + } + test_timezone_expressions = { + "basic": { + "Z": {"time_zone_utc": True}, + "+01": {"time_zone_hour": 1}, + "-05": {"time_zone_hour": -5}, + "+2301": {"time_zone_hour": 23, "time_zone_minute": 1}, + "-1230": {"time_zone_hour": -12, "time_zone_minute": 30} + }, + "extended": { + "Z": {"time_zone_utc": True}, + "+01": {"time_zone_hour": 1}, + "-05": {"time_zone_hour": -5}, + "+23:01": {"time_zone_hour": 23, "time_zone_minute": 1}, + "-12:30": {"time_zone_hour": -12, "time_zone_minute": 30} + } + } + format_ok_keys = ["basic", "extended"] + if self.allow_only_basic: + format_ok_keys = ["basic"] + date_combo_ok_keys = ["complete"] + if self.allow_truncated: + date_combo_ok_keys = ["complete", "truncated"] + time_combo_ok_keys = ["complete", "reduced"] + test_date_map = self.TEST_DATE_EXPRESSIONS + test_time_map = self.TEST_TIME_EXPRESSIONS + test_timezone_map = self.TEST_TIMEZONE_EXPRESSIONS + for format_type in format_ok_keys: + date_format_tests = test_date_map[format_type] + time_format_tests = test_time_map[format_type] + timezone_format_tests = test_timezone_map[format_type] + for date_key in date_format_tests: + if not self.allow_truncated and date_key == "truncated": + continue + for date_expr, info in date_format_tests[date_key].items(): + yield date_expr, info + for date_key in date_combo_ok_keys: + date_tests = copy.deepcopy(date_format_tests[date_key]) + # Add a blank date for time-only testing. + for date_expr, info in date_tests.items(): + for time_key in time_combo_ok_keys: + time_items = time_format_tests[time_key].items() + for time_expr, time_info in time_items: + combo_expr = (date_expr + self.TIME_DESIGNATOR + + time_expr) + combo_info = {} + for key, value in info.items() + time_info.items(): + combo_info[key] = value + yield combo_expr, combo_info + timezone_items = timezone_format_tests.items() + for timezone_expr, timezone_info in timezone_items: + tz_expr = combo_expr + timezone_expr + tz_info = {} + for key, value in (combo_info.items() + + timezone_info.items()): + tz_info[key] = value + yield tz_expr, tz_info + if not self.allow_truncated: + continue + for time_key in time_format_tests: + time_tests = time_format_tests[time_key] + for time_expr, time_info in time_tests.items(): + combo_expr = self.TIME_DESIGNATOR + time_expr + # Add truncated (no date). + combo_info = {"truncated": True} + for key, value in time_info.items(): + combo_info[key] = value + yield combo_expr, combo_info + timezone_items = timezone_format_tests.items() + for timezone_expr, timezone_info in timezone_items: + tz_expr = combo_expr + timezone_expr + tz_info = {} + for key, value in (combo_info.items() + + timezone_info.items()): + tz_info[key] = value + yield tz_expr, tz_info + + +def get_timerecurrenceparser_tests(self): + """Yield tests for the time recurrence parser.""" + test_points = ["-100024-02-10T17:00:00-12:30", + "+000001-W45-7T06Z", "1001W011", + "1955W051T06,5Z", "1999-06-01", + "1967-056", "+5002000830T235902,345", + "1765-W04"] + for reps in [None, 1, 2, 3, 10]: + if reps is None: + reps_string = "" + else: + reps_string = str(reps) + point_parser = isoparsers.TimePointParser() + interval_parser = isoparsers.TimeIntervalParser() + for point_expr in test_points: + interval_tests = interval_parser.get_tests() + start_point = point_parser.parse(point_expr) + for interval_expr, interval_result in interval_tests: + interval = interval_parser.parse(interval_expr) + end_point = start_point + interval + if reps is not None: + expr_1 = ("R" + reps_string + "/" + str(start_point) + + "/" + str(end_point)) + yield expr_1, {"repetitions": reps, + "start_point": start_point, + "end_point": end_point} + expr_3 = ("R" + reps_string + "/" + str(start_point) + + "/" + str(interval)) + yield expr_3, {"repetitions": reps, + "start_point": start_point, + "interval": interval} + expr_4 = ("R" + reps_string + "/" + str(interval) + "/" + + str(end_point)) + yield expr_4, {"repetitions": reps, "interval": interval, + "end_point": end_point} + + +class TestSuite(unittest.TestCase): + + """Test the functionality of parsers and data model manipulation.""" + + def assertEqual(self, test, control, source=None): + """Override the assertEqual method to provide more information.""" + if source is None: + info = None + else: + info = ("Source %s produced\n%s, should be\n%s" % + (source, test, control)) + super(TestSuite, self).assertEqual(test, control, info) + + def test_timeinterval_parser(self): + """Test the time interval parsing.""" + parser = isoparsers.TimeIntervalParser() + for expression, ctrl_result in get_timeintervalparser_tests(): + try: + test_result = str(parser.parse(expression)) + except TimeSyntaxError: + raise ValueError( + "TimeIntervalParser test failed to parse '%s'" % + expression + ) + self.assertEqual(test_result, ctrl_result, expression) + + def test_timepoint(self): + """Test the manipulation of dates and times (takes a while).""" + import datetime + import random + my_date = datetime.datetime(1801, 1, 1) + while my_date <= datetime.datetime(2401, 2, 1): + ctrl_data = my_date.isocalendar() + test_date = TimePoint(year=my_date.year, + month_of_year=my_date.month, + day_of_month=my_date.day) + test_data = test_date.get_week_date() + self.assertEqual(test_data, ctrl_data) + ctrl_data = (my_date.year, my_date.month, my_date.day) + test_data = test_date.to_week_date().get_calendar_date() + self.assertEqual(test_data, ctrl_data) + ctrl_data = my_date.toordinal() + year, day_of_year = test_date.get_ordinal_date() + test_data = day_of_year + test_data += get_days_since_1_ad(year - 1) + self.assertEqual(test_data, ctrl_data) + for attribute, attr_max in [("weeks", 110), + ("days", 770), + ("hours", 770*24), + ("minutes", 770 * 24 * 60), + ("seconds", 770 * 24 * 60 * 60)]: + delta_attr = random.randrange(0, attr_max) + kwargs = {attribute: delta_attr} + ctrl_data = my_date + datetime.timedelta(**kwargs) + ctrl_data = (ctrl_data.year, ctrl_data.month, ctrl_data.day) + test_data = ( + test_date + TimeInterval(**kwargs)).get_calendar_date() + self.assertEqual(test_data, ctrl_data) + ctrl_data = (my_date - datetime.timedelta(**kwargs)) + ctrl_data = (ctrl_data.year, ctrl_data.month, ctrl_data.day) + test_data = ( + test_date - TimeInterval(**kwargs)).get_calendar_date() + self.assertEqual(test_data, ctrl_data) + ctrl_data = (my_date + datetime.timedelta(minutes=450) + + datetime.timedelta(hours=5) - + datetime.timedelta(seconds=500, weeks=5)) + ctrl_data = [(ctrl_data.year, ctrl_data.month, ctrl_data.day), + (ctrl_data.hour, ctrl_data.minute, ctrl_data.second)] + test_data = ( + test_date + TimeInterval(minutes=450) + + TimeInterval(hours=5) - TimeInterval(weeks=5, seconds=500)) + test_data = [test_data.get_calendar_date(), + test_data.get_hour_minute_second()] + self.assertEqual(test_data, ctrl_data) + timedelta = datetime.timedelta(days=1) + my_date += timedelta + + def test_timepoint_parser(self): + """Test the parsing of date/time expressions.""" + parser = isoparsers.TimePointParser(allow_truncated=True) + for expression, timepoint_kwargs in get_timepointparser_tests(): + timepoint_kwargs = copy.deepcopy(timepoint_kwargs) + try: + test_data = str(parser.parse(expression)) + except TimeSyntaxError: + raise ValueError("Parsing failed for %s" % expression) + ctrl_data = str(TimePoint(**timepoint_kwargs)) + self.assertEqual(test_data, ctrl_data, expression) + + def test_timerecurrence(self): + """Test the recurring date/time series data model.""" + parser = isoparsers.TimeRecurrenceParser() + for expression, ctrl_results in TimeRecurrence.TEST_EXPRESSIONS: + try: + test_recurrence = parser.parse(expression) + except TimeSyntaxError: + raise ValueError( + "TimeRecurrenceParser test failed to parse '%s'" % + expression + ) + test_results = [] + for i, time_point in enumerate(test_recurrence): + if i > 2: + break + test_results.append(str(time_point)) + self.assertEqual(test_results, ctrl_results, expression) + + def test_timerecurrence_parser(self): + """Test the recurring date/time series parsing.""" + parser = isoparsers.TimeRecurrenceParser() + for expression, test_info in get_timerecurrenceparser_tests(): + try: + test_data = str(parser.parse(expression)) + except TimeSyntaxError: + raise ValueError("Parsing failed for %s" % expression) + ctrl_data = str(TimeRecurrence(**test_info)) + self.assertEqual(test_data, ctrl_data, expression) From 3c696841923f52064f6f2c38c1b47da53bcd8b92 Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Mon, 3 Feb 2014 15:42:57 +0000 Subject: [PATCH 02/14] Rename isodatetime remnant --- isodatetime.py => isodata.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename isodatetime.py => isodata.py (100%) diff --git a/isodatetime.py b/isodata.py similarity index 100% rename from isodatetime.py rename to isodata.py From 6d8514840d0d69e0176161bba0ea959aa34a61d6 Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Tue, 4 Feb 2014 12:37:33 +0000 Subject: [PATCH 03/14] Add type checking, fix bugs --- isodata.py | 296 +++++++++++++++++++++++++++++++++++++------------- isoparsers.py | 31 ++++-- isotests.py | 187 +++++++++++++++++++++---------- 3 files changed, 368 insertions(+), 146 deletions(-) diff --git a/isodata.py b/isodata.py index bfef1a2..09239e6 100644 --- a/isodata.py +++ b/isodata.py @@ -16,11 +16,7 @@ # along with this program. If not, see . #----------------------------------------------------------------------------- -"""This provides ISO 8601 parsing and data model functionality.""" - -import copy -import re -import unittest +"""This provides ISO 8601 data model functionality.""" DAYS_OF_MONTHS = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] @@ -30,41 +26,28 @@ "ordinal": (2000, 3)} -class TimeSyntaxError(ValueError): +class BadInputError(ValueError): + + """An error raised when constructor inputs are invalid.""" - """An error denoting invalid input syntax.""" + pass class TimeRecurrence(object): """Represent a recurring time interval.""" - TEST_EXPRESSIONS = [ - ("R3/1001-W01-1T00:00:00Z/1002-W52-6T00:00:00-05:30", - ["1001-W01-1T00:00:00Z", "1001-W53-3T14:45:00Z", - "1002-W52-6T05:30:00Z"]), - ("R3/P700D/1957-W01-1T06,5Z", - ["1953-W10-1T06,5Z", "1955-W05-1T06,5Z", "1957-W01-1T06,5Z"]), - ("R3/P5DT2,5S/1001-W11-1T00:30:02,5-02:00", - ["1001-W09-5T00:29:57,5-02:00", "1001-W10-3T00:30:00-02:00", - "1001-W11-1T00:30:02,5-02:00"]), - ("R/+000001W457T060000Z/P4M1D", - ["+000001-W45-7T06:00:00Z", "+000002-W11-2T06:00:00Z", - "+000002-W28-6T06:00:00Z"]), - ("R/P4M1DT6M/+002302-002T06:00:00-00:30", - ["+002302-002T06:00:00-00:30", "+002301-244T05:54:00-00:30", - "+002301-120T05:48:00-00:30"]), - ("R/P30Y2DT15H/-099994-02-12T17:00:00-02:30", - ["-099994-02-12T17:00:00-02:30", "-100024-02-10T02:00:00-02:30", - "-100054-02-07T11:00:00-02:30"]), - ("R/-100024-02-10T17:00:00-12:30/PT5.5H", - ["-100024-02-10T17:00:00-12:30", "-100024-02-10T22,5-12:30", - "-100024-02-11T04:00:00-12:30"]) - ] - def __init__(self, repetitions=None, start_point=None, interval=None, end_point=None, min_point=None, max_point=None): + _type_checker( + (repetitions, "repetitions", None, int), + (start_point, "start_point", None, TimePoint), + (interval, "interval", None, TimeInterval), + (end_point, "end_point", None, TimePoint), + (min_point, "min_point", None, TimePoint), + (max_point, "max_point", None, TimePoint) + ) self.repetitions = repetitions self.start_point = start_point self.interval = interval @@ -119,7 +102,8 @@ def __init__(self, repetitions=None, start_point=None, point -= self.interval self.start_point = point else: - raise ValueError("Unsupported or invalid recurrence information.") + raise BadInputError( + "Unsupported or invalid recurrence information.") def __iter__(self): if self.start_point is None: @@ -201,6 +185,15 @@ class TimeInterval(object): def __init__(self, years=0, months=0, weeks=0, days=0, hours=0.0, minutes=0.0, seconds=0.0): + _type_checker( + (years, "years", int, float, None), + (months, "months", int, float, None), + (weeks, "weeks", int, float, None), + (days, "days", int, float, None), + (hours, "hours", int, float, None), + (minutes, "minutes", int, float, None), + (seconds, "seconds", int, float, None) + ) self.years = years self.months = months self.weeks = None @@ -446,65 +439,185 @@ def __str__(self): class TimePoint(object): - """Represent an instant in time.""" - - def __init__(self, **kwargs): - self.format_function = kwargs.get("format_function") - self.expanded_year_digits = kwargs.get("expanded_year_digits", 0) - self.truncated = kwargs.get("truncated", False) - self.truncated_property = kwargs.get("truncated_property") - self.year = kwargs.get("year") - self.month_of_year = kwargs.get("month_of_year") - self.day_of_year = kwargs.get("day_of_year") - self.day_of_month = kwargs.get("day_of_month") - self.day_of_week = kwargs.get("day_of_week") - self.week_of_year = kwargs.get("week_of_year") - if self.truncated: - time_default = None - else: - time_default = 0 - self.hour_of_day = kwargs.get("hour_of_day", time_default) - if "hour_of_day_decimal" in kwargs: + """Represent an instant in time. + + An ISO 8601 date/time instant can be represented in three + separate ways: + Calendar date: calendar year, calendar month, + calendar day of the month + Ordinal date: calendar year, calendar day of the year + Week date: calendar (week) year, calendar week, + calendar day of the week (note: week years are not identical to + calendar years). + + This class maintains a date/time instant in the original + representation with which it was invoked - so it may be in any of + these formats. See the TimePoint.to_*_date methods for internal + conversions between formats. + + Where properties are not given (consistent with ISO 8601 reduced + precision dates), they will be given the expected defaults if + truncation is not specified. For example, if only the year and the + month_of_year is given, the day_of_month will be set to 1. + + Time zone information defaults to UTC. It is essential to provide it + unless you are happy with this behaviour. A date/time + representation is ambiguous without it. + + Keyword arguments (usually default to None if not provided): + expanded_year_digits (default 0) - an agreed-upon number of extra + digits to represent the year, beyond the default of 4. For example, + a value of 2 would suggest representing the year 2000 as 002000. + year - a positive or negative integer. Note that ISO 8601 implies + using non-zero expanded_year_digits when using negative integers. + Remember we are using the proleptic Gregorian calendar, with a year + zero which does not exist in standard 1 BC => 1 AD usage - so 2 BC + should be represented as -1. + month_of_year - an integer between 1 and 12 inclusive, if using the + calendar date representation. + week_of_year - an integer between 1 and 52/53 (depending on the + year), if using the week date representation. + day_of_year - an integer between 1 and 365/366 (depending on the + year), if using the ordinal date representation. + day_of_month - an integer between 1 and 28/29/30/31 (depending on + the month), if using the calendar date representation. + day_of_week - an integer between 1 and 7, if using the week date + representation. + hour_of_day - an integer between 1 and 24. + hour_of_day_decimal - a float between 0 and 1, if using decimal + accuracy for hours. Note that you should not provide lower units + such as minute_of_hour or second_of_minute when using this. + minute_of_hour - an integer between 0 and 59. + minute_of_hour_decimal - a float between 0 and 1, if using decimal + accuracy for minutes. Note that you should not provide lower units + such as second_of_minute when using this. + second_of_minute - an integer between 0 and 59 (note: no support + for leap seconds at 60 yet) + second_of_minute_decimal - a float between 0 and 1, if using decimal + accuracy for seconds. + time_zone_hour - (default 0) an integer denoting the hour timezone + offset from UTC. Note that unless this is a truncated + representation, 0 will be assumed if this is not provided. + time_zone_minute - (default 0) an integer between 0 and 59 denoting + the minute component of the timezone offset from UTC. + format_function - a custom callable to provide your own str() + implementation. + truncated - (default False) a boolean denoting whether the + date/time instant has purposefully incomplete information + (ISO 8601:2000 truncation). + truncated_property - a string that can either be "year_of_decade" + or "year_of_century". This is used for truncated representations to + distinguish between the two ways of truncating the year. + + """ + + def __init__(self, expanded_year_digits=0, year=None, month_of_year=None, + week_of_year=None, day_of_year=None, day_of_month=None, + day_of_week=None, hour_of_day=None, hour_of_day_decimal=None, + minute_of_hour=None, minute_of_hour_decimal=None, + second_of_minute=None, second_of_minute_decimal=None, + time_zone_hour=None, time_zone_minute=None, + format_function=None, truncated=False, + truncated_property=None): + _type_checker( + (expanded_year_digits, "expanded_year_digits", int), + (year, "year", None, int), + (month_of_year, "month_of_year", None, int), + (week_of_year, "week_of_year", None, int), + (day_of_year, "day_of_year", None, int), + (day_of_month, "day_of_month", None, int), + (day_of_week, "day_of_week", None, int), + (hour_of_day, "hour_of_day", None, int, float), + (hour_of_day_decimal, "hour_of_day_decimal", None, float), + (minute_of_hour, "minute_of_hour", None, int, float), + (minute_of_hour_decimal, "minute_of_hour_decimal", None, float), + (second_of_minute, "second_of_minute", None, int, float), + (second_of_minute_decimal, "second_of_minute_decimal", None, + float), + (time_zone_hour, "time_zone_hour", None, int), + (time_zone_minute, "time_zone_minute", None, int) + ) + if format_function is not None and not callable(format_function): + raise BadInputError( + "Invalid input for format_function: {0}".format( + format_function)) + if (truncated_property is not None and + truncated_property not in ["year_of_decade", + "year_of_century"]): + raise BadInputError( + "Invalid input for truncated_property: {0}".format( + truncated_property)) + self.format_function = format_function + self.expanded_year_digits = _int_caster(expanded_year_digits, + "expanded_year_digits") + self.truncated = truncated + self.truncated_property = truncated_property + self.year = _int_caster(year, "year", allow_none=True) + self.month_of_year = _int_caster(month_of_year, "year", + allow_none=True) + self.day_of_year = _int_caster(day_of_year, "day_of_year", + allow_none=True) + self.day_of_month = _int_caster(day_of_month, "day_of_month", + allow_none=True) + self.day_of_week = _int_caster(day_of_week, "day_of_week", + allow_none=True) + self.week_of_year = _int_caster(week_of_year, "week_of_year", + allow_none=True) + self.hour_of_day = _int_caster(hour_of_day, "hour_of_day", + allow_none=True) + if hour_of_day_decimal is not None: if self.hour_of_day is None: raise TimePointInputError( "Invalid input: hour decimal points - but not hours") - self.hour_of_day += kwargs.get("hour_of_day_decimal") - if "minute_of_hour" in kwargs: + self.hour_of_day += float(hour_of_day_decimal) + if minute_of_hour is not None: raise TimePointInputError( "Invalid input: minutes - already have hour decimals") - if "second_of_minute" in kwargs: + if second_of_minute is not None: raise TimePointInputError( "Invalid input: seconds - already have hour decimals") - if "minute_of_hour_decimal" in kwargs: - if "minute_of_hour" not in kwargs: + if minute_of_hour_decimal is not None: + if minute_of_hour is None: raise TimePointInputError( "Invalid input: minute decimal points - but not minutes") - self.minute_of_hour = kwargs["minute_of_hour"] - self.minute_of_hour += kwargs["minute_of_hour_decimal"] - if "second_of_minute" in kwargs: + self.minute_of_hour = _int_caster( + minute_of_hour, "minute_of_hour") + self.minute_of_hour += float(minute_of_hour_decimal) + if second_of_minute is not None: raise TimePointInputError( "Invalid input: seconds - already have minute decimals") else: - self.minute_of_hour = kwargs.get("minute_of_hour", time_default) - if "second_of_minute_decimal" in kwargs: - if "second_of_minute" not in kwargs: + self.minute_of_hour = _int_caster( + minute_of_hour, "minute_of_hour", allow_none=True) + if second_of_minute_decimal is not None: + if second_of_minute is None: raise TimePointInputError( "Invalid input: second decimal points - but not seconds") - self.second_of_minute = kwargs["second_of_minute"] - self.second_of_minute += kwargs["second_of_minute_decimal"] + self.second_of_minute = _int_caster(second_of_minute, + "second_of_minute") + self.second_of_minute += float(second_of_minute_decimal) else: - self.second_of_minute = kwargs.get("second_of_minute", - time_default) + self.second_of_minute = _int_caster(second_of_minute, + "second_of_minute", + allow_none=True) + if not self.truncated: + if self.hour_of_day is None: + self.hour_of_day = 0 + if self.minute_of_hour is None: + self.minute_of_hour = 0 + if self.second_of_minute is None: + self.second_of_minute = 0 self.time_zone = TimeZone() has_unknown_tz = True - if "time_zone_hour" in kwargs: + if time_zone_hour is not None: has_unknown_tz = False - self.time_zone.hours = kwargs.get("time_zone_hour") - if "time_zone_minute" in kwargs: + self.time_zone.hours = _int_caster(time_zone_hour, + "time_zone_hour") + if time_zone_minute is not None: has_unknown_tz = False - self.time_zone.minutes = kwargs.get("time_zone_minute") - has_unknown_tz = self.truncated and has_unknown_tz - self.time_zone.unknown = has_unknown_tz + self.time_zone.minutes = _int_caster(time_zone_minute, + "time_zone_minute") + self.time_zone.unknown = self.truncated and has_unknown_tz if not self.truncated: # Reduced precision date - e.g. 1970 - assume Jan 1, etc. if (self.month_of_year is None and self.week_of_year is None and @@ -1469,6 +1582,41 @@ def iter_months_days(year, month_of_year=None, day_of_month=None, yield i + 1, day -if __name__ == "__main__": - suite = unittest.TestLoader().loadTestsFromTestCase(TestSuite) - unittest.TextTestRunner(verbosity=2).run(suite) +def _int_caster(number, name="number", allow_none=False): + if allow_none and number is None: + return None + try: + int_number = int(number) + float_number = float(number) + except (TypeError, ValueError) as num_exc: + raise BadInputError( + "Invalid input for {0}: {1}: {2}".format(name, number, num_exc)) + if float(int_number) != float_number: + raise BadInputError( + "Non-integer like number for {0}: {1}".format(name, number)) + return int_number + + +def _type_checker(*objects): + for type_info in objects: + value, name = type_info[:2] + allowed_types = list(type_info[2:]) + if None in allowed_types: + allowed_types.remove(None) + allowed_types.append(type(None)) + if int in allowed_types and float not in allowed_types: + value = _int_caster(value, name=name, allow_none=( + type(None) in allowed_types)) + is_ok = False + for type_ in allowed_types: + if isinstance(value, type_): + is_ok = True + break + if not is_ok: + values_string = "" + if allowed_types: + values_string = " should be: " + values_string += " or ".join( + [str(v) for v in allowed_types]) + raise BadInputError("Invalid type for '{0}': {1}{2}".format( + name, repr(value), values_string)) diff --git a/isoparsers.py b/isoparsers.py index 90fdf42..ff8695c 100644 --- a/isoparsers.py +++ b/isoparsers.py @@ -18,6 +18,15 @@ """This provides ISO 8601 parsing functionality.""" +import re + +import isodata + + +class TimeSyntaxError(ValueError): + + """An error denoting invalid input syntax.""" + class TimeRecurrenceParser(object): @@ -70,10 +79,10 @@ def parse(self, expression): if "intv" in result_map: interval = self.timeinterval_parser.parse( result_map["intv"]) - return TimeRecurrence(repetitions=repetitions, - start_point=start_point, - end_point=end_point, - interval=interval) + return isodata.TimeRecurrence(repetitions=repetitions, + start_point=start_point, + end_point=end_point, + interval=interval) raise TimeSyntaxError( "Not a supported ISO 8601 recurrence pattern: %s" % expression) @@ -456,12 +465,10 @@ def parse(self, timepoint_string): except (IOError, ValueError) as e: pass if key == "time_zone_utc" and value == "Z": - value = True - if key == "year_sign": - if value == "+": - value = 1 - else: - value = -1 + time_info.pop(key) + time_info.update({"time_zone_hour": 0, + "time_zone_minute": 0}) + continue time_info[key] = value info.update(time_info) if info.pop("truncated", False): @@ -470,7 +477,7 @@ def parse(self, timepoint_string): info["truncated_property"] = truncated_property if self.format_function is not None: info.update({"format_function": self.format_function}) - return TimePoint(**info) + return isodata.TimePoint(**info) def get_date_info(self, date_string, bad_types=None): """Return the format and properties from a date string.""" @@ -561,7 +568,7 @@ def parse(self, expression): value = value.replace(",", ".") value = float(value) result_map[key] = value - return TimeInterval(**result_map) + return isodata.TimeInterval(**result_map) raise TimeSyntaxError("Not an ISO 8601 duration representation: %s" % expression) diff --git a/isotests.py b/isotests.py index d84db32..bd94990 100644 --- a/isotests.py +++ b/isotests.py @@ -16,15 +16,18 @@ # along with this program. If not, see . #----------------------------------------------------------------------------- -"""This provides ISO 8601 parsing functionality.""" +"""This tests the ISO 8601 parsing and data model functionality.""" + +import copy +import unittest import isodata import isoparsers -def get_timeintervalparser_tests(self): +def get_timeintervalparser_tests(): """Yield tests for the time interval parser.""" - test_expresssions = { + test_expressions = { "P3Y": str(isodata.TimeInterval(years=3)), "P90Y": str(isodata.TimeInterval(years=90)), "P1Y2M": str(isodata.TimeInterval(years=1, months=2)), @@ -52,10 +55,11 @@ def get_timeintervalparser_tests(self): yield expression, ctrl_result -def get_timepointparser_tests(self): +def get_timepointparser_tests(allow_only_basic=False, + allow_truncated=False): """Yield tests for the time point parser.""" # Note: test dates assume 2 expanded year digits. - test_date_expressions = { + test_date_map = { "basic": { "complete": { "00440104": {"year": 44, "month_of_year": 1, @@ -210,19 +214,25 @@ def get_timepointparser_tests(self): } } } - test_time_expresssions = { + test_time_map = { "basic": { "complete": { "050102": {"hour_of_day": 5, "minute_of_hour": 1, "second_of_minute": 2}, "235902,345": {"hour_of_day": 23, "minute_of_hour": 59, - "second_of_minute": 2.345}, + "second_of_minute": 2, + "second_of_minute_decimal": 0.345}, "235902.345": {"hour_of_day": 23, "minute_of_hour": 59, - "second_of_minute": 2.345}, - "1201,4": {"hour_of_day": 12, "minute_of_hour": 1.4}, - "1201.4": {"hour_of_day": 12, "minute_of_hour": 1.4}, - "00,4356": {"hour_of_day": 0.4356}, - "00.4356": {"hour_of_day": 0.4356} + "second_of_minute": 2, + "second_of_minute_decimal": 0.345}, + "1201,4": {"hour_of_day": 12, "minute_of_hour": 1, + "minute_of_hour_decimal": 0.4}, + "1201.4": {"hour_of_day": 12, "minute_of_hour": 1, + "minute_of_hour_decimal": 0.4}, + "00,4356": {"hour_of_day": 0, + "hour_of_day_decimal": 0.4356}, + "00.4356": {"hour_of_day": 0, + "hour_of_day_decimal": 0.4356} }, "reduced": { "0203": {"hour_of_day": 2, "minute_of_hour": 3}, @@ -233,14 +243,22 @@ def get_timepointparser_tests(self): "truncated": True}, "-12": {"minute_of_hour": 12, "truncated": True}, "--45": {"second_of_minute": 45, "truncated": True}, - "-1234,45": {"minute_of_hour": 12, "second_of_minute": 34.45, + "-1234,45": {"minute_of_hour": 12, "second_of_minute": 34, + "second_of_minute_decimal": 0.45, "truncated": True}, - "-1234.45": {"minute_of_hour": 12, "second_of_minute": 34.45, + "-1234.45": {"minute_of_hour": 12, "second_of_minute": 34, + "second_of_minute_decimal": 0.45, "truncated": True}, - "-34,2": {"minute_of_hour": 34.2, "truncated": True}, - "-34.2": {"minute_of_hour": 34.2, "truncated": True}, - "--59,99": {"second_of_minute": 59.99, "truncated": True}, - "--59.99": {"second_of_minute": 59.99, "truncated": True} + "-34,2": {"minute_of_hour": 34, "minute_of_hour_decimal": 0.2, + "truncated": True}, + "-34.2": {"minute_of_hour": 34, "minute_of_hour_decimal": 0.2, + "truncated": True}, + "--59,99": {"second_of_minute": 59, + "second_of_minute_decimal": 0.99, + "truncated": True}, + "--59.99": {"second_of_minute": 59, + "second_of_minute_decimal": 0.99, + "truncated": True} } }, "extended": { @@ -248,13 +266,17 @@ def get_timepointparser_tests(self): "05:01:02": {"hour_of_day": 5, "minute_of_hour": 1, "second_of_minute": 2}, "23:59:02,345": {"hour_of_day": 23, "minute_of_hour": 59, - "second_of_minute": 2.345}, + "second_of_minute": 2, + "second_of_minute_decimal": 0.345}, "23:59:02.345": {"hour_of_day": 23, "minute_of_hour": 59, - "second_of_minute": 2.345}, - "12:01,4": {"hour_of_day": 12, "minute_of_hour": 1.4}, - "12:01.4": {"hour_of_day": 12, "minute_of_hour": 1.4}, - "00,4356": {"hour_of_day": 0.4356}, - "00.4356": {"hour_of_day": 0.4356} + "second_of_minute": 2, + "second_of_minute_decimal": 0.345}, + "12:01,4": {"hour_of_day": 12, "minute_of_hour": 1, + "minute_of_hour_decimal": 0.4}, + "12:01.4": {"hour_of_day": 12, "minute_of_hour": 1, + "minute_of_hour_decimal": 0.4}, + "00,4356": {"hour_of_day": 0, "hour_of_day_decimal": 0.4356}, + "00.4356": {"hour_of_day": 0, "hour_of_day_decimal": 0.4356} }, "reduced": { "02:03": {"hour_of_day": 2, "minute_of_hour": 3}, @@ -265,27 +287,35 @@ def get_timepointparser_tests(self): "truncated": True}, "-12": {"minute_of_hour": 12, "truncated": True}, "--45": {"second_of_minute": 45, "truncated": True}, - "-12:34,45": {"minute_of_hour": 12, "second_of_minute": 34.45, + "-12:34,45": {"minute_of_hour": 12, "second_of_minute": 34, + "second_of_minute_decimal": 0.45, "truncated": True}, - "-12:34.45": {"minute_of_hour": 12, "second_of_minute": 34.45, + "-12:34.45": {"minute_of_hour": 12, "second_of_minute": 34, + "second_of_minute_decimal": 0.45, "truncated": True}, - "-34,2": {"minute_of_hour": 34.2, "truncated": True}, - "-34.2": {"minute_of_hour": 34.2, "truncated": True}, - "--59,99": {"second_of_minute": 59.99, "truncated": True}, - "--59.99": {"second_of_minute": 59.99, "truncated": True} + "-34,2": {"minute_of_hour": 34, "minute_of_hour_decimal": 0.2, + "truncated": True}, + "-34.2": {"minute_of_hour": 34, "minute_of_hour_decimal": 0.2, + "truncated": True}, + "--59,99": {"second_of_minute": 59, + "second_of_minute_decimal": 0.99, + "truncated": True}, + "--59.99": {"second_of_minute": 59, + "second_of_minute_decimal": 0.99, + "truncated": True} } } } - test_timezone_expressions = { + test_timezone_map = { "basic": { - "Z": {"time_zone_utc": True}, + "Z": {"time_zone_hour": 0, "time_zone_minute": 0}, "+01": {"time_zone_hour": 1}, "-05": {"time_zone_hour": -5}, "+2301": {"time_zone_hour": 23, "time_zone_minute": 1}, "-1230": {"time_zone_hour": -12, "time_zone_minute": 30} }, "extended": { - "Z": {"time_zone_utc": True}, + "Z": {"time_zone_hour": 0, "time_zone_minute": 0}, "+01": {"time_zone_hour": 1}, "-05": {"time_zone_hour": -5}, "+23:01": {"time_zone_hour": 23, "time_zone_minute": 1}, @@ -293,21 +323,18 @@ def get_timepointparser_tests(self): } } format_ok_keys = ["basic", "extended"] - if self.allow_only_basic: + if allow_only_basic: format_ok_keys = ["basic"] date_combo_ok_keys = ["complete"] - if self.allow_truncated: + if allow_truncated: date_combo_ok_keys = ["complete", "truncated"] time_combo_ok_keys = ["complete", "reduced"] - test_date_map = self.TEST_DATE_EXPRESSIONS - test_time_map = self.TEST_TIME_EXPRESSIONS - test_timezone_map = self.TEST_TIMEZONE_EXPRESSIONS for format_type in format_ok_keys: date_format_tests = test_date_map[format_type] time_format_tests = test_time_map[format_type] timezone_format_tests = test_timezone_map[format_type] for date_key in date_format_tests: - if not self.allow_truncated and date_key == "truncated": + if not allow_truncated and date_key == "truncated": continue for date_expr, info in date_format_tests[date_key].items(): yield date_expr, info @@ -318,8 +345,11 @@ def get_timepointparser_tests(self): for time_key in time_combo_ok_keys: time_items = time_format_tests[time_key].items() for time_expr, time_info in time_items: - combo_expr = (date_expr + self.TIME_DESIGNATOR + - time_expr) + combo_expr = ( + date_expr + + isoparsers.TimePointParser.TIME_DESIGNATOR + + time_expr + ) combo_info = {} for key, value in info.items() + time_info.items(): combo_info[key] = value @@ -332,12 +362,13 @@ def get_timepointparser_tests(self): timezone_info.items()): tz_info[key] = value yield tz_expr, tz_info - if not self.allow_truncated: + if not allow_truncated: continue for time_key in time_format_tests: time_tests = time_format_tests[time_key] for time_expr, time_info in time_tests.items(): - combo_expr = self.TIME_DESIGNATOR + time_expr + combo_expr = (isoparsers.TimePointParser.TIME_DESIGNATOR + + time_expr) # Add truncated (no date). combo_info = {"truncated": True} for key, value in time_info.items(): @@ -353,7 +384,33 @@ def get_timepointparser_tests(self): yield tz_expr, tz_info -def get_timerecurrenceparser_tests(self): +def get_timerecurrence_tests(): + """Return test expressions for isodata.TimeRecurrence.""" + return [ + ("R3/1001-W01-1T00:00:00Z/1002-W52-6T00:00:00-05:30", + ["1001-W01-1T00:00:00Z", "1001-W53-3T14:45:00Z", + "1002-W52-6T05:30:00Z"]), + ("R3/P700D/1957-W01-1T06,5Z", + ["1953-W10-1T06,5Z", "1955-W05-1T06,5Z", "1957-W01-1T06,5Z"]), + ("R3/P5DT2,5S/1001-W11-1T00:30:02,5-02:00", + ["1001-W09-5T00:29:57,5-02:00", "1001-W10-3T00:30:00-02:00", + "1001-W11-1T00:30:02,5-02:00"]), + ("R/+000001W457T060000Z/P4M1D", + ["+000001-W45-7T06:00:00Z", "+000002-W11-2T06:00:00Z", + "+000002-W28-6T06:00:00Z"]), + ("R/P4M1DT6M/+002302-002T06:00:00-00:30", + ["+002302-002T06:00:00-00:30", "+002301-244T05:54:00-00:30", + "+002301-120T05:48:00-00:30"]), + ("R/P30Y2DT15H/-099994-02-12T17:00:00-02:30", + ["-099994-02-12T17:00:00-02:30", "-100024-02-10T02:00:00-02:30", + "-100054-02-07T11:00:00-02:30"]), + ("R/-100024-02-10T17:00:00-12:30/PT5.5H", + ["-100024-02-10T17:00:00-12:30", "-100024-02-10T22,5-12:30", + "-100024-02-11T04:00:00-12:30"]) + ] + + +def get_timerecurrenceparser_tests(): """Yield tests for the time recurrence parser.""" test_points = ["-100024-02-10T17:00:00-12:30", "+000001-W45-7T06Z", "1001W011", @@ -368,7 +425,7 @@ def get_timerecurrenceparser_tests(self): point_parser = isoparsers.TimePointParser() interval_parser = isoparsers.TimeIntervalParser() for point_expr in test_points: - interval_tests = interval_parser.get_tests() + interval_tests = get_timeintervalparser_tests() start_point = point_parser.parse(point_expr) for interval_expr, interval_result in interval_tests: interval = interval_parser.parse(interval_expr) @@ -423,9 +480,9 @@ def test_timepoint(self): my_date = datetime.datetime(1801, 1, 1) while my_date <= datetime.datetime(2401, 2, 1): ctrl_data = my_date.isocalendar() - test_date = TimePoint(year=my_date.year, - month_of_year=my_date.month, - day_of_month=my_date.day) + test_date = isodata.TimePoint(year=my_date.year, + month_of_year=my_date.month, + day_of_month=my_date.day) test_data = test_date.get_week_date() self.assertEqual(test_data, ctrl_data) ctrl_data = (my_date.year, my_date.month, my_date.day) @@ -434,7 +491,7 @@ def test_timepoint(self): ctrl_data = my_date.toordinal() year, day_of_year = test_date.get_ordinal_date() test_data = day_of_year - test_data += get_days_since_1_ad(year - 1) + test_data += isodata.get_days_since_1_ad(year - 1) self.assertEqual(test_data, ctrl_data) for attribute, attr_max in [("weeks", 110), ("days", 770), @@ -446,12 +503,14 @@ def test_timepoint(self): ctrl_data = my_date + datetime.timedelta(**kwargs) ctrl_data = (ctrl_data.year, ctrl_data.month, ctrl_data.day) test_data = ( - test_date + TimeInterval(**kwargs)).get_calendar_date() + test_date + isodata.TimeInterval( + **kwargs)).get_calendar_date() self.assertEqual(test_data, ctrl_data) ctrl_data = (my_date - datetime.timedelta(**kwargs)) ctrl_data = (ctrl_data.year, ctrl_data.month, ctrl_data.day) test_data = ( - test_date - TimeInterval(**kwargs)).get_calendar_date() + test_date - isodata.TimeInterval( + **kwargs)).get_calendar_date() self.assertEqual(test_data, ctrl_data) ctrl_data = (my_date + datetime.timedelta(minutes=450) + datetime.timedelta(hours=5) - @@ -459,8 +518,10 @@ def test_timepoint(self): ctrl_data = [(ctrl_data.year, ctrl_data.month, ctrl_data.day), (ctrl_data.hour, ctrl_data.minute, ctrl_data.second)] test_data = ( - test_date + TimeInterval(minutes=450) + - TimeInterval(hours=5) - TimeInterval(weeks=5, seconds=500)) + test_date + isodata.TimeInterval(minutes=450) + + isodata.TimeInterval(hours=5) - + isodata.TimeInterval(weeks=5, seconds=500) + ) test_data = [test_data.get_calendar_date(), test_data.get_hour_minute_second()] self.assertEqual(test_data, ctrl_data) @@ -470,22 +531,23 @@ def test_timepoint(self): def test_timepoint_parser(self): """Test the parsing of date/time expressions.""" parser = isoparsers.TimePointParser(allow_truncated=True) - for expression, timepoint_kwargs in get_timepointparser_tests(): + for expression, timepoint_kwargs in get_timepointparser_tests( + allow_truncated=True): timepoint_kwargs = copy.deepcopy(timepoint_kwargs) try: test_data = str(parser.parse(expression)) - except TimeSyntaxError: + except isoparsers.TimeSyntaxError: raise ValueError("Parsing failed for %s" % expression) - ctrl_data = str(TimePoint(**timepoint_kwargs)) + ctrl_data = str(isodata.TimePoint(**timepoint_kwargs)) self.assertEqual(test_data, ctrl_data, expression) def test_timerecurrence(self): """Test the recurring date/time series data model.""" parser = isoparsers.TimeRecurrenceParser() - for expression, ctrl_results in TimeRecurrence.TEST_EXPRESSIONS: + for expression, ctrl_results in get_timerecurrence_tests(): try: test_recurrence = parser.parse(expression) - except TimeSyntaxError: + except isoparsers.TimeSyntaxError: raise ValueError( "TimeRecurrenceParser test failed to parse '%s'" % expression @@ -503,7 +565,12 @@ def test_timerecurrence_parser(self): for expression, test_info in get_timerecurrenceparser_tests(): try: test_data = str(parser.parse(expression)) - except TimeSyntaxError: + except isoparsers.TimeSyntaxError: raise ValueError("Parsing failed for %s" % expression) - ctrl_data = str(TimeRecurrence(**test_info)) + ctrl_data = str(isodata.TimeRecurrence(**test_info)) self.assertEqual(test_data, ctrl_data, expression) + + +if __name__ == "__main__": + suite = unittest.TestLoader().loadTestsFromTestCase(TestSuite) + unittest.TextTestRunner(verbosity=2).run(suite) From 433ba3a64631fff370ee4dabd624d2c3345efacc Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Tue, 4 Feb 2014 12:59:29 +0000 Subject: [PATCH 04/14] Restructure & rename files --- __init__.py | 0 isodatetime/__init__.py | 17 +++++++++++++++++ isodata.py => isodatetime/data.py | 0 isoparsers.py => isodatetime/parsers.py | 0 isotests.py => isodatetime/tests.py | 0 5 files changed, 17 insertions(+) delete mode 100644 __init__.py create mode 100644 isodatetime/__init__.py rename isodata.py => isodatetime/data.py (100%) rename isoparsers.py => isodatetime/parsers.py (100%) rename isotests.py => isodatetime/tests.py (100%) diff --git a/__init__.py b/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/isodatetime/__init__.py b/isodatetime/__init__.py new file mode 100644 index 0000000..94d38f0 --- /dev/null +++ b/isodatetime/__init__.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +#----------------------------------------------------------------------------- +# (C) British Crown Copyright 2013-2014 Met Office. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +#----------------------------------------------------------------------------- diff --git a/isodata.py b/isodatetime/data.py similarity index 100% rename from isodata.py rename to isodatetime/data.py diff --git a/isoparsers.py b/isodatetime/parsers.py similarity index 100% rename from isoparsers.py rename to isodatetime/parsers.py diff --git a/isotests.py b/isodatetime/tests.py similarity index 100% rename from isotests.py rename to isodatetime/tests.py From f4e98815674cb7b7de62f3ed343194fddae036ce Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Wed, 5 Feb 2014 17:30:16 +0000 Subject: [PATCH 05/14] Improve dumping capability --- isodatetime/data.py | 227 ++++++++++++++++++++++---------- isodatetime/dumpers.py | 102 +++++++++++++++ isodatetime/parser_spec.py | 198 ++++++++++++++++++++++++++++ isodatetime/parsers.py | 261 ++++++++----------------------------- isodatetime/run_tests | 22 ++++ isodatetime/tests.py | 164 +++++++++++++---------- isodatetime/util.py | 41 ++++++ 7 files changed, 673 insertions(+), 342 deletions(-) create mode 100644 isodatetime/dumpers.py create mode 100644 isodatetime/parser_spec.py create mode 100755 isodatetime/run_tests create mode 100644 isodatetime/util.py diff --git a/isodatetime/data.py b/isodatetime/data.py index 09239e6..5b5ba3c 100644 --- a/isodatetime/data.py +++ b/isodatetime/data.py @@ -19,6 +19,9 @@ """This provides ISO 8601 data model functionality.""" +from . import dumpers +from . import util + DAYS_OF_MONTHS = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] DAYS_OF_MONTHS_LEAP = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] @@ -26,6 +29,12 @@ "ordinal": (2000, 3)} +TIMEPOINT_DUMPER_MAP = { + 0: dumpers.TimePointDumper(num_expanded_year_digits=0), + 2: dumpers.TimePointDumper(num_expanded_year_digits=2) +} + + class BadInputError(ValueError): """An error raised when constructor inputs are invalid.""" @@ -500,8 +509,8 @@ class TimePoint(object): representation, 0 will be assumed if this is not provided. time_zone_minute - (default 0) an integer between 0 and 59 denoting the minute component of the timezone offset from UTC. - format_function - a custom callable to provide your own str() - implementation. + dump_format - a custom format string to control the stringification + of the timepoint. See isodatetime.parser_spec for more details. truncated - (default False) a boolean denoting whether the date/time instant has purposefully incomplete information (ISO 8601:2000 truncation). @@ -511,13 +520,21 @@ class TimePoint(object): """ + DATA_ATTRIBUTES = [ + "expanded_year_digits", "year", "month_of_year", + "day_of_year", "day_of_month", "day_of_week", + "week_of_year", "hour_of_day", "minute_of_hour", + "second_of_minute", "truncated", "truncated_property", + "dump_format" + ] + def __init__(self, expanded_year_digits=0, year=None, month_of_year=None, week_of_year=None, day_of_year=None, day_of_month=None, day_of_week=None, hour_of_day=None, hour_of_day_decimal=None, minute_of_hour=None, minute_of_hour_decimal=None, second_of_minute=None, second_of_minute_decimal=None, time_zone_hour=None, time_zone_minute=None, - format_function=None, truncated=False, + dump_format=None, truncated=False, truncated_property=None): _type_checker( (expanded_year_digits, "expanded_year_digits", int), @@ -537,17 +554,18 @@ def __init__(self, expanded_year_digits=0, year=None, month_of_year=None, (time_zone_hour, "time_zone_hour", None, int), (time_zone_minute, "time_zone_minute", None, int) ) - if format_function is not None and not callable(format_function): + if (dump_format is not None and not + isinstance(dump_format, basestring)): raise BadInputError( - "Invalid input for format_function: {0}".format( - format_function)) + "Invalid input for dumper: {0}".format( + dumper)) if (truncated_property is not None and truncated_property not in ["year_of_decade", "year_of_century"]): raise BadInputError( "Invalid input for truncated_property: {0}".format( truncated_property)) - self.format_function = format_function + self.dump_format = dump_format self.expanded_year_digits = _int_caster(expanded_year_digits, "expanded_year_digits") self.truncated = truncated @@ -680,6 +698,52 @@ def get_ordinal_date(self): self.week_of_year, self.day_of_week) + def get(self, property_name): + """Return a calculated value for property name.""" + if property_name == "year_sign": + return "+" if self.year >= 0 else "-" + if property_name == "century": + return (abs(self.year) % 10000) // 100 + if property_name == "year_of_century": + return abs(self.year) % 100 + if property_name == "month_of_year": + return self.get_calendar_date()[1] + if property_name == "day_of_year": + return self.get_ordinal_date()[1] + if property_name == "day_of_month": + return self.get_calendar_date()[2] + if property_name == "week_of_year": + return self.get_week_date()[1] + if property_name == "day_of_week": + return self.get_week_date()[2] + if property_name == "year_of_decade": + return abs(self.year) % 10 + if property_name == "minute_of_hour": + return int(self.minute_of_hour) + if property_name == "hour_of_day": + return int(self.hour_of_day) + if property_name == "hour_of_day_decimal_string": + string = "%f" % (float(self.hour_of_day) - int(self.hour_of_day)) + return string.replace("0.", ",", 1) + if property_name == "minute_of_hour_decimal_string": + string = "%f" % (float(self.minute_of_hour) - + int(self.minute_of_hour)) + return string.replace("0.", ",", 1) + if property_name == "second_of_minute": + return int(self.second_of_minute) + if property_name == "second_of_minute_decimal_string": + string = "%f" % (float(self.second_of_minute) - + int(self.second_of_minute)) + return string.replace("0.", ",", 1) + if property_name == "time_zone_minute_abs": + return abs(self.time_zone.minutes) + if property_name == "time_zone_hour_abs": + return abs(self.time_zone.minutes) + if property_name == "time_zone_sign": + if self.time_zone.hours < 0 or self.time_zone.minutes < 0: + return "-" + return "+" + def get_second_of_day(self): """Return the seconds elapsed since the start of the day.""" second_of_day = 0 @@ -947,11 +1011,7 @@ def __add__(self, other, no_copy=False): def copy(self): """Copy this TimePoint without leaving references.""" dummy_timepoint = TimePoint() - for attr in ["expanded_year_digits", "year", "month_of_year", - "day_of_year", "day_of_month", "day_of_week", - "week_of_year", "hour_of_day", "minute_of_hour", - "second_of_minute", "truncated", "truncated_property", - "format_function"]: + for attr in self.DATA_ATTRIBUTES: setattr(dummy_timepoint, attr, getattr(self, attr)) dummy_timepoint.time_zone = self.time_zone.copy() return dummy_timepoint @@ -962,6 +1022,17 @@ def __cmp__(self, other): "Invalid comparison type '%s' - should be TimePoint." % type(other).__name__ ) + if self.truncated != other.truncated: + raise TypeError( + "Cannot compare truncated to non-truncated " + + "TimePoint: %s, %s" % (self, other)) + if self.truncated: + for attribute in self.DATA_ATTRIBUTES: + other_attr = getattr(other, attribute) + self_attr = getattr(self, attribute) + if other_attr != self_attr: + return cmp(self_attr, other_attr) + return 0 other = other.copy() other.set_time_zone(self.get_time_zone()) if self.get_is_calendar_date(): @@ -1146,9 +1217,19 @@ def _tick_over_day_of_month(self): self.day_of_month = day return - def __str__(self, override_custom=False): - if self.format_function is not None and not override_custom: - return self.format_function(self) + def __str__(self, override_custom_dump_format=False): + if self.expanded_year_digits not in TIMEPOINT_DUMPER_MAP: + TIMEPOINT_DUMPER_MAP[self.expanded_year_digits] = ( + dumpers.TimePointDumper( + self.expanded_year_digits)) + dumper = TIMEPOINT_DUMPER_MAP[self.expanded_year_digits] + if self.truncated: + return dumper.dump(self, self._get_truncated_dump_format()) + if self.dump_format and not override_custom_dump_format: + return dumper.dump(self, self.dump_format) + return dumper.dump(self, self._get_dump_format()) + + def _get_dump_format(self): year_digits = 4 + self.expanded_year_digits year_string = "%0" + str(year_digits) + "d" if self.expanded_year_digits: @@ -1163,67 +1244,79 @@ def __str__(self, override_custom=False): ) elif self.year is not None: year_string = year_string % self.year - if self.truncated: - year_string = "-" - if self.truncated_property == "year_of_decade": - year_string = "-" + str(self.year % 10) - elif self.truncated_property == "year_of_century": - year_string = "-" + str(self.year % 100) - date_string = year_string - if self.truncated: - if self.month_of_year is not None: - date_string = year_string + "-%02d" % self.month_of_year - if self.day_of_month is not None: - date_string += "-%02d" % self.day_of_month - elif self.day_of_month is not None: - date_string = year_string + "-%02d" % self.day_of_month - if self.day_of_year is not None: - day_string = "%03d" % self.day_of_year - if year_string == "-": - date_string = year_string + day_string - else: - date_string = year_string + "-" + day_string - if self.week_of_year is not None: - date_string = year_string + "-W%02d" % self.week_of_year - if self.day_of_week is not None: - date_string += "-%01d" % self.day_of_week - elif self.day_of_week is not None: - date_string = year_string + "-W-%01d" % self.day_of_week + + if self.get_is_calendar_date(): + date_string = year_string + "-MM-DD" + if self.get_is_ordinal_date(): + date_string = year_string + "-DDD" + if self.get_is_week_date(): + date_string = year_string + "-Www-D" + time_string = "Thh" + if int(self.hour_of_day) != self.hour_of_day: + time_string += ",ii" else: - if self.get_is_calendar_date(): - date_string = year_string + "-%02d-%02d" % (self.month_of_year, - self.day_of_month) - if self.get_is_ordinal_date(): - date_string = year_string + "-%03d" % self.day_of_year - if self.get_is_week_date(): - date_string = year_string + "-W%02d-%01d" % (self.week_of_year, - self.day_of_week) + time_string += ":mm" + if int(self.minute_of_hour) != self.minute_of_hour: + time_string += ",nn" + else: + seconds_int = int(self.second_of_minute) + time_string += ":ss" + if seconds_int != self.second_of_minute: + time_string += ",tt" + time_string += u" ±hh:mm" + return date_string + time_string + + def _get_truncated_dump_format(self): + year_string = "-" + if self.truncated_property == "year_of_decade": + year_string = "-" + "y" + elif self.truncated_property == "year_of_century": + if self.month_of_year is None: + year_string = "YY" + else: + year_string = "-YY" + date_string = year_string + if self.month_of_year is not None: + date_string = year_string + "-MM" + if self.day_of_month is not None: + date_string += "-DD" + elif self.day_of_month is not None: + date_string = year_string + "-DD" + if self.day_of_year is not None: + day_string = "DDD" + if year_string == "-": + date_string = year_string + day_string + else: + date_string = year_string + "-" + day_string + if self.week_of_year is not None: + date_string = year_string + "-Www" + if self.day_of_week is not None: + date_string += "-D" + elif self.day_of_week is not None: + date_string = year_string + "-W-D" time_string = "" if self.hour_of_day is not None: - time_string = "T%02d" % int(self.hour_of_day) + time_string = "Thh" if int(self.hour_of_day) != self.hour_of_day: remainder = self.hour_of_day - int(self.hour_of_day) - time_string += _format_remainder(remainder) + time_string += ",ii" else: - if self.truncated and self.minute_of_hour is None: + if self.minute_of_hour is None: time_string += ":00:00" else: - time_string += ":%02d" % int(self.minute_of_hour) + time_string += ":mm" if int(self.minute_of_hour) != self.minute_of_hour: - remainder = ( - self.minute_of_hour - int(self.minute_of_hour)) - time_string += _format_remainder(remainder) + time_string += ",nn" else: - if self.truncated and self.second_of_minute is None: + if self.second_of_minute is None: time_string += ":00" else: seconds_int = int(self.second_of_minute) - time_string += ":%02d" % seconds_int + time_string += ":ss" if seconds_int != self.second_of_minute: - remainder = self.second_of_minute - seconds_int - time_string += _format_remainder(remainder) + time_string += ",tt" if time_string: - time_string += str(self.time_zone) + time_string += u"±hh:mm" return date_string + time_string __repr__ = __str__ @@ -1259,7 +1352,7 @@ def _format_remainder(float_time_number): return string -@cache_results +@util.cache_results def get_is_leap_year(year): """Return if year is a leap year in the proleptic Gregorian calendar.""" if year % 4 == 0: @@ -1271,7 +1364,7 @@ def get_is_leap_year(year): return False -@cache_results +@util.cache_results def get_days_in_year(year): """Return 366 if year is a leap year, otherwise 365.""" if get_is_leap_year(year): @@ -1279,7 +1372,7 @@ def get_days_in_year(year): return 365 -@cache_results +@util.cache_results def get_weeks_in_year(year): """Return the number of calendar weeks in this week date year.""" cal_year, cal_ord_days = get_ordinal_date_week_date_start(year) @@ -1470,7 +1563,7 @@ def get_week_date_from_ordinal_date(year, day_of_year): return get_week_date_from_calendar_date(year, month, day) -@cache_results +@util.cache_results def get_calendar_date_week_date_start(year): """Return the calendar date of the start of (week date) year.""" ref_year, ref_month, ref_day = WEEK_DAY_START_REFERENCE["calendar"] @@ -1504,7 +1597,7 @@ def get_calendar_date_week_date_start(year): return year - 1, month, day -@cache_results +@util.cache_results def get_days_since_1_ad(year): """Return the number of days since Jan 1, 1 A.D. to the year end.""" if year == 1: @@ -1519,7 +1612,7 @@ def get_days_since_1_ad(year): return days -@cache_results +@util.cache_results def get_ordinal_date_week_date_start(year): """Return the week date start for year in year, day-of-year.""" cal_year, cal_month, cal_day = get_calendar_date_week_date_start(year) diff --git a/isodatetime/dumpers.py b/isodatetime/dumpers.py new file mode 100644 index 0000000..f2e0def --- /dev/null +++ b/isodatetime/dumpers.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- +#----------------------------------------------------------------------------- +# (C) British Crown Copyright 2013-2014 Met Office. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +#----------------------------------------------------------------------------- + +"""This provides data model dumping functionality.""" + +import re + +from . import parser_spec +from . import util + + +class TimePointDumper(object): + + """Dump TimePoint instances to strings.""" + + RE_PROP = re.compile("%\(([^)]*)\)") + + def __init__(self, num_expanded_year_digits=2): + self._rec_formats = {"date": [], "time": [], "timezone": []} + self._time_designator = parser_spec.TIME_DESIGNATOR + for info, key in [ + (parser_spec.get_date_translate_info( + num_expanded_year_digits), + "date"), + (parser_spec.get_time_translate_info(), "time"), + (parser_spec.get_timezone_translate_info(), "timezone")]: + for regex, regex_sub, format_sub in info: + rec = re.compile(regex) + prop = None + prop_results = self.RE_PROP.search(format_sub) + if prop_results: + prop = prop_results.groups()[0] + self._rec_formats[key].append((rec, format_sub, prop)) + + def dump(self, timepoint, formatting_string): + """Dump a timepoint according to formatting_string. + + The syntax for formatting_string is the syntax used for the + TimePointParser internals. See TimePointParser.*_TRANSLATE_INFO. + + """ + expression, properties = self._get_expression_and_properties( + formatting_string) + if (not timepoint.truncated and + ("week_of_year" in properties or + "day_of_week" in properties) and + not ("month_of_year" in properties or + "day_of_month" in properties or + "day_of_year" in properties)): + # We need the year to be in week years. + timepoint.to_week_date() + property_map = {} + for property_ in properties: + property_map[property_] = timepoint.get(property_) + return expression % property_map + + @util.cache_results + def _get_expression_and_properties(self, formatting_string): + date_time_strings = formatting_string.split( + self._time_designator) + date_string = date_time_strings[0] + time_string = "" + timezone_string = "" + if len(date_time_strings) > 1: + time_string = date_time_strings[1] + if time_string.endswith("Z"): + time_string = time_string[:-1] + timezone_string = "Z" + elif u"±" in time_string: + time_string, timezone_string = time_string.split(u"±") + timezone_string = u"±" + timezone_string + point_prop_list = [] + string_map = {"date": "", "time": "", "timezone": ""} + for string, key in [(date_string, "date"), + (time_string, "time"), + (timezone_string, "timezone")]: + for rec, format_sub, prop in self._rec_formats[key]: + new_string = rec.sub(format_sub, string) + if new_string != string and prop is not None: + point_prop_list.append(prop) + string = new_string + string_map[key] = string + expression = string_map["date"] + if string_map["time"]: + expression += self._time_designator + string_map["time"] + expression += string_map["timezone"] + return expression, tuple(point_prop_list) diff --git a/isodatetime/parser_spec.py b/isodatetime/parser_spec.py new file mode 100644 index 0000000..cf3d6d6 --- /dev/null +++ b/isodatetime/parser_spec.py @@ -0,0 +1,198 @@ +# -*- coding: utf-8 -*- +#----------------------------------------------------------------------------- +# (C) British Crown Copyright 2013-2014 Met Office. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +#----------------------------------------------------------------------------- + +"""This provides data to drive ISO 8601 parsing functionality.""" + + +DATE_EXPRESSIONS = {"basic": {"complete": u""" +CCYYMMDD +±XCCYYMMDD +CCYYDDD +±XCCYYDDD +CCYYWwwD +±XCCYYWwwD""", + "reduced": u""" +CCYY-MM # Deviation? Not clear if "basic" or "extended" in standard. +CCYY +CC +±XCCYY-MM # Deviation? Not clear if "basic" or "extended" in standard. +±XCCYY +±XCC +CCYYWww +±XCCYYWww""", + "truncated": u""" +-YYMM +-YY +--MMDD +--MM +---DD +YYMMDD +YYDDD +-DDD +YYWwwD +YYWww +-zWwwD +-zWww +-WwwD +-Www +-W-D +"""}, + "extended": {"complete": u""" +CCYY-MM-DD +±XCCYY-MM-DD +CCYY-DDD +±XCCYY-DDD +CCYY-Www-D +±XCCYY-Www-D""", + "reduced": u""" +CCYY-MM +±XCCYY-MM +CCYY-Www +±XCCYY-Www""", + "truncated": u""" +-YY-MM +--MM-DD +YY-MM-DD +YY-DDD +-DDD # Deviation from standard ? +YY-Www-D +YY-Www +-z-WwwD +-z-Www +-Www-D +"""}} +TIME_EXPRESSIONS = {"basic": {"complete": u""" +# No Time Zone +hhmmss + +# No Time Zone - decimals +hhmmss,tt +hhmm,nn +hh,ii +""", + "reduced": u""" +# No Time Zone +hhmm +hh + +# No Time Zone - decimals +""", + "truncated": u""" +# No Time Zone +-mmss +-mm +--ss + +# No Time Zone - decimals +-mmss,tt +-mm,nn +--ss,tt +"""}, + "extended": {"complete": u""" +# No Time Zone +hh:mm:ss + +# No Time Zone - decimals +hh:mm:ss,tt +hh:mm,nn +hh,ii # Deviation? Not allowed in standard ? +""", + "reduced": u""" +# No Time Zone +hh:mm +hh # Deviation? Not allowed in standard ? +""", + "truncated": u""" +# No Time Zone +-mm:ss +-mm # Deviation? Not allowed in standard ? +--ss # Deviation? Not allowed in standard ? + +# No Time Zone - decimals +-mm:ss,tt +-mm,nn # Deviation? Not allowed in standard ? +--ss,tt # Deviation? Not allowed in standard ? +"""}} +TIMEZONE_EXPRESSIONS = {"basic": u""" +Z +±hh +±hhmm +""", + "extended": u""" +Z +±hh # Deviation? Not allowed in standard? +±hh:mm +"""} +TIME_DESIGNATOR = "T" +_DATE_TRANSLATE_INFO = [ + (u"±", "(?P[-+])", "%(year_sign)s"), + (u"CC", "(?P\d\d)", "%(century)02d"), + (u"YY", "(?P\d\d)", "%(year_of_century)02d"), + (u"MM", "(?P\d\d)", "%(month_of_year)02d"), + (u"DDD", "(?P\d\d\d)", "%(day_of_year)03d"), + (u"DD", "(?P\d\d)", "%(day_of_month)02d"), + (u"Www", "W(?P\d\d)", "W%(week_of_year)02d"), + (u"D", "(?P\d)", "%(day_of_week)01d"), + (u"z", "(?P\d)", "%(year_of_decade)01d"), + (u"^---", "(?P---)", "---"), + (u"^--", "(?P--)", "--"), + (u"^-", "(?P-)", "-") +] +_TIME_TRANSLATE_INFO = [ + (u"(?<=^hh)mm", "(?P\d\d)", "%(minute_of_hour)02d"), + (u"(?<=^hh:)mm", "(?P\d\d)", "%(minute_of_hour)02d"), + (u"(?<=^-)mm", "(?P\d\d)", "%(minute_of_hour)02d"), + (u"^hh", "(?P\d\d)", "%(hour_of_day)02d"), + (u",ii", "[,.](?P\d+)", + "%(hour_of_day_decimal_string)s"), + (u",nn", "[,.](?P\d+)", + "%(minute_of_hour_decimal_string)s"), + (u"ss", "(?P\d\d)", "%(second_of_minute)02d"), + (u",tt", "[,.](?P\d+)", + "%(second_of_minute_decimal_string)s"), + (u"^--", "(?P--)", "--"), + (u"^-", "(?P-)", "--") +] +_TIMEZONE_TRANSLATE_INFO = [ + (u"(?<=±hh)mm", "(?P\d\d)", + "%(time_zone_minute_abs)02d"), + (u"(?<=±hh:)mm", "(?P\d\d)", + "%(time_zone_minute_abs)02d"), + (u"(?<=±)hh", "(?P\d\d)", + "%(time_zone_hour_abs)02d"), + (u"±", "(?P[-+])", "%(time_zone_sign)s"), + (u"Z", "(?PZ)", "Z") +] + + +def get_date_translate_info(num_expanded_year_digits=2): + expanded_year_digit_regex = "\d" * num_expanded_year_digits + return _DATE_TRANSLATE_INFO + [ + (u"X", + "(?P" + expanded_year_digit_regex + ")", + "%(expanded_year_digits)0" + str(num_expanded_year_digits) + "d") + ] + + +def get_time_translate_info(): + return _TIME_TRANSLATE_INFO + + +def get_timezone_translate_info(): + return _TIMEZONE_TRANSLATE_INFO + diff --git a/isodatetime/parsers.py b/isodatetime/parsers.py index ff8695c..a9fb9e6 100644 --- a/isodatetime/parsers.py +++ b/isodatetime/parsers.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- #----------------------------------------------------------------------------- -# (C) British Crown Copyright 2013 Met Office. +# (C) British Crown Copyright 2013-2014 Met Office. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by @@ -20,7 +20,9 @@ import re -import isodata +from . import data +from . import dumpers +from . import parser_spec class TimeSyntaxError(ValueError): @@ -79,10 +81,12 @@ def parse(self, expression): if "intv" in result_map: interval = self.timeinterval_parser.parse( result_map["intv"]) - return isodata.TimeRecurrence(repetitions=repetitions, - start_point=start_point, - end_point=end_point, - interval=interval) + return data.TimeRecurrence( + repetitions=repetitions, + start_point=start_point, + end_point=end_point, + interval=interval + ) raise TimeSyntaxError( "Not a supported ISO 8601 recurrence pattern: %s" % expression) @@ -112,189 +116,28 @@ class TimePointParser(object): timezone information should be assumed UTC (Z). Otherwise, these will be converted to the local timezone. - format_function (default None) should be a callable that takes a - TimePoint instance created by this parser and returns a custom - string representation such as "20150304T0103". This is called on - str(timepoint_instance). If None, the default TimePoint - formatting will be applied. + dump_format (default None) specifies a default custom dump format + string for TimePoint instances. See data.TimePoint documentation + for syntax. """ - DATE_EXPRESSIONS = {"basic": {"complete": u""" -ccYYMMDD -±ΫccYYMMDD -ccYYDDD -±ΫccYYDDD -ccYYWwwD -±ΫccYYWwwD""", - "reduced": u""" -ccYY-MM # Deviation? Not clear if "basic" or "extended" in standard. -ccYY -cc -±ΫccYY-MM # Deviation? Not clear if "basic" or "extended" in standard. -±ΫccYY -±Ϋcc -ccYYWww -±ΫccYYWww""", - "truncated": u""" --YYMM --YY ---MMDD ---MM ----DD -YYMMDD -YYDDD --DDD -YYWwwD -YYWww --ỵWwwD --ỵWww --WwwD --Www --W-D -"""}, - "extended": {"complete": u""" -ccYY-MM-DD -±ΫccYY-MM-DD -ccYY-DDD -±ΫccYY-DDD -ccYY-Www-D -±ΫccYY-Www-D""", - "reduced": u""" -ccYY-MM -±ΫccYY-MM -ccYY-Www -±ΫccYY-Www""", - "truncated": u""" --YY-MM ---MM-DD -YY-MM-DD -YY-DDD --DDD # Deviation from standard ? -YY-Www-D -YY-Www --ỵ-WwwD --ỵ-Www --Www-D -"""}} - - TIME_EXPRESSIONS = {"basic": {"complete": u""" -# No Time Zone -hhmmss - -# No Time Zone - decimals -hhmmss,sṡ -hhmm,mṁ -hh,hḣ -""", - "reduced": u""" -# No Time Zone -hhmm -hh - -# No Time Zone - decimals -""", - "truncated": u""" -# No Time Zone --mmss --mm ---ss - -# No Time Zone - decimals --mmss,sṡ --mm,mṁ ---ss,sṡ -"""}, - "extended": {"complete": u""" -# No Time Zone -hh:mm:ss - -# No Time Zone - decimals -hh:mm:ss,sṡ -hh:mm,mṁ -hh,hḣ # Deviation? Not allowed in standard ? -""", - "reduced": u""" -# No Time Zone -hh:mm -hh # Deviation? Not allowed in standard ? -""", - "truncated": u""" -# No Time Zone --mm:ss --mm # Deviation? Not allowed in standard ? ---ss # Deviation? Not allowed in standard ? - -# No Time Zone - decimals --mm:ss,sṡ --mm,mṁ # Deviation? Not allowed in standard ? ---ss,sṡ # Deviation? Not allowed in standard ? -"""}} - - TIMEZONE_EXPRESSIONS = {"basic": u""" -Z -±hh -±hhmm -""", - "extended": u""" -Z -±hh # Deviation? Not allowed in standard? -±hh:mm -"""} - - DATE_CHAR_REGEXES = [(u"±", "(?P[+-])"), - (u"cc", "(?P\d\d)"), - (u"YY", "(?P\d\d)"), - (u"MM", "(?P\d\d)"), - (u"DDD", "(?P\d\d\d)"), - (u"DD", "(?P\d\d)"), - (u"Www", "W(?P\d\d)"), - (u"D", "(?P\d)"), - (u"ỵ", "(?P\d)"), - (u"^---", "(?P---)"), - (u"^--", "(?P--)"), - (u"^-", "(?P-)"), - (u"^~", "(?P)")] - TIME_CHAR_REGEXES = [(u"(?<=^hh)mm", "(?P\d\d)"), - (u"(?<=^hh:)mm", "(?P\d\d)"), - (u"(?<=^-)mm", "(?P\d\d)"), - (u"^hh", "(?P\d\d)"), - (u",hḣ", "[,.](?P\d+)"), - (u",mṁ", "[,.](?P\d+)"), - (u"ss", "(?P\d\d)"), - (u",sṡ", "[,.](?P\d+)"), - (u"^--", "(?P--)"), - (u"^-", "(?P-)")] - TIMEZONE_CHAR_REGEXES = [ - (u"(?<=±hh)mm", "(?P\d\d)"), - (u"(?<=±hh:)mm", "(?P\d\d)"), - (u"(?<=±)hh", "(?P\d\d)"), - (u"±", "(?P[+-])"), - (u"Z", "(?PZ)") - ] - TIME_DESIGNATOR = "T" - def __init__(self, num_expanded_year_digits=2, allow_truncated=False, allow_only_basic=False, assume_utc=False, - format_function=None): - expanded_year_digit_regex = "\d" * num_expanded_year_digits + dump_format=None): self.expanded_year_digits = num_expanded_year_digits - self.DATE_CHAR_REGEXES.append( - (u"Ϋ", - "(?P" + expanded_year_digit_regex + ")") - ) self.allow_truncated = allow_truncated self.allow_only_basic = allow_only_basic - self.format_function = format_function + self.dump_format = dump_format self._generate_regexes() def _generate_regexes(self): """Generate combined date time strings.""" - date_map = self.DATE_EXPRESSIONS - time_map = self.TIME_EXPRESSIONS - timezone_map = self.TIMEZONE_EXPRESSIONS + date_map = parser_spec.DATE_EXPRESSIONS + time_map = parser_spec.TIME_EXPRESSIONS + timezone_map = parser_spec.TIMEZONE_EXPRESSIONS self._date_regex_map = {} self._time_regex_map = {} self._timezone_regex_map = {} @@ -339,28 +182,34 @@ def get_expressions(self, text): def parse_date_expression_to_regex(self, expression): """Construct regular expressions for the date.""" - for expr_regex, substitute in self.DATE_CHAR_REGEXES: + for expr_regex, substitute, format in ( + parser_spec.get_date_translate_info( + self.expanded_year_digits)): expression = re.sub(expr_regex, substitute, expression) expression = "^" + expression + "$" return expression def parse_time_expression_to_regex(self, expression): """Construct regular expressions for the time.""" - for expr_regex, substitute in self.TIME_CHAR_REGEXES: + for expr_regex, substitute, format in ( + parser_spec.get_time_translate_info()): expression = re.sub(expr_regex, substitute, expression) expression = "^" + expression + "$" return expression def parse_timezone_expression_to_regex(self, expression): """Construct regular expressions for the timezone.""" - for expr_regex, substitute in self.TIMEZONE_CHAR_REGEXES: + for expr_regex, substitute, format in ( + parser_spec.get_timezone_translate_info( + )): expression = re.sub(expr_regex, substitute, expression) expression = "^" + expression + "$" return expression - def parse(self, timepoint_string): + def parse(self, timepoint_string, dump_format=None): """Parse a user-supplied timepoint string.""" - date_time_timezone = timepoint_string.split(self.TIME_DESIGNATOR) + date_time_timezone = timepoint_string.split( + parser_spec.TIME_DESIGNATOR) if len(date_time_timezone) == 1: date = date_time_timezone[0] keys, date_info = self.get_date_info(date) @@ -387,30 +236,29 @@ def parse(self, timepoint_string): bad_types = [] if time_timezone.endswith("Z"): time, timezone = time_timezone[:-1], "Z" - else: - if "+" in time_timezone: - time, timezone = time_timezone.split("+") - timezone = "+" + timezone - elif "-" in time_timezone: - time, timezone = time_timezone.rsplit("-", 1) - timezone = "-" + timezone - # Make sure this isn't just a truncated time. - try: - time_info = self.get_time_info( - time, - bad_formats=bad_formats, - bad_types=bad_types - ) - timezone_info = self.get_timezone_info( - timezone, - bad_formats=bad_formats - ) - except TimeSyntaxError: - time = time_timezone - timezone = None - else: + elif "+" in time_timezone: + time, timezone = time_timezone.split("+") + timezone = "+" + timezone + elif "-" in time_timezone: + time, timezone = time_timezone.rsplit("-", 1) + timezone = "-" + timezone + # Make sure this isn't just a truncated time. + try: + time_info = self.get_time_info( + time, + bad_formats=bad_formats, + bad_types=bad_types + ) + timezone_info = self.get_timezone_info( + timezone, + bad_formats=bad_formats + ) + except TimeSyntaxError: time = time_timezone timezone = None + else: + time = time_timezone + timezone = None if timezone is None: timezone_info = {} else: @@ -475,9 +323,10 @@ def parse(self, timepoint_string): info["truncated"] = True if truncated_property is not None: info["truncated_property"] = truncated_property - if self.format_function is not None: - info.update({"format_function": self.format_function}) - return isodata.TimePoint(**info) + if dump_format is None and self.dump_format: + dump_format = self.dump_format + info.update({"dump_format": dump_format}) + return data.TimePoint(**info) def get_date_info(self, date_string, bad_types=None): """Return the format and properties from a date string.""" @@ -568,7 +417,7 @@ def parse(self, expression): value = value.replace(",", ".") value = float(value) result_map[key] = value - return isodata.TimeInterval(**result_map) + return data.TimeInterval(**result_map) raise TimeSyntaxError("Not an ISO 8601 duration representation: %s" % expression) diff --git a/isodatetime/run_tests b/isodatetime/run_tests new file mode 100755 index 0000000..0cde8f6 --- /dev/null +++ b/isodatetime/run_tests @@ -0,0 +1,22 @@ +#!/bin/bash +# -*- coding: utf-8 -*- +#----------------------------------------------------------------------------- +# (C) British Crown Copyright 2013-2014 Met Office. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +#----------------------------------------------------------------------------- +# Run tests for the ISO 8601 parsing and data model functionality.""" +#----------------------------------------------------------------------------- +cd $(dirname $0)/../ +python -m isodatetime.tests diff --git a/isodatetime/tests.py b/isodatetime/tests.py index bd94990..922fd1a 100644 --- a/isodatetime/tests.py +++ b/isodatetime/tests.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- #----------------------------------------------------------------------------- -# (C) British Crown Copyright 2013 Met Office. +# (C) British Crown Copyright 2013-2014 Met Office. # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by @@ -21,40 +21,45 @@ import copy import unittest -import isodata -import isoparsers +from . import data +from . import parsers +from . import parser_spec def get_timeintervalparser_tests(): """Yield tests for the time interval parser.""" test_expressions = { - "P3Y": str(isodata.TimeInterval(years=3)), - "P90Y": str(isodata.TimeInterval(years=90)), - "P1Y2M": str(isodata.TimeInterval(years=1, months=2)), - "P20Y2M": str(isodata.TimeInterval(years=20, months=2)), - "P2M": str(isodata.TimeInterval(months=2)), - "P52M": str(isodata.TimeInterval(months=52)), - "P20Y10M2D": str(isodata.TimeInterval(years=20, months=10, days=2)), - "P1Y3D": str(isodata.TimeInterval(years=1, days=3)), - "P4M1D": str(isodata.TimeInterval(months=4, days=1)), - "P3Y404D": str(isodata.TimeInterval(years=3, days=404)), - "P30Y2D": str(isodata.TimeInterval(years=30, days=2)), - "PT6H": str(isodata.TimeInterval(hours=6)), - "PT1034H": str(isodata.TimeInterval(hours=1034)), - "P3YT4H2M": str(isodata.TimeInterval(years=3, hours=4, minutes=2)), - "P30Y2DT10S": str(isodata.TimeInterval(years=30, days=2, seconds=10)), - "PT2S": str(isodata.TimeInterval(seconds=2)), - "PT2.5S": str(isodata.TimeInterval(seconds=2.5)), - "PT2,5S": str(isodata.TimeInterval(seconds=2.5)), - "PT5.5023H": str(isodata.TimeInterval(hours=5.5023)), - "PT5,5023H": str(isodata.TimeInterval(hours=5.5023)), - "P5W": str(isodata.TimeInterval(weeks=5)), - "P100W": str(isodata.TimeInterval(weeks=100)) + "P3Y": str(data.TimeInterval(years=3)), + "P90Y": str(data.TimeInterval(years=90)), + "P1Y2M": str(data.TimeInterval(years=1, months=2)), + "P20Y2M": str(data.TimeInterval(years=20, months=2)), + "P2M": str(data.TimeInterval(months=2)), + "P52M": str(data.TimeInterval(months=52)), + "P20Y10M2D": str(data.TimeInterval(years=20, months=10, days=2)), + "P1Y3D": str(data.TimeInterval(years=1, days=3)), + "P4M1D": str(data.TimeInterval(months=4, days=1)), + "P3Y404D": str(data.TimeInterval(years=3, days=404)), + "P30Y2D": str(data.TimeInterval(years=30, days=2)), + "PT6H": str(data.TimeInterval(hours=6)), + "PT1034H": str(data.TimeInterval(hours=1034)), + "P3YT4H2M": str(data.TimeInterval(years=3, hours=4, minutes=2)), + "P30Y2DT10S": str(data.TimeInterval(years=30, days=2, seconds=10)), + "PT2S": str(data.TimeInterval(seconds=2)), + "PT2.5S": str(data.TimeInterval(seconds=2.5)), + "PT2,5S": str(data.TimeInterval(seconds=2.5)), + "PT5.5023H": str(data.TimeInterval(hours=5.5023)), + "PT5,5023H": str(data.TimeInterval(hours=5.5023)), + "P5W": str(data.TimeInterval(weeks=5)), + "P100W": str(data.TimeInterval(weeks=100)) } for expression, ctrl_result in test_expressions.items(): yield expression, ctrl_result +def get_timepointdumper_tests(): + return + + def get_timepointparser_tests(allow_only_basic=False, allow_truncated=False): """Yield tests for the time point parser.""" @@ -329,6 +334,7 @@ def get_timepointparser_tests(allow_only_basic=False, if allow_truncated: date_combo_ok_keys = ["complete", "truncated"] time_combo_ok_keys = ["complete", "reduced"] + time_designator = parser_spec.TIME_DESIGNATOR for format_type in format_ok_keys: date_format_tests = test_date_map[format_type] time_format_tests = test_time_map[format_type] @@ -339,7 +345,7 @@ def get_timepointparser_tests(allow_only_basic=False, for date_expr, info in date_format_tests[date_key].items(): yield date_expr, info for date_key in date_combo_ok_keys: - date_tests = copy.deepcopy(date_format_tests[date_key]) + date_tests = date_format_tests[date_key] # Add a blank date for time-only testing. for date_expr, info in date_tests.items(): for time_key in time_combo_ok_keys: @@ -347,7 +353,7 @@ def get_timepointparser_tests(allow_only_basic=False, for time_expr, time_info in time_items: combo_expr = ( date_expr + - isoparsers.TimePointParser.TIME_DESIGNATOR + + time_designator + time_expr ) combo_info = {} @@ -367,8 +373,10 @@ def get_timepointparser_tests(allow_only_basic=False, for time_key in time_format_tests: time_tests = time_format_tests[time_key] for time_expr, time_info in time_tests.items(): - combo_expr = (isoparsers.TimePointParser.TIME_DESIGNATOR + - time_expr) + combo_expr = ( + time_designator + + time_expr + ) # Add truncated (no date). combo_info = {"truncated": True} for key, value in time_info.items(): @@ -385,45 +393,45 @@ def get_timepointparser_tests(allow_only_basic=False, def get_timerecurrence_tests(): - """Return test expressions for isodata.TimeRecurrence.""" + """Return test expressions for data.TimeRecurrence.""" return [ ("R3/1001-W01-1T00:00:00Z/1002-W52-6T00:00:00-05:30", - ["1001-W01-1T00:00:00Z", "1001-W53-3T14:45:00Z", - "1002-W52-6T05:30:00Z"]), + ["1001-W01-1T00:00:00Z", "1001-W53-3T14:45:00Z", + "1002-W52-6T05:30:00Z"]), ("R3/P700D/1957-W01-1T06,5Z", - ["1953-W10-1T06,5Z", "1955-W05-1T06,5Z", "1957-W01-1T06,5Z"]), + ["1953-W10-1T06,5Z", "1955-W05-1T06,5Z", "1957-W01-1T06,5Z"]), ("R3/P5DT2,5S/1001-W11-1T00:30:02,5-02:00", - ["1001-W09-5T00:29:57,5-02:00", "1001-W10-3T00:30:00-02:00", - "1001-W11-1T00:30:02,5-02:00"]), + ["1001-W09-5T00:29:57,5-02:00", "1001-W10-3T00:30:00-02:00", + "1001-W11-1T00:30:02,5-02:00"]), ("R/+000001W457T060000Z/P4M1D", - ["+000001-W45-7T06:00:00Z", "+000002-W11-2T06:00:00Z", - "+000002-W28-6T06:00:00Z"]), + ["+000001-W45-7T06:00:00Z", "+000002-W11-2T06:00:00Z", + "+000002-W28-6T06:00:00Z"]), ("R/P4M1DT6M/+002302-002T06:00:00-00:30", - ["+002302-002T06:00:00-00:30", "+002301-244T05:54:00-00:30", - "+002301-120T05:48:00-00:30"]), + ["+002302-002T06:00:00-00:30", "+002301-244T05:54:00-00:30", + "+002301-120T05:48:00-00:30"]), ("R/P30Y2DT15H/-099994-02-12T17:00:00-02:30", - ["-099994-02-12T17:00:00-02:30", "-100024-02-10T02:00:00-02:30", - "-100054-02-07T11:00:00-02:30"]), + ["-099994-02-12T17:00:00-02:30", "-100024-02-10T02:00:00-02:30", + "-100054-02-07T11:00:00-02:30"]), ("R/-100024-02-10T17:00:00-12:30/PT5.5H", - ["-100024-02-10T17:00:00-12:30", "-100024-02-10T22,5-12:30", - "-100024-02-11T04:00:00-12:30"]) + ["-100024-02-10T17:00:00-12:30", "-100024-02-10T22,5-12:30", + "-100024-02-11T04:00:00-12:30"]) ] def get_timerecurrenceparser_tests(): """Yield tests for the time recurrence parser.""" test_points = ["-100024-02-10T17:00:00-12:30", - "+000001-W45-7T06Z", "1001W011", - "1955W051T06,5Z", "1999-06-01", - "1967-056", "+5002000830T235902,345", - "1765-W04"] + "+000001-W45-7T06Z", "1001W011", + "1955W051T06,5Z", "1999-06-01", + "1967-056", "+5002000830T235902,345", + "1765-W04"] for reps in [None, 1, 2, 3, 10]: if reps is None: reps_string = "" else: reps_string = str(reps) - point_parser = isoparsers.TimePointParser() - interval_parser = isoparsers.TimeIntervalParser() + point_parser = parsers.TimePointParser() + interval_parser = parsers.TimeIntervalParser() for point_expr in test_points: interval_tests = get_timeintervalparser_tests() start_point = point_parser.parse(point_expr) @@ -462,7 +470,7 @@ def assertEqual(self, test, control, source=None): def test_timeinterval_parser(self): """Test the time interval parsing.""" - parser = isoparsers.TimeIntervalParser() + parser = parsers.TimeIntervalParser() for expression, ctrl_result in get_timeintervalparser_tests(): try: test_result = str(parser.parse(expression)) @@ -473,16 +481,17 @@ def test_timeinterval_parser(self): ) self.assertEqual(test_result, ctrl_result, expression) - def test_timepoint(self): + def _test_timepoint(self): """Test the manipulation of dates and times (takes a while).""" import datetime import random my_date = datetime.datetime(1801, 1, 1) while my_date <= datetime.datetime(2401, 2, 1): ctrl_data = my_date.isocalendar() - test_date = isodata.TimePoint(year=my_date.year, - month_of_year=my_date.month, - day_of_month=my_date.day) + test_date = data.TimePoint( + year=my_date.year, + month_of_year=my_date.month, + day_of_month=my_date.day) test_data = test_date.get_week_date() self.assertEqual(test_data, ctrl_data) ctrl_data = (my_date.year, my_date.month, my_date.day) @@ -491,7 +500,7 @@ def test_timepoint(self): ctrl_data = my_date.toordinal() year, day_of_year = test_date.get_ordinal_date() test_data = day_of_year - test_data += isodata.get_days_since_1_ad(year - 1) + test_data += data.get_days_since_1_ad(year - 1) self.assertEqual(test_data, ctrl_data) for attribute, attr_max in [("weeks", 110), ("days", 770), @@ -503,13 +512,13 @@ def test_timepoint(self): ctrl_data = my_date + datetime.timedelta(**kwargs) ctrl_data = (ctrl_data.year, ctrl_data.month, ctrl_data.day) test_data = ( - test_date + isodata.TimeInterval( + test_date + data.TimeInterval( **kwargs)).get_calendar_date() self.assertEqual(test_data, ctrl_data) ctrl_data = (my_date - datetime.timedelta(**kwargs)) ctrl_data = (ctrl_data.year, ctrl_data.month, ctrl_data.day) test_data = ( - test_date - isodata.TimeInterval( + test_date - data.TimeInterval( **kwargs)).get_calendar_date() self.assertEqual(test_data, ctrl_data) ctrl_data = (my_date + datetime.timedelta(minutes=450) + @@ -518,9 +527,9 @@ def test_timepoint(self): ctrl_data = [(ctrl_data.year, ctrl_data.month, ctrl_data.day), (ctrl_data.hour, ctrl_data.minute, ctrl_data.second)] test_data = ( - test_date + isodata.TimeInterval(minutes=450) + - isodata.TimeInterval(hours=5) - - isodata.TimeInterval(weeks=5, seconds=500) + test_date + data.TimeInterval(minutes=450) + + data.TimeInterval(hours=5) - + data.TimeInterval(weeks=5, seconds=500) ) test_data = [test_data.get_calendar_date(), test_data.get_hour_minute_second()] @@ -528,26 +537,42 @@ def test_timepoint(self): timedelta = datetime.timedelta(days=1) my_date += timedelta + def test_timepoint_dumper(self): + """Test the dumping of TimePoint instances.""" + parser = parsers.TimePointParser(allow_truncated=True) + for expression, timepoint_kwargs in get_timepointparser_tests( + allow_truncated=True): + ctrl_timepoint = data.TimePoint(**timepoint_kwargs) + try: + test_timepoint = parser.parse(str(ctrl_timepoint)) + except parsers.TimeSyntaxError as syn_exc: + raise ValueError( + "Parsing failed for the dump of {0}: {1}".format( + expression, syn_exc)) + self.assertEqual(test_timepoint, + ctrl_timepoint, expression) + def test_timepoint_parser(self): """Test the parsing of date/time expressions.""" - parser = isoparsers.TimePointParser(allow_truncated=True) + parser = parsers.TimePointParser(allow_truncated=True) for expression, timepoint_kwargs in get_timepointparser_tests( allow_truncated=True): timepoint_kwargs = copy.deepcopy(timepoint_kwargs) try: test_data = str(parser.parse(expression)) - except isoparsers.TimeSyntaxError: - raise ValueError("Parsing failed for %s" % expression) - ctrl_data = str(isodata.TimePoint(**timepoint_kwargs)) + except parsers.TimeSyntaxError as syn_exc: + raise ValueError("Parsing failed for {0}: {1}".format( + expression, syn_exc)) + ctrl_data = str(data.TimePoint(**timepoint_kwargs)) self.assertEqual(test_data, ctrl_data, expression) def test_timerecurrence(self): """Test the recurring date/time series data model.""" - parser = isoparsers.TimeRecurrenceParser() + parser = parsers.TimeRecurrenceParser() for expression, ctrl_results in get_timerecurrence_tests(): try: test_recurrence = parser.parse(expression) - except isoparsers.TimeSyntaxError: + except parsers.TimeSyntaxError: raise ValueError( "TimeRecurrenceParser test failed to parse '%s'" % expression @@ -561,13 +586,14 @@ def test_timerecurrence(self): def test_timerecurrence_parser(self): """Test the recurring date/time series parsing.""" - parser = isoparsers.TimeRecurrenceParser() + parser = parsers.TimeRecurrenceParser() for expression, test_info in get_timerecurrenceparser_tests(): try: test_data = str(parser.parse(expression)) - except isoparsers.TimeSyntaxError: + except parsers.TimeSyntaxError as e: + print e raise ValueError("Parsing failed for %s" % expression) - ctrl_data = str(isodata.TimeRecurrence(**test_info)) + ctrl_data = str(data.TimeRecurrence(**test_info)) self.assertEqual(test_data, ctrl_data, expression) diff --git a/isodatetime/util.py b/isodatetime/util.py new file mode 100644 index 0000000..b3c7917 --- /dev/null +++ b/isodatetime/util.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +#----------------------------------------------------------------------------- +# (C) British Crown Copyright 2013-2014 Met Office. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . +#----------------------------------------------------------------------------- + +"""Provide an optimisation decorator and other utilities.""" + + +def cache_results(func): + """Decorator to store results for given inputs. + + func is the decorated function. + + A maximum of 100000 arg-value pairs are stored. + + """ + cache = {} + + def wrap_func(*args, **kwargs): + key = (str(args), str(kwargs)) + if key in cache: + return cache[key] + else: + results = func(*args, **kwargs) + if len(cache) < 100000: + cache[key] = results + return results + return wrap_func From b7c5e698fedf78df06f023c279c9d0809525c40d Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Thu, 6 Feb 2014 17:34:05 +0000 Subject: [PATCH 06/14] Dumper fixes --- isodatetime/data.py | 57 ++++++++++++++++++++++++++++++++---------- isodatetime/dumpers.py | 3 ++- isodatetime/parsers.py | 42 ++++++++++++++++++++----------- isodatetime/tests.py | 10 ++++---- 4 files changed, 79 insertions(+), 33 deletions(-) diff --git a/isodatetime/data.py b/isodatetime/data.py index 5b5ba3c..d50ae4e 100644 --- a/isodatetime/data.py +++ b/isodatetime/data.py @@ -707,14 +707,24 @@ def get(self, property_name): if property_name == "year_of_century": return abs(self.year) % 100 if property_name == "month_of_year": + if self.month_of_year is not None: + return self.month_of_year return self.get_calendar_date()[1] if property_name == "day_of_year": + if self.day_of_year is not None: + return self.day_of_year return self.get_ordinal_date()[1] if property_name == "day_of_month": + if self.day_of_month is not None: + return self.day_of_month return self.get_calendar_date()[2] if property_name == "week_of_year": + if self.week_of_year is not None: + return self.week_of_year return self.get_week_date()[1] if property_name == "day_of_week": + if self.day_of_week is not None: + return self.day_of_week return self.get_week_date()[2] if property_name == "year_of_decade": return abs(self.year) % 10 @@ -724,25 +734,26 @@ def get(self, property_name): return int(self.hour_of_day) if property_name == "hour_of_day_decimal_string": string = "%f" % (float(self.hour_of_day) - int(self.hour_of_day)) - return string.replace("0.", ",", 1) + return string.replace("0.", ",", 1).rstrip("0") if property_name == "minute_of_hour_decimal_string": string = "%f" % (float(self.minute_of_hour) - int(self.minute_of_hour)) - return string.replace("0.", ",", 1) + return string.replace("0.", ",", 1).rstrip("0") if property_name == "second_of_minute": return int(self.second_of_minute) if property_name == "second_of_minute_decimal_string": string = "%f" % (float(self.second_of_minute) - int(self.second_of_minute)) - return string.replace("0.", ",", 1) + return string.replace("0.", ",", 1).rstrip("0") if property_name == "time_zone_minute_abs": return abs(self.time_zone.minutes) if property_name == "time_zone_hour_abs": - return abs(self.time_zone.minutes) + return abs(self.time_zone.hours) if property_name == "time_zone_sign": if self.time_zone.hours < 0 or self.time_zone.minutes < 0: return "-" return "+" + raise NotImplementedError(property_name) def get_second_of_day(self): """Return the seconds elapsed since the start of the day.""" @@ -1263,25 +1274,33 @@ def _get_dump_format(self): time_string += ":ss" if seconds_int != self.second_of_minute: time_string += ",tt" - time_string += u" ±hh:mm" + if time_string: + if self.time_zone.hours == 0 and self.time_zone.minutes == 0: + time_string += "Z" + else: + time_string += u"±hh:mm" return date_string + time_string def _get_truncated_dump_format(self): year_string = "-" if self.truncated_property == "year_of_decade": - year_string = "-" + "y" + year_string = "-" + "z" elif self.truncated_property == "year_of_century": - if self.month_of_year is None: - year_string = "YY" - else: + if (self.day_of_month is None and + self.month_of_year is not None): year_string = "-YY" + else: + year_string = "YY" date_string = year_string if self.month_of_year is not None: date_string = year_string + "-MM" if self.day_of_month is not None: date_string += "-DD" elif self.day_of_month is not None: - date_string = year_string + "-DD" + if year_string == "-": + date_string = year_string + "--DD" + else: + date_string = year_string + "-DD" if self.day_of_year is not None: day_string = "DDD" if year_string == "-": @@ -1289,11 +1308,17 @@ def _get_truncated_dump_format(self): else: date_string = year_string + "-" + day_string if self.week_of_year is not None: - date_string = year_string + "-Www" + if year_string == "-": + date_string = year_string + "Www" + else: + date_string = year_string + "-Www" if self.day_of_week is not None: date_string += "-D" elif self.day_of_week is not None: - date_string = year_string + "-W-D" + if year_string == "-": + date_string = year_string + "W-D" + else: + date_string = year_string + "-W-D" time_string = "" if self.hour_of_day is not None: time_string = "Thh" @@ -1316,7 +1341,13 @@ def _get_truncated_dump_format(self): if seconds_int != self.second_of_minute: time_string += ",tt" if time_string: - time_string += u"±hh:mm" + if self.time_zone.hours == 0 and self.time_zone.minutes == 0: + time_string += "Z" + else: + time_string += u"±hh:mm" + if date_string == "YY": + date_string = "-YY" + time_string = time_string.replace(":", "") return date_string + time_string __repr__ = __str__ diff --git a/isodatetime/dumpers.py b/isodatetime/dumpers.py index f2e0def..dd0c8d4 100644 --- a/isodatetime/dumpers.py +++ b/isodatetime/dumpers.py @@ -18,6 +18,7 @@ """This provides data model dumping functionality.""" +import copy import re from . import parser_spec @@ -63,7 +64,7 @@ def dump(self, timepoint, formatting_string): "day_of_month" in properties or "day_of_year" in properties)): # We need the year to be in week years. - timepoint.to_week_date() + timecopy = copy.copy(timepoint).to_week_date() property_map = {} for property_ in properties: property_map[property_] = timepoint.get(property_) diff --git a/isodatetime/parsers.py b/isodatetime/parsers.py index a9fb9e6..2864ce3 100644 --- a/isodatetime/parsers.py +++ b/isodatetime/parsers.py @@ -130,6 +130,7 @@ def __init__(self, num_expanded_year_digits=2, self.expanded_year_digits = num_expanded_year_digits self.allow_truncated = allow_truncated self.allow_only_basic = allow_only_basic + self.assume_utc = assume_utc self.dump_format = dump_format self._generate_regexes() @@ -261,6 +262,9 @@ def parse(self, timepoint_string, dump_format=None): timezone = None if timezone is None: timezone_info = {} + if self.assume_utc: + timezone_info["time_zone_hour"] = 0 + timezone_info["time_zone_minute"] = 0 else: timezone_info = self.get_timezone_info( timezone, @@ -286,19 +290,28 @@ def parse(self, timepoint_string, dump_format=None): "year_of_century" in date_info): truncated_property = "year_of_century" date_info["truncated"] = True - year = int(date_info.get("year", 0)) - if "year_of_decade" in date_info: - year += int(date_info.pop("year_of_decade")) - truncated_property = "year_of_decade" - year += int(date_info.pop("year_of_century", 0)) - year += 100 * int(date_info.pop("century", 0)) - expanded_year = date_info.pop("expanded_year", 0) - if expanded_year: - date_info["expanded_year_digits"] = self.expanded_year_digits - year += 10000 * int(expanded_year) - if date_info.pop("year_sign", "+") == "-": - year *= -1 - date_info["year"] = year + is_year_present = True + if date_info.get("truncated"): + is_year_present = False + for property_ in ["year", "year_of_decade", "century", + "year_of_century", "expanded_year", + "year_sign"]: + if date_info.get(property_) is not None: + is_year_present = True + if is_year_present: + year = int(date_info.get("year", 0)) + if "year_of_decade" in date_info: + year += int(date_info.pop("year_of_decade")) + truncated_property = "year_of_decade" + year += int(date_info.pop("year_of_century", 0)) + year += 100 * int(date_info.pop("century", 0)) + expanded_year = date_info.pop("expanded_year", 0) + if expanded_year: + date_info["expanded_year_digits"] = self.expanded_year_digits + year += 10000 * int(expanded_year) + if date_info.pop("year_sign", "+") == "-": + year *= -1 + date_info["year"] = year for key, value in date_info.items(): try: date_info[key] = int(value) @@ -325,7 +338,8 @@ def parse(self, timepoint_string, dump_format=None): info["truncated_property"] = truncated_property if dump_format is None and self.dump_format: dump_format = self.dump_format - info.update({"dump_format": dump_format}) + if dump_format is not None: + info.update({"dump_format": dump_format}) return data.TimePoint(**info) def get_date_info(self, date_string, bad_types=None): diff --git a/isodatetime/tests.py b/isodatetime/tests.py index 922fd1a..39eb32f 100644 --- a/isodatetime/tests.py +++ b/isodatetime/tests.py @@ -464,11 +464,11 @@ def assertEqual(self, test, control, source=None): if source is None: info = None else: - info = ("Source %s produced\n%s, should be\n%s" % + info = ("Source %s produced:\n'%s'\nshould be:\n'%s'" % (source, test, control)) super(TestSuite, self).assertEqual(test, control, info) - def test_timeinterval_parser(self): + def _test_timeinterval_parser(self): """Test the time interval parsing.""" parser = parsers.TimeIntervalParser() for expression, ctrl_result in get_timeintervalparser_tests(): @@ -552,7 +552,7 @@ def test_timepoint_dumper(self): self.assertEqual(test_timepoint, ctrl_timepoint, expression) - def test_timepoint_parser(self): + def _test_timepoint_parser(self): """Test the parsing of date/time expressions.""" parser = parsers.TimePointParser(allow_truncated=True) for expression, timepoint_kwargs in get_timepointparser_tests( @@ -566,7 +566,7 @@ def test_timepoint_parser(self): ctrl_data = str(data.TimePoint(**timepoint_kwargs)) self.assertEqual(test_data, ctrl_data, expression) - def test_timerecurrence(self): + def _test_timerecurrence(self): """Test the recurring date/time series data model.""" parser = parsers.TimeRecurrenceParser() for expression, ctrl_results in get_timerecurrence_tests(): @@ -584,7 +584,7 @@ def test_timerecurrence(self): test_results.append(str(time_point)) self.assertEqual(test_results, ctrl_results, expression) - def test_timerecurrence_parser(self): + def _test_timerecurrence_parser(self): """Test the recurring date/time series parsing.""" parser = parsers.TimeRecurrenceParser() for expression, test_info in get_timerecurrenceparser_tests(): From f0886646a35f6529cba0895733db581999dae0f9 Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Fri, 7 Feb 2014 00:38:03 +0000 Subject: [PATCH 07/14] fix dumper --- isodatetime/data.py | 51 +++++++++++++++++++------------ isodatetime/dumpers.py | 15 ++++----- isodatetime/parser_spec.py | 62 +++++++++++++++++++++----------------- isodatetime/parsers.py | 6 ++-- isodatetime/tests.py | 17 +++++------ 5 files changed, 83 insertions(+), 68 deletions(-) diff --git a/isodatetime/data.py b/isodatetime/data.py index d50ae4e..731d8bf 100644 --- a/isodatetime/data.py +++ b/isodatetime/data.py @@ -817,6 +817,10 @@ def set_time_zone(self, dest_time_zone): self.apply_time_zone_offset(dest_time_zone - self.get_time_zone()) self.time_zone = dest_time_zone + def set_time_zone_to_utc(self): + """Set the time zone to UTC, if it's not already.""" + self.set_time_zone(TimeZone(hours=0, minutes=0)) + def to_calendar_date(self): """Reformat the date in years, month-of-year, day-of-month.""" year, month, day = self.get_calendar_date() @@ -1320,26 +1324,33 @@ def _get_truncated_dump_format(self): else: date_string = year_string + "-W-D" time_string = "" - if self.hour_of_day is not None: + if (self.hour_of_day is None and + (self.minute_of_hour is not None or + self.second_of_minute is not None)): + time_string = "T-" + elif (self.hour_of_day is not None and + int(self.hour_of_day) != self.hour_of_day): + time_string = "Thh,ii" + elif self.hour_of_day is not None: time_string = "Thh" - if int(self.hour_of_day) != self.hour_of_day: - remainder = self.hour_of_day - int(self.hour_of_day) - time_string += ",ii" - else: - if self.minute_of_hour is None: - time_string += ":00:00" - else: - time_string += ":mm" - if int(self.minute_of_hour) != self.minute_of_hour: - time_string += ",nn" - else: - if self.second_of_minute is None: - time_string += ":00" - else: - seconds_int = int(self.second_of_minute) - time_string += ":ss" - if seconds_int != self.second_of_minute: - time_string += ",tt" + if self.minute_of_hour is None and self.second_of_minute is not None: + time_string += "-" + elif (self.minute_of_hour is not None and + int(self.minute_of_hour) != self.minute_of_hour): + if self.hour_of_day is not None: + time_string += ":" + time_string += "mm,nn" + elif self.minute_of_hour is not None: + if self.hour_of_day is not None: + time_string += ":" + time_string += "mm" + if self.second_of_minute is not None: + seconds_int = int(self.second_of_minute) + if self.minute_of_hour is not None: + time_string += ":" + time_string += "ss" + if seconds_int != self.second_of_minute: + time_string += ",tt" if time_string: if self.time_zone.hours == 0 and self.time_zone.minutes == 0: time_string += "Z" @@ -1348,6 +1359,8 @@ def _get_truncated_dump_format(self): if date_string == "YY": date_string = "-YY" time_string = time_string.replace(":", "") + if date_string == "-": + date_string = "" return date_string + time_string __repr__ = __str__ diff --git a/isodatetime/dumpers.py b/isodatetime/dumpers.py index dd0c8d4..37eb3b3 100644 --- a/isodatetime/dumpers.py +++ b/isodatetime/dumpers.py @@ -29,8 +29,6 @@ class TimePointDumper(object): """Dump TimePoint instances to strings.""" - RE_PROP = re.compile("%\(([^)]*)\)") - def __init__(self, num_expanded_year_digits=2): self._rec_formats = {"date": [], "time": [], "timezone": []} self._time_designator = parser_spec.TIME_DESIGNATOR @@ -40,13 +38,9 @@ def __init__(self, num_expanded_year_digits=2): "date"), (parser_spec.get_time_translate_info(), "time"), (parser_spec.get_timezone_translate_info(), "timezone")]: - for regex, regex_sub, format_sub in info: + for regex, regex_sub, format_sub, prop_name in info: rec = re.compile(regex) - prop = None - prop_results = self.RE_PROP.search(format_sub) - if prop_results: - prop = prop_results.groups()[0] - self._rec_formats[key].append((rec, format_sub, prop)) + self._rec_formats[key].append((rec, format_sub, prop_name)) def dump(self, timepoint, formatting_string): """Dump a timepoint according to formatting_string. @@ -64,7 +58,10 @@ def dump(self, timepoint, formatting_string): "day_of_month" in properties or "day_of_year" in properties)): # We need the year to be in week years. - timecopy = copy.copy(timepoint).to_week_date() + timepoint = copy.copy(timepoint).to_week_date() + if "Z" in expression and ( + timepoint.time_zone.hours or timepoint.time_zone.minutes): + timepoint = copy.copy(timepoint.set_time_zone_to_utc()) property_map = {} for property_ in properties: property_map[property_] = timepoint.get(property_) diff --git a/isodatetime/parser_spec.py b/isodatetime/parser_spec.py index cf3d6d6..27a5e4c 100644 --- a/isodatetime/parser_spec.py +++ b/isodatetime/parser_spec.py @@ -140,43 +140,48 @@ """} TIME_DESIGNATOR = "T" _DATE_TRANSLATE_INFO = [ - (u"±", "(?P[-+])", "%(year_sign)s"), - (u"CC", "(?P\d\d)", "%(century)02d"), - (u"YY", "(?P\d\d)", "%(year_of_century)02d"), - (u"MM", "(?P\d\d)", "%(month_of_year)02d"), - (u"DDD", "(?P\d\d\d)", "%(day_of_year)03d"), - (u"DD", "(?P\d\d)", "%(day_of_month)02d"), - (u"Www", "W(?P\d\d)", "W%(week_of_year)02d"), - (u"D", "(?P\d)", "%(day_of_week)01d"), - (u"z", "(?P\d)", "%(year_of_decade)01d"), - (u"^---", "(?P---)", "---"), - (u"^--", "(?P--)", "--"), - (u"^-", "(?P-)", "-") + (u"±", "(?P[-+])", "%(year_sign)s", "year_sign"), + (u"CC", "(?P\d\d)", "%(century)02d", "century"), + (u"YY", "(?P\d\d)", "%(year_of_century)02d", + "year_of_century"), + (u"MM", "(?P\d\d)", "%(month_of_year)02d", "month_of_year"), + (u"DDD", "(?P\d\d\d)", "%(day_of_year)03d", "day_of_year"), + (u"DD", "(?P\d\d)", "%(day_of_month)02d", "day_of_month"), + (u"Www", "W(?P\d\d)", "W%(week_of_year)02d", "week_of_year"), + (u"D", "(?P\d)", "%(day_of_week)01d", "day_of_week"), + (u"z", "(?P\d)", "%(year_of_decade)01d", "year_of_decade"), + (u"^---", "(?P---)", "---", None), + (u"^--", "(?P--)", "--", None), + (u"^-", "(?P-)", "-", None) ] _TIME_TRANSLATE_INFO = [ - (u"(?<=^hh)mm", "(?P\d\d)", "%(minute_of_hour)02d"), - (u"(?<=^hh:)mm", "(?P\d\d)", "%(minute_of_hour)02d"), - (u"(?<=^-)mm", "(?P\d\d)", "%(minute_of_hour)02d"), - (u"^hh", "(?P\d\d)", "%(hour_of_day)02d"), + (u"(?<=^hh)mm", "(?P\d\d)", "%(minute_of_hour)02d", + "minute_of_hour"), + (u"(?<=^hh:)mm", "(?P\d\d)", "%(minute_of_hour)02d", + "minute_of_hour"), + (u"(?<=^-)mm", "(?P\d\d)", "%(minute_of_hour)02d", + "minute_of_hour"), + (u"^hh", "(?P\d\d)", "%(hour_of_day)02d", "hour_of_day"), (u",ii", "[,.](?P\d+)", - "%(hour_of_day_decimal_string)s"), + "%(hour_of_day_decimal_string)s", "hour_of_day_decimal_string"), (u",nn", "[,.](?P\d+)", - "%(minute_of_hour_decimal_string)s"), - (u"ss", "(?P\d\d)", "%(second_of_minute)02d"), + "%(minute_of_hour_decimal_string)s", "minute_of_hour_decimal_string"), + (u"ss", "(?P\d\d)", "%(second_of_minute)02d", + "second_of_minute"), (u",tt", "[,.](?P\d+)", - "%(second_of_minute_decimal_string)s"), - (u"^--", "(?P--)", "--"), - (u"^-", "(?P-)", "--") + "%(second_of_minute_decimal_string)s", "second_of_minute_decimal_string"), + (u"^--", "(?P--)", "--", None), + (u"^-", "(?P-)", "-", None) ] _TIMEZONE_TRANSLATE_INFO = [ (u"(?<=±hh)mm", "(?P\d\d)", - "%(time_zone_minute_abs)02d"), + "%(time_zone_minute_abs)02d", "time_zone_minute_abs"), (u"(?<=±hh:)mm", "(?P\d\d)", - "%(time_zone_minute_abs)02d"), + "%(time_zone_minute_abs)02d", "time_zone_minute_abs"), (u"(?<=±)hh", "(?P\d\d)", - "%(time_zone_hour_abs)02d"), - (u"±", "(?P[-+])", "%(time_zone_sign)s"), - (u"Z", "(?PZ)", "Z") + "%(time_zone_hour_abs)02d", "time_zone_hour_abs"), + (u"±", "(?P[-+])", "%(time_zone_sign)s", "time_zone_sign"), + (u"Z", "(?PZ)", "Z", None) ] @@ -185,7 +190,8 @@ def get_date_translate_info(num_expanded_year_digits=2): return _DATE_TRANSLATE_INFO + [ (u"X", "(?P" + expanded_year_digit_regex + ")", - "%(expanded_year_digits)0" + str(num_expanded_year_digits) + "d") + "%(expanded_year_digits)0" + str(num_expanded_year_digits) + "d", + "expanded_year_digits") ] diff --git a/isodatetime/parsers.py b/isodatetime/parsers.py index 2864ce3..0077dff 100644 --- a/isodatetime/parsers.py +++ b/isodatetime/parsers.py @@ -183,7 +183,7 @@ def get_expressions(self, text): def parse_date_expression_to_regex(self, expression): """Construct regular expressions for the date.""" - for expr_regex, substitute, format in ( + for expr_regex, substitute, format_, name in ( parser_spec.get_date_translate_info( self.expanded_year_digits)): expression = re.sub(expr_regex, substitute, expression) @@ -192,7 +192,7 @@ def parse_date_expression_to_regex(self, expression): def parse_time_expression_to_regex(self, expression): """Construct regular expressions for the time.""" - for expr_regex, substitute, format in ( + for expr_regex, substitute, format_, name in ( parser_spec.get_time_translate_info()): expression = re.sub(expr_regex, substitute, expression) expression = "^" + expression + "$" @@ -200,7 +200,7 @@ def parse_time_expression_to_regex(self, expression): def parse_timezone_expression_to_regex(self, expression): """Construct regular expressions for the timezone.""" - for expr_regex, substitute, format in ( + for expr_regex, substitute, format_, name in ( parser_spec.get_timezone_translate_info( )): expression = re.sub(expr_regex, substitute, expression) diff --git a/isodatetime/tests.py b/isodatetime/tests.py index 39eb32f..3e4414f 100644 --- a/isodatetime/tests.py +++ b/isodatetime/tests.py @@ -317,14 +317,14 @@ def get_timepointparser_tests(allow_only_basic=False, "+01": {"time_zone_hour": 1}, "-05": {"time_zone_hour": -5}, "+2301": {"time_zone_hour": 23, "time_zone_minute": 1}, - "-1230": {"time_zone_hour": -12, "time_zone_minute": 30} + "-1230": {"time_zone_hour": -12, "time_zone_minute": -30} }, "extended": { "Z": {"time_zone_hour": 0, "time_zone_minute": 0}, "+01": {"time_zone_hour": 1}, "-05": {"time_zone_hour": -5}, "+23:01": {"time_zone_hour": 23, "time_zone_minute": 1}, - "-12:30": {"time_zone_hour": -12, "time_zone_minute": 30} + "-12:30": {"time_zone_hour": -12, "time_zone_minute": -30} } } format_ok_keys = ["basic", "extended"] @@ -468,7 +468,7 @@ def assertEqual(self, test, control, source=None): (source, test, control)) super(TestSuite, self).assertEqual(test, control, info) - def _test_timeinterval_parser(self): + def test_timeinterval_parser(self): """Test the time interval parsing.""" parser = parsers.TimeIntervalParser() for expression, ctrl_result in get_timeintervalparser_tests(): @@ -481,7 +481,7 @@ def _test_timeinterval_parser(self): ) self.assertEqual(test_result, ctrl_result, expression) - def _test_timepoint(self): + def test_timepoint(self): """Test the manipulation of dates and times (takes a while).""" import datetime import random @@ -552,7 +552,7 @@ def test_timepoint_dumper(self): self.assertEqual(test_timepoint, ctrl_timepoint, expression) - def _test_timepoint_parser(self): + def test_timepoint_parser(self): """Test the parsing of date/time expressions.""" parser = parsers.TimePointParser(allow_truncated=True) for expression, timepoint_kwargs in get_timepointparser_tests( @@ -566,7 +566,7 @@ def _test_timepoint_parser(self): ctrl_data = str(data.TimePoint(**timepoint_kwargs)) self.assertEqual(test_data, ctrl_data, expression) - def _test_timerecurrence(self): + def test_timerecurrence(self): """Test the recurring date/time series data model.""" parser = parsers.TimeRecurrenceParser() for expression, ctrl_results in get_timerecurrence_tests(): @@ -584,14 +584,13 @@ def _test_timerecurrence(self): test_results.append(str(time_point)) self.assertEqual(test_results, ctrl_results, expression) - def _test_timerecurrence_parser(self): + def test_timerecurrence_parser(self): """Test the recurring date/time series parsing.""" parser = parsers.TimeRecurrenceParser() for expression, test_info in get_timerecurrenceparser_tests(): try: test_data = str(parser.parse(expression)) - except parsers.TimeSyntaxError as e: - print e + except parsers.TimeSyntaxError: raise ValueError("Parsing failed for %s" % expression) ctrl_data = str(data.TimeRecurrence(**test_info)) self.assertEqual(test_data, ctrl_data, expression) From b14ac1596890e741a39fd1ecee943b24e6c77c48 Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Wed, 19 Feb 2014 12:55:07 +0000 Subject: [PATCH 08/14] Centralise constants and error messages. This implements feedback from @matthewrmshin. --- isodatetime/data.py | 242 +++++++++++++++++++++++------------------ isodatetime/parsers.py | 28 +++-- 2 files changed, 151 insertions(+), 119 deletions(-) diff --git a/isodatetime/data.py b/isodatetime/data.py index 731d8bf..6681045 100644 --- a/isodatetime/data.py +++ b/isodatetime/data.py @@ -22,9 +22,29 @@ from . import dumpers from . import util -DAYS_OF_MONTHS = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] -DAYS_OF_MONTHS_LEAP = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] +# The following constants could be encapsulated in a calendar class. +SECONDS_IN_MINUTE = 60 +MINUTES_IN_HOUR = 60 +SECONDS_IN_HOUR = SECONDS_IN_MINUTE * MINUTES_IN_HOUR +HOURS_IN_DAY = 24 +SECONDS_IN_DAY = SECONDS_IN_HOUR * HOURS_IN_DAY +MINUTES_IN_DAY = MINUTES_IN_HOUR * HOURS_IN_DAY +DAYS_IN_WEEK = 7 +DAYS_IN_MONTHS = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] +DAYS_IN_MONTHS_LEAP = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] +MONTHS_IN_YEAR = len(DAYS_IN_MONTHS) +# No support for MONTHS_IN_YEAR_LEAP (some calendars...) +ROUGH_DAYS_IN_MONTH = 30 # Used for duration conversion, nowhere else. +DAYS_IN_YEAR = sum(DAYS_IN_MONTHS) +ROUGH_DAYS_IN_YEAR = DAYS_IN_YEAR # = as ROUGH_DAYS_IN_MONTH +DAYS_IN_YEAR_LEAP = sum(DAYS_IN_MONTHS_LEAP) +HOURS_IN_YEAR = DAYS_IN_YEAR * HOURS_IN_DAY +MINUTES_IN_YEAR = DAYS_IN_YEAR * MINUTES_IN_DAY +SECONDS_IN_YEAR = DAYS_IN_YEAR * SECONDS_IN_DAY +HOURS_IN_YEAR_LEAP = DAYS_IN_YEAR_LEAP * HOURS_IN_DAY +MINUTES_IN_YEAR_LEAP = DAYS_IN_YEAR_LEAP * MINUTES_IN_DAY +SECONDS_IN_YEAR_LEAP = DAYS_IN_YEAR_LEAP * SECONDS_IN_DAY WEEK_DAY_START_REFERENCE = {"calendar": (2000, 1, 3), "ordinal": (2000, 3)} @@ -34,12 +54,24 @@ 2: dumpers.TimePointDumper(num_expanded_year_digits=2) } +BAD_INPUT_CONFLICT = "Conflicting input: {0} but have {1}" +BAD_INPUT_INT_CAST = "Invalid input for {0}: {1}: {2}" +BAD_INPUT_INT_REMAINDER = "Non-integer like number for {0}: {1}" +BAD_INPUT_MISSING = "Missing input: {0} needs {1}" +BAD_INPUT_OUT_OF_BOUNDS = "Invalid input (out of bounds): {0}: {1}" +BAD_INPUT_RECURRENCE = "Unsupported or invalid recurrence information." +BAD_INPUT_TYPE = "Invalid type for {0}: {1}{2}" +BAD_INPUT_VALUES = "Invalid input for {0}: {1}: allowed: {2}" + class BadInputError(ValueError): """An error raised when constructor inputs are invalid.""" - pass + def __str__(self): + format_string = self.args[0] + format_args = self.args[1:] + return format_string.format(*format_args) class TimeRecurrence(object): @@ -79,10 +111,10 @@ def __init__(self, repetitions=None, start_point=None, diff_seconds = end_seconds - start_seconds while diff_seconds < 0: diff_days -= 1 - diff_seconds += 86400 - while diff_seconds >= 86400: + diff_seconds += SECONDS_IN_DAY + while diff_seconds >= SECONDS_IN_DAY: diff_days += 1 - diff_seconds -= 86400 + diff_seconds -= SECONDS_IN_DAY if self.repetitions == 1: self.interval = TimeInterval(years=0) else: @@ -91,7 +123,8 @@ def __init__(self, repetitions=None, start_point=None, diff_seconds_float = diff_seconds / float( self.repetitions - 1) diff_days = int(diff_days_float) - diff_seconds_float += (diff_days_float - diff_days) * 86400 + diff_seconds_float += ( + diff_days_float - diff_days) * SECONDS_IN_DAY self.interval = TimeInterval(days=diff_days, seconds=diff_seconds_float) elif self.end_point is None: @@ -111,8 +144,7 @@ def __init__(self, repetitions=None, start_point=None, point -= self.interval self.start_point = point else: - raise BadInputError( - "Unsupported or invalid recurrence information.") + raise BadInputError(BAD_INPUT_RECURRENCE) def __iter__(self): if self.start_point is None: @@ -209,16 +241,16 @@ def __init__(self, years=0, months=0, weeks=0, days=0, self.days = days if weeks is not None: if days is None: - self.days = 7 * weeks + self.days = DAYS_IN_WEEK * weeks else: - self.days += 7 * weeks + self.days += DAYS_IN_WEEK * weeks self.hours = hours self.minutes = minutes self.seconds = seconds if (not self.years and not self.months and not self.hours and not self.minutes and not self.seconds and weeks and not days): - self.weeks = self.days / 7 + self.weeks = self.days / DAYS_IN_WEEK self.years, self.months, self.days = (None, None, None) self.hours, self.minutes, self.seconds = (None, None, None) @@ -236,22 +268,27 @@ def get_days_and_seconds(self): equal to 365 days, months to 30, in order to work (no context can be supplied). This code needs improving. - Seconds are returned in the range 0 <= seconds < 86400, which - means that a TimeInterval which has self.seconds = 86500 will - return 1 day, 100 seconds or (1, 100) from this method. + Seconds are returned in the range + 0 <= seconds < SECONDS_IN_DAY, which means that a TimeInterval + which has self.seconds = SECONDS_IN_DAY + 100 will return 1 + day, 100 seconds or (1, 100) from this method. """ # TODO: Implement error calculation for the below quantities. new = self.copy() new.to_days() - new_days = new.years * 365 + new.months * 30 + new.days - new_seconds = new.hours * 3600 + new.minutes * 60 + new.seconds - while new_seconds >= 86400: + new_days = (new.years * ROUGH_DAYS_IN_YEAR + + new.months * ROUGH_DAYS_IN_MONTH + + new.days) + new_seconds = (new.hours * SECONDS_IN_HOUR + + new.minutes * SECONDS_IN_MINUTE + + new.seconds) + while new_seconds >= SECONDS_IN_DAY: new_days += 1 - new_seconds -= 86400 + new_seconds -= SECONDS_IN_DAY while new_seconds < 0: new_days -= 1 - new_seconds += 86400 + new_seconds += SECONDS_IN_DAY return new_days, new_seconds def get_is_in_weeks(self): @@ -265,13 +302,13 @@ def to_days(self): self.minutes, self.seconds]: if attribute is None: attribute = 0 - self.days = self.weeks * 7 + self.days = self.weeks * DAYS_IN_WEEK self.weeks = None def to_weeks(self): """Convert to week representation (warning: use with caution).""" if not self.get_is_in_weeks(): - self.weeks = self.days / 7 + self.weeks = self.days / DAYS_IN_WEEK self.years, self.months, self.days = (None, None, None) self.hours, self.minutes, self.seconds = (None, None, None) @@ -301,29 +338,7 @@ def __add__(self, other): ) def __sub__(self, other): - new = self.copy() - if isinstance(other, TimeInterval): - if new.get_is_in_weeks(): - if other.get_is_in_weeks(): - new.weeks -= other.weeks - return new - new.to_days() - elif other.get_is_in_weeks(): - other = other.copy().to_days() - new.years -= other.years - new.months -= other.months - new.days -= other.days - new.hours -= other.hours - new.minutes -= other.minutes - new.seconds -= other.seconds - return new - if isinstance(other, TimePoint): - return other - new - raise TypeError( - "Invalid type for subtraction: " + - "'%s' should be TimeInterval or TimePoint." % - type(other).__name__ - ) + return self + -1 * other def __mul__(self, other): # TODO: support float multiplication? @@ -517,7 +532,6 @@ class TimePoint(object): truncated_property - a string that can either be "year_of_decade" or "year_of_century". This is used for truncated representations to distinguish between the two ways of truncating the year. - """ DATA_ATTRIBUTES = [ @@ -557,14 +571,15 @@ def __init__(self, expanded_year_digits=0, year=None, month_of_year=None, if (dump_format is not None and not isinstance(dump_format, basestring)): raise BadInputError( - "Invalid input for dumper: {0}".format( - dumper)) + BAD_INPUT_TYPE, + "dump_format", repr(dump_format), type(dump_format)) if (truncated_property is not None and truncated_property not in ["year_of_decade", "year_of_century"]): raise BadInputError( - "Invalid input for truncated_property: {0}".format( - truncated_property)) + BAD_INPUT_VALUES, "truncated_property", + repr(truncated_property), + "'year_of_decade' or 'year_of_century'") self.dump_format = dump_format self.expanded_year_digits = _int_caster(expanded_year_digits, "expanded_year_digits") @@ -585,32 +600,39 @@ def __init__(self, expanded_year_digits=0, year=None, month_of_year=None, allow_none=True) if hour_of_day_decimal is not None: if self.hour_of_day is None: - raise TimePointInputError( - "Invalid input: hour decimal points - but not hours") + raise BadInputError( + BAD_INPUT_MISSING, "hour_of_day_decimal", + "hour_of_day") self.hour_of_day += float(hour_of_day_decimal) if minute_of_hour is not None: - raise TimePointInputError( - "Invalid input: minutes - already have hour decimals") + raise BadInputError( + BAD_INPUT_CONFLICT, "minute_of_hour", + "hour_of_day_decimal") if second_of_minute is not None: - raise TimePointInputError( - "Invalid input: seconds - already have hour decimals") + raise BadInputError( + BAD_INPUT_CONFLICT, "second_of_minute", + "hour_of_day_decimal") if minute_of_hour_decimal is not None: if minute_of_hour is None: - raise TimePointInputError( - "Invalid input: minute decimal points - but not minutes") + raise BadInputError( + BAD_INPUT_MISSING, "minute_of_hour_decimal", + "minute_of_hour") self.minute_of_hour = _int_caster( minute_of_hour, "minute_of_hour") self.minute_of_hour += float(minute_of_hour_decimal) if second_of_minute is not None: - raise TimePointInputError( - "Invalid input: seconds - already have minute decimals") + raise BadInputError( + BAD_INPUT_CONFLICT, "second_of_minute", + "minute_of_hour_decimal") else: self.minute_of_hour = _int_caster( minute_of_hour, "minute_of_hour", allow_none=True) if second_of_minute_decimal is not None: if second_of_minute is None: - raise TimePointInputError( - "Invalid input: second decimal points - but not seconds") + raise BadInputError( + BAD_INPUT_MISSING, + "second_of_minute_decimal", + "second_of_minute") self.second_of_minute = _int_caster(second_of_minute, "second_of_minute") self.second_of_minute += float(second_of_minute_decimal) @@ -679,10 +701,10 @@ def get_hour_minute_second(self): if minute_of_hour is None: hour_decimals = hour_of_day - int(hour_of_day) hour_of_day = float(int(hour_of_day)) - minute_of_hour = 60 * hour_decimals + minute_of_hour = MINUTES_IN_HOUR * hour_decimals minute_decimals = minute_of_hour - int(minute_of_hour) minute_of_hour = float(int(minute_of_hour)) - second_of_minute = 60 * minute_decimals + second_of_minute = SECONDS_IN_MINUTE * minute_decimals return hour_of_day, minute_of_hour, second_of_minute def get_ordinal_date(self): @@ -761,8 +783,8 @@ def get_second_of_day(self): if self.second_of_minute is not None: second_of_day += self.second_of_minute if self.minute_of_hour is not None: - second_of_day += self.minute_of_hour * 60 - second_of_day += self.hour_of_day * 3600 + second_of_day += self.minute_of_hour * SECONDS_IN_MINUTE + second_of_day += self.hour_of_day * SECONDS_IN_HOUR return second_of_day def get_time_zone(self): @@ -791,7 +813,7 @@ def apply_time_zone_offset(self, offset): """Apply a time zone shift represented by a TimeInterval.""" if offset.minutes: if self.minute_of_hour is None: - self.hour_of_day += offset.minutes / 60.0 + self.hour_of_day += offset.minutes / float(MINUTES_IN_HOUR) else: self.minute_of_hour += offset.minutes self._tick_over() @@ -976,15 +998,17 @@ def __add__(self, other, no_copy=False): if duration.seconds: if new.second_of_minute is None: if new.minute_of_hour is None: - new.hour_of_day += duration.seconds / 3600.0 + new.hour_of_day += ( + duration.seconds / float(SECONDS_IN_HOUR)) else: - new.minute_of_hour += duration.seconds / 60.0 + new.minute_of_hour += ( + duration.seconds / float(SECONDS_IN_MINUTE)) else: new.second_of_minute += duration.seconds new._tick_over() if duration.minutes: if new.minute_of_hour is None: - new.hour_of_day += duration.minutes / 3600.0 + new.hour_of_day += duration.minutes / float(MINUTES_IN_HOUR) else: new.minute_of_hour += duration.minutes new._tick_over() @@ -1005,11 +1029,11 @@ def __add__(self, other, no_copy=False): if duration.years: new.year += duration.years if new.get_is_calendar_date(): - month_index = (new.month_of_year - 1) % 12 + month_index = (new.month_of_year - 1) % MONTHS_IN_YEAR if get_is_leap_year(new.year): - max_day_in_new_month = DAYS_OF_MONTHS_LEAP[month_index] + max_day_in_new_month = DAYS_IN_MONTHS_LEAP[month_index] else: - max_day_in_new_month = DAYS_OF_MONTHS[month_index] + max_day_in_new_month = DAYS_IN_MONTHS[month_index] if new.day_of_month > max_day_in_new_month: # For example, when Feb 29 - 1 year = Feb 28. new.day_of_month = max_day_in_new_month @@ -1031,6 +1055,13 @@ def copy(self): dummy_timepoint.time_zone = self.time_zone.copy() return dummy_timepoint + def get_props(self): + """Return the data properties of this TimePoint.""" + hash_ = [] + for attr in self.DATA_ATTRIBUTES: + hash_.append(attr, getattr(self, attr, None)) + return hash_ + def __cmp__(self, other): if not isinstance(other, TimePoint): raise TypeError( @@ -1041,6 +1072,8 @@ def __cmp__(self, other): raise TypeError( "Cannot compare truncated to non-truncated " + "TimePoint: %s, %s" % (self, other)) + if self.get_props() == other.get_props(): + return 0 if self.truncated: for attribute in self.DATA_ATTRIBUTES: other_attr = getattr(other, attribute) @@ -1106,19 +1139,19 @@ def _add_months(self, num_months): for i in range(abs(num_months)): if num_months > 0: self.month_of_year += 1 - if self.month_of_year > 12: - self.month_of_year -= 12 + if self.month_of_year > MONTHS_IN_YEAR: + self.month_of_year -= MONTHS_IN_YEAR self.year += 1 if num_months < 0: self.month_of_year -= 1 if self.month_of_year < 1: - self.month_of_year += 12 + self.month_of_year += MONTHS_IN_YEAR self.year -= 1 - month_index = (self.month_of_year - 1) % 12 + month_index = (self.month_of_year - 1) % MONTHS_IN_YEAR if get_is_leap_year(self.year): - max_day_in_new_month = DAYS_OF_MONTHS_LEAP[month_index] + max_day_in_new_month = DAYS_IN_MONTHS_LEAP[month_index] else: - max_day_in_new_month = DAYS_OF_MONTHS[month_index] + max_day_in_new_month = DAYS_IN_MONTHS[month_index] if self.day_of_month > max_day_in_new_month: # For example, when 31 March + 1 month = 30 April. self.day_of_month = max_day_in_new_month @@ -1139,7 +1172,7 @@ def _tick_over(self): self.hour_of_day += num_hours self.minute_of_hour = minutes if self.hour_of_day is not None: - num_days, hours = divmod(self.hour_of_day, 24) + num_days, hours = divmod(self.hour_of_day, HOURS_IN_DAY) if self.day_of_week is not None: self.day_of_week += num_days elif self.day_of_month is not None: @@ -1148,7 +1181,7 @@ def _tick_over(self): self.day_of_year += num_days self.hour_of_day = hours if self.day_of_week is not None: - num_weeks, days = divmod(self.day_of_week - 1, 7) + num_weeks, days = divmod(self.day_of_week - 1, DAYS_IN_WEEK) self.week_of_year += num_weeks self.day_of_week = days + 1 if self.day_of_month is not None: @@ -1173,10 +1206,10 @@ def _tick_over(self): self.year += 1 if self.month_of_year is not None: while self.month_of_year < 1: - self.month_of_year += 12 + self.month_of_year += MONTHS_IN_YEAR self.year -= 1 - while self.month_of_year > 12: - self.month_of_year -= 12 + while self.month_of_year > MONTHS_IN_YEAR: + self.month_of_year -= MONTHS_IN_YEAR self.year += 1 def _tick_over_day_of_month(self): @@ -1204,11 +1237,11 @@ def _tick_over_day_of_month(self): self.month_of_year = month self.day_of_month = day else: - month_index = (self.month_of_year - 1) % 12 + month_index = (self.month_of_year - 1) % MONTHS_IN_YEAR if get_is_leap_year(self.year): - max_day_in_month = DAYS_OF_MONTHS_LEAP[month_index] + max_day_in_month = DAYS_IN_MONTHS_LEAP[month_index] else: - max_day_in_month = DAYS_OF_MONTHS[month_index] + max_day_in_month = DAYS_IN_MONTHS[month_index] if self.day_of_month > max_day_in_month: num_days = 0 for month, day in iter_months_days( @@ -1410,10 +1443,10 @@ def get_is_leap_year(year): @util.cache_results def get_days_in_year(year): - """Return 366 if year is a leap year, otherwise 365.""" + """Return the number of days in this particular year.""" if get_is_leap_year(year): - return 366 - return 365 + return DAYS_IN_YEAR_LEAP + return DAYS_IN_YEAR @util.cache_results @@ -1426,7 +1459,7 @@ def get_weeks_in_year(year): while cal_year_next != cal_year: diff_days += get_days_in_year(cal_year) cal_year += 1 - return diff_days / 7 + return diff_days / DAYS_IN_WEEK def get_calendar_date_from_ordinal_date(year, day_of_year): @@ -1459,7 +1492,7 @@ def get_calendar_date_from_week_date(year, week_of_year, day_of_week): day_of_week is an integer that denotes the day of the week (1-7). """ - num_days_week_year = (week_of_year - 1) * 7 + day_of_week - 1 + num_days_week_year = (week_of_year - 1) * DAYS_IN_WEEK + day_of_week - 1 start_year, start_month, start_day = ( get_calendar_date_week_date_start(year)) if num_days_week_year == 0: @@ -1573,8 +1606,8 @@ def get_week_date_from_calendar_date(year, month_of_year, day_of_month): if (start_year == year and iter_month == month_of_year and iter_day == day_of_month): - week_of_year = (total_iter_days / 7) + 1 - day_of_week = (total_iter_days % 7) + 1 + week_of_year = (total_iter_days / DAYS_IN_WEEK) + 1 + day_of_week = (total_iter_days % DAYS_IN_WEEK) + 1 return week_date_start_year, week_of_year, day_of_week for iter_start_year in [start_year + 1, start_year + 2]: @@ -1584,8 +1617,8 @@ def get_week_date_from_calendar_date(year, month_of_year, day_of_month): if (iter_start_year == year and iter_month == month_of_year and iter_day == day_of_month): - week_of_year = (total_iter_days / 7) + 1 - day_of_week = (total_iter_days % 7) + 1 + week_of_year = (total_iter_days / DAYS_IN_WEEK) + 1 + day_of_week = (total_iter_days % DAYS_IN_WEEK) + 1 return week_date_start_year, week_of_year, day_of_week raise ValueError("Bad calendar date: %s-%02d-%02d" % (year, month_of_year, @@ -1623,11 +1656,12 @@ def get_calendar_date_week_date_start(year): days_diff = ref_ordinal_day - 2 for intervening_year in years: days_diff += get_days_in_year(intervening_year) - weekdays_diff = (days_diff) % 7 + weekdays_diff = (days_diff) % DAYS_IN_WEEK if year > ref_year: day_of_week_start_year = weekdays_diff + 1 else: - day_of_week_start_year = 7 - weekdays_diff # Jan 1 as day of week. + # Jan 1 as day of week. + day_of_week_start_year = DAYS_IN_WEEK - weekdays_diff if day_of_week_start_year == 1: return year, 1, 1 if day_of_week_start_year > 4: @@ -1678,9 +1712,9 @@ def iter_months_days(year, month_of_year=None, day_of_month=None, True (default False). """ - source = DAYS_OF_MONTHS + source = DAYS_IN_MONTHS if get_is_leap_year(year): - source = DAYS_OF_MONTHS_LEAP + source = DAYS_IN_MONTHS_LEAP if day_of_month is not None and month_of_year is None: raise ValueError("Need to specify start month as well as day.") if in_reverse: @@ -1727,10 +1761,10 @@ def _int_caster(number, name="number", allow_none=False): float_number = float(number) except (TypeError, ValueError) as num_exc: raise BadInputError( - "Invalid input for {0}: {1}: {2}".format(name, number, num_exc)) + BAD_INPUT_INT_CAST, name, number, num_exc) if float(int_number) != float_number: raise BadInputError( - "Non-integer like number for {0}: {1}".format(name, number)) + BAD_INPUT_INT_REMAINDER, name, number) return int_number @@ -1755,5 +1789,5 @@ def _type_checker(*objects): values_string = " should be: " values_string += " or ".join( [str(v) for v in allowed_types]) - raise BadInputError("Invalid type for '{0}': {1}{2}".format( - name, repr(value), values_string)) + raise BadInputError( + BAD_INPUT_TYPE, name, repr(value), values_string) diff --git a/isodatetime/parsers.py b/isodatetime/parsers.py index 0077dff..89cbeaa 100644 --- a/isodatetime/parsers.py +++ b/isodatetime/parsers.py @@ -25,10 +25,16 @@ from . import parser_spec -class TimeSyntaxError(ValueError): + +class ISOSyntaxError(ValueError): """An error denoting invalid input syntax.""" + BAD_TIME_INPUT = "Invalid ISO 8601 {0} representation: {1}" + + def __str__(self): + return self.BAD_TIME_INPUT.format(*self.args) + class TimeRecurrenceParser(object): @@ -87,9 +93,7 @@ def parse(self, expression): end_point=end_point, interval=interval ) - raise TimeSyntaxError( - "Not a supported ISO 8601 recurrence pattern: %s" % - expression) + raise ISOSyntaxError("recurrence", expression) __call__ = parse @@ -254,7 +258,7 @@ def parse(self, timepoint_string, dump_format=None): timezone, bad_formats=bad_formats ) - except TimeSyntaxError: + except ISOSyntaxError: time = time_timezone timezone = None else: @@ -357,8 +361,7 @@ def get_date_info(self, date_string, bad_types=None): result = regex.match(date_string) if result: return (format_key, type_key), result.groupdict() - raise TimeSyntaxError( - "Not a valid ISO 8601 date representation: %s" % date_string) + raise ISOSyntaxError("date", date_string) def get_time_info(self, time_string, bad_formats=None, bad_types=None): """Return the properties from a time string.""" @@ -376,8 +379,7 @@ def get_time_info(self, time_string, bad_formats=None, bad_types=None): result = regex.match(time_string) if result: return result.groupdict() - raise TimeSyntaxError( - "Not a valid ISO 8601 time representation: %s" % time_string) + raise ISOSyntaxError("time", time_string) def get_timezone_info(self, timezone_string, bad_formats=None): """Return the properties from a timezone string.""" @@ -390,10 +392,7 @@ def get_timezone_info(self, timezone_string, bad_formats=None): result = regex.match(timezone_string) if result: return result.groupdict() - raise TimeSyntaxError( - "Not a valid ISO 8601 timezone representation: %s" % - timezone_string - ) + raise ISOSyntaxError("timezone", timezone_string) class TimeIntervalParser(object): @@ -432,8 +431,7 @@ def parse(self, expression): value = float(value) result_map[key] = value return data.TimeInterval(**result_map) - raise TimeSyntaxError("Not an ISO 8601 duration representation: %s" % - expression) + raise ISOSyntaxError("duration", expression) def parse_timepoint_expression(timepoint_expression, **kwargs): From e2d97e1d3a600ff89e80038602a38ed489699f83 Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Thu, 20 Feb 2014 17:07:11 +0000 Subject: [PATCH 09/14] isodatetime: fixes --- isodatetime/data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/isodatetime/data.py b/isodatetime/data.py index 6681045..5a407e4 100644 --- a/isodatetime/data.py +++ b/isodatetime/data.py @@ -722,6 +722,8 @@ def get_ordinal_date(self): def get(self, property_name): """Return a calculated value for property name.""" + if property_name == "expanded_year_digits": + return abs(self.year) / 10000 if property_name == "year_sign": return "+" if self.year >= 0 else "-" if property_name == "century": @@ -1059,7 +1061,7 @@ def get_props(self): """Return the data properties of this TimePoint.""" hash_ = [] for attr in self.DATA_ATTRIBUTES: - hash_.append(attr, getattr(self, attr, None)) + hash_.append((attr, getattr(self, attr, None))) return hash_ def __cmp__(self, other): From 7a61bd786339b587670a43222f736e0822d12aaf Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Thu, 20 Feb 2014 17:31:58 +0000 Subject: [PATCH 10/14] isodatetime: fix time zone dumping --- isodatetime/dumpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/isodatetime/dumpers.py b/isodatetime/dumpers.py index 37eb3b3..c7cae16 100644 --- a/isodatetime/dumpers.py +++ b/isodatetime/dumpers.py @@ -61,7 +61,8 @@ def dump(self, timepoint, formatting_string): timepoint = copy.copy(timepoint).to_week_date() if "Z" in expression and ( timepoint.time_zone.hours or timepoint.time_zone.minutes): - timepoint = copy.copy(timepoint.set_time_zone_to_utc()) + timepoint = copy.copy(timepoint) + timepoint.set_time_zone_to_utc() property_map = {} for property_ in properties: property_map[property_] = timepoint.get(property_) From f58f114f37b0b45fddfb278660cf057f7405d0a9 Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Mon, 24 Feb 2014 10:09:51 +0000 Subject: [PATCH 11/14] isodatetime: implement feedback --- isodatetime/data.py | 71 +++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/isodatetime/data.py b/isodatetime/data.py index 5a407e4..1b7c5b6 100644 --- a/isodatetime/data.py +++ b/isodatetime/data.py @@ -54,20 +54,20 @@ 2: dumpers.TimePointDumper(num_expanded_year_digits=2) } -BAD_INPUT_CONFLICT = "Conflicting input: {0} but have {1}" -BAD_INPUT_INT_CAST = "Invalid input for {0}: {1}: {2}" -BAD_INPUT_INT_REMAINDER = "Non-integer like number for {0}: {1}" -BAD_INPUT_MISSING = "Missing input: {0} needs {1}" -BAD_INPUT_OUT_OF_BOUNDS = "Invalid input (out of bounds): {0}: {1}" -BAD_INPUT_RECURRENCE = "Unsupported or invalid recurrence information." -BAD_INPUT_TYPE = "Invalid type for {0}: {1}{2}" -BAD_INPUT_VALUES = "Invalid input for {0}: {1}: allowed: {2}" - class BadInputError(ValueError): """An error raised when constructor inputs are invalid.""" + CONFLICT = "Conflicting input: {0} but have {1}" + INT_CAST = "Invalid input for {0}: {1}: {2}" + INT_REMAINDER = "Non-integer like number for {0}: {1}" + MISSING = "Missing input: {0} needs {1}" + OUT_OF_BOUNDS = "Invalid input (out of bounds): {0}: {1}" + RECURRENCE = "Invalid recurrence info: {0}" + TYPE = "Invalid type for {0}: {1}{2}" + VALUES = "Invalid input for {0}: {1}: allowed: {2}" + def __str__(self): format_string = self.args[0] format_args = self.args[1:] @@ -81,7 +81,7 @@ class TimeRecurrence(object): def __init__(self, repetitions=None, start_point=None, interval=None, end_point=None, min_point=None, max_point=None): - _type_checker( + inputs = ( (repetitions, "repetitions", None, int), (start_point, "start_point", None, TimePoint), (interval, "interval", None, TimeInterval), @@ -89,6 +89,7 @@ def __init__(self, repetitions=None, start_point=None, (min_point, "min_point", None, TimePoint), (max_point, "max_point", None, TimePoint) ) + _type_checker(*inputs) self.repetitions = repetitions self.start_point = start_point self.interval = interval @@ -105,14 +106,13 @@ def __init__(self, repetitions=None, start_point=None, end_year, end_days = self.end_point.get_ordinal_date() end_seconds = self.end_point.get_second_of_day() diff_days = end_days - start_days - while end_year != start_year: - diff_days += get_days_in_year(start_year) - start_year += 1 + for year in range(start_year, end_year): + diff_days += get_days_in_year(year) diff_seconds = end_seconds - start_seconds - while diff_seconds < 0: + if diff_seconds < 0: diff_days -= 1 diff_seconds += SECONDS_IN_DAY - while diff_seconds >= SECONDS_IN_DAY: + if diff_seconds >= SECONDS_IN_DAY: diff_days += 1 diff_seconds -= SECONDS_IN_DAY if self.repetitions == 1: @@ -144,7 +144,10 @@ def __init__(self, repetitions=None, start_point=None, point -= self.interval self.start_point = point else: - raise BadInputError(BAD_INPUT_RECURRENCE) + raise BadInputError( + BadInputError.RECURRENCE, + [i[:2] for i in inputs] + ) def __iter__(self): if self.start_point is None: @@ -283,10 +286,10 @@ def get_days_and_seconds(self): new_seconds = (new.hours * SECONDS_IN_HOUR + new.minutes * SECONDS_IN_MINUTE + new.seconds) - while new_seconds >= SECONDS_IN_DAY: + if new_seconds >= SECONDS_IN_DAY: new_days += 1 new_seconds -= SECONDS_IN_DAY - while new_seconds < 0: + if new_seconds < 0: new_days -= 1 new_seconds += SECONDS_IN_DAY return new_days, new_seconds @@ -571,13 +574,13 @@ def __init__(self, expanded_year_digits=0, year=None, month_of_year=None, if (dump_format is not None and not isinstance(dump_format, basestring)): raise BadInputError( - BAD_INPUT_TYPE, + BadInputError.TYPE, "dump_format", repr(dump_format), type(dump_format)) if (truncated_property is not None and truncated_property not in ["year_of_decade", "year_of_century"]): raise BadInputError( - BAD_INPUT_VALUES, "truncated_property", + BadInputError.VALUES, "truncated_property", repr(truncated_property), "'year_of_decade' or 'year_of_century'") self.dump_format = dump_format @@ -601,28 +604,28 @@ def __init__(self, expanded_year_digits=0, year=None, month_of_year=None, if hour_of_day_decimal is not None: if self.hour_of_day is None: raise BadInputError( - BAD_INPUT_MISSING, "hour_of_day_decimal", + BadInputError.MISSING, "hour_of_day_decimal", "hour_of_day") self.hour_of_day += float(hour_of_day_decimal) if minute_of_hour is not None: raise BadInputError( - BAD_INPUT_CONFLICT, "minute_of_hour", + BadInputError.CONFLICT, "minute_of_hour", "hour_of_day_decimal") if second_of_minute is not None: raise BadInputError( - BAD_INPUT_CONFLICT, "second_of_minute", + BadInputError.CONFLICT, "second_of_minute", "hour_of_day_decimal") if minute_of_hour_decimal is not None: if minute_of_hour is None: raise BadInputError( - BAD_INPUT_MISSING, "minute_of_hour_decimal", + BadInputError.MISSING, "minute_of_hour_decimal", "minute_of_hour") self.minute_of_hour = _int_caster( minute_of_hour, "minute_of_hour") self.minute_of_hour += float(minute_of_hour_decimal) if second_of_minute is not None: raise BadInputError( - BAD_INPUT_CONFLICT, "second_of_minute", + BadInputError.CONFLICT, "second_of_minute", "minute_of_hour_decimal") else: self.minute_of_hour = _int_caster( @@ -630,7 +633,7 @@ def __init__(self, expanded_year_digits=0, year=None, month_of_year=None, if second_of_minute_decimal is not None: if second_of_minute is None: raise BadInputError( - BAD_INPUT_MISSING, + BadInputError.MISSING, "second_of_minute_decimal", "second_of_minute") self.second_of_minute = _int_caster(second_of_minute, @@ -1458,9 +1461,8 @@ def get_weeks_in_year(year): cal_year_next, cal_ord_days_next = get_ordinal_date_week_date_start( year + 1) diff_days = cal_ord_days_next - cal_ord_days - while cal_year_next != cal_year: - diff_days += get_days_in_year(cal_year) - cal_year += 1 + for intervening_year in range(cal_year, cal_year_next): + diff_days += get_days_in_year(intervening_year) return diff_days / DAYS_IN_WEEK @@ -1686,9 +1688,8 @@ def get_days_since_1_ad(year): return 0 start_year = 0 days = 0 - while start_year < year: - start_year += 1 - days += get_days_in_year(start_year) + for intervening_year in range(start_year + 1, year + 1): + days += get_days_in_year(intervening_year) return days @@ -1763,10 +1764,10 @@ def _int_caster(number, name="number", allow_none=False): float_number = float(number) except (TypeError, ValueError) as num_exc: raise BadInputError( - BAD_INPUT_INT_CAST, name, number, num_exc) + BadInputError.INT_CAST, name, number, num_exc) if float(int_number) != float_number: raise BadInputError( - BAD_INPUT_INT_REMAINDER, name, number) + BadInputError.INT_REMAINDER, name, number) return int_number @@ -1792,4 +1793,4 @@ def _type_checker(*objects): values_string += " or ".join( [str(v) for v in allowed_types]) raise BadInputError( - BAD_INPUT_TYPE, name, repr(value), values_string) + BadInputError.TYPE, name, repr(value), values_string) From 7ca7a442e05577e207bca5e10c3b13be74a97255 Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Mon, 24 Feb 2014 12:09:20 +0000 Subject: [PATCH 12/14] update memoize-decorator caching --- isodatetime/util.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/isodatetime/util.py b/isodatetime/util.py index b3c7917..01fc8cc 100644 --- a/isodatetime/util.py +++ b/isodatetime/util.py @@ -35,7 +35,8 @@ def wrap_func(*args, **kwargs): return cache[key] else: results = func(*args, **kwargs) - if len(cache) < 100000: - cache[key] = results + if len(cache) > 100000: + cache.popitem() + cache[key] = results return results return wrap_func From 927420cb7e372d3ac1071c682a50aad59b631bae Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Mon, 24 Feb 2014 12:09:25 +0000 Subject: [PATCH 13/14] improved documentation --- isodatetime/data.py | 18 ++++-- isodatetime/dumpers.py | 32 +++++++++- isodatetime/parser_spec.py | 119 +++++++++++++++++++++++-------------- isodatetime/parsers.py | 14 ++--- 4 files changed, 125 insertions(+), 58 deletions(-) diff --git a/isodatetime/data.py b/isodatetime/data.py index 1b7c5b6..fa716a6 100644 --- a/isodatetime/data.py +++ b/isodatetime/data.py @@ -441,11 +441,21 @@ def __str__(self): class TimeZone(TimeInterval): - """Represent a time zone offset.""" + """Represent a time zone offset from UTC. - def __init__(self, *args, **kwargs): - self.unknown = kwargs.pop("unknown", False) - super(TimeZone, self).__init__(*args, **kwargs) + Keyword arguments: + hours, minutes: integers (default 0) denoting the hour and minute + component of the offset from UTC. These may be positive, zero, or + negative, as required. Note that a negative UTC offset should have + both hours and minutes as zero or negative integers. + unknown: a boolean that represents an unknown TimeZone. Some + operations and comparisons may fail when this is True. + + """ + + def __init__(self, hours=0, minutes=0, unknown=False): + self.unknown = unknown + super(TimeZone, self).__init__(hours=hours, minutes=minutes) def copy(self): """Return an unlinked copy of this instance.""" diff --git a/isodatetime/dumpers.py b/isodatetime/dumpers.py index c7cae16..e09a759 100644 --- a/isodatetime/dumpers.py +++ b/isodatetime/dumpers.py @@ -27,7 +27,37 @@ class TimePointDumper(object): - """Dump TimePoint instances to strings.""" + """Dump TimePoint instances to strings using particular formats. + + A format can be specified in the self.dump method via the + formatting_string argument. Unlike Python's datetime strftime + method, this uses normal/Unicode character patterns to represent + which pieces of information to output where. A full reference + of valid patterns is found in the parser_spec module, with lots + of examples (coincidentally, used to generate the parsing). + Anything not matched will get left as it is in the string. + Specifying a particular timezone will result in a timezone + conversion of the date/time information before it is output. + + For example, the following formatting_string + 'CCYYMMDDThhmmZ' is made up of: + CC - year (century) information, e.g. 19 + YY - year (decade, year of decade) information, e.g. 85 + MM - month of year information, e.g. 05 + DD - day of month information, e.g. 31 + T - left alone, date/time separator + hh - hour of day information, e.g. 06 + mm - minute of hour information, e.g. 58 + Z - Zulu or UTC zero-offset timezone, left in, forces timezone + conversion + and might dump a TimePoint instance like this: '19850531T0658Z'. + + Keyword arguments: + num_expanded_year_digits - an integer (default 2) that indicates + how many extra year digits to apply if appropriate (and if the + user requests that information). + + """ def __init__(self, num_expanded_year_digits=2): self._rec_formats = {"date": [], "time": [], "timezone": []} diff --git a/isodatetime/parser_spec.py b/isodatetime/parser_spec.py index 27a5e4c..400939a 100644 --- a/isodatetime/parser_spec.py +++ b/isodatetime/parser_spec.py @@ -19,14 +19,16 @@ """This provides data to drive ISO 8601 parsing functionality.""" -DATE_EXPRESSIONS = {"basic": {"complete": u""" +DATE_EXPRESSIONS = { + "basic": { + "complete": u""" CCYYMMDD ±XCCYYMMDD CCYYDDD ±XCCYYDDD CCYYWwwD ±XCCYYWwwD""", - "reduced": u""" + "reduced": u""" CCYY-MM # Deviation? Not clear if "basic" or "extended" in standard. CCYY CC @@ -35,7 +37,7 @@ ±XCC CCYYWww ±XCCYYWww""", - "truncated": u""" + "truncated": u""" -YYMM -YY --MMDD @@ -51,20 +53,21 @@ -WwwD -Www -W-D -"""}, - "extended": {"complete": u""" +""" }, + "extended": { + "complete": u""" CCYY-MM-DD ±XCCYY-MM-DD CCYY-DDD ±XCCYY-DDD CCYY-Www-D ±XCCYY-Www-D""", - "reduced": u""" + "reduced": u""" CCYY-MM ±XCCYY-MM CCYY-Www ±XCCYY-Www""", - "truncated": u""" + "truncated": u""" -YY-MM --MM-DD YY-MM-DD @@ -75,8 +78,11 @@ -z-WwwD -z-Www -Www-D -"""}} -TIME_EXPRESSIONS = {"basic": {"complete": u""" +""" } +} +TIME_EXPRESSIONS = { + "basic": { + "complete": u""" # No Time Zone hhmmss @@ -85,14 +91,14 @@ hhmm,nn hh,ii """, - "reduced": u""" + "reduced": u""" # No Time Zone hhmm hh # No Time Zone - decimals """, - "truncated": u""" + "truncated": u""" # No Time Zone -mmss -mm @@ -102,8 +108,9 @@ -mmss,tt -mm,nn --ss,tt -"""}, - "extended": {"complete": u""" +""" }, + "extended": { + "complete": u""" # No Time Zone hh:mm:ss @@ -112,12 +119,12 @@ hh:mm,nn hh,ii # Deviation? Not allowed in standard ? """, - "reduced": u""" + "reduced": u""" # No Time Zone hh:mm hh # Deviation? Not allowed in standard ? """, - "truncated": u""" + "truncated": u""" # No Time Zone -mm:ss -mm # Deviation? Not allowed in standard ? @@ -127,51 +134,69 @@ -mm:ss,tt -mm,nn # Deviation? Not allowed in standard ? --ss,tt # Deviation? Not allowed in standard ? -"""}} -TIMEZONE_EXPRESSIONS = {"basic": u""" +""" } +} +TIMEZONE_EXPRESSIONS = { + "basic": u""" Z ±hh ±hhmm """, - "extended": u""" + "extended": u""" Z ±hh # Deviation? Not allowed in standard? ±hh:mm -"""} +""" +} TIME_DESIGNATOR = "T" _DATE_TRANSLATE_INFO = [ - (u"±", "(?P[-+])", "%(year_sign)s", "year_sign"), - (u"CC", "(?P\d\d)", "%(century)02d", "century"), - (u"YY", "(?P\d\d)", "%(year_of_century)02d", - "year_of_century"), - (u"MM", "(?P\d\d)", "%(month_of_year)02d", "month_of_year"), - (u"DDD", "(?P\d\d\d)", "%(day_of_year)03d", "day_of_year"), - (u"DD", "(?P\d\d)", "%(day_of_month)02d", "day_of_month"), - (u"Www", "W(?P\d\d)", "W%(week_of_year)02d", "week_of_year"), - (u"D", "(?P\d)", "%(day_of_week)01d", "day_of_week"), - (u"z", "(?P\d)", "%(year_of_decade)01d", "year_of_decade"), - (u"^---", "(?P---)", "---", None), - (u"^--", "(?P--)", "--", None), - (u"^-", "(?P-)", "-", None) + (u"±", "(?P[-+])", + "%(year_sign)s", "year_sign"), + (u"CC", "(?P\d\d)", + "%(century)02d", "century"), + (u"YY", "(?P\d\d)", + "%(year_of_century)02d", "year_of_century"), + (u"MM", "(?P\d\d)", + "%(month_of_year)02d", "month_of_year"), + (u"DDD", "(?P\d\d\d)", + "%(day_of_year)03d", "day_of_year"), + (u"DD", "(?P\d\d)", + "%(day_of_month)02d", "day_of_month"), + (u"Www", "W(?P\d\d)", + "W%(week_of_year)02d", "week_of_year"), + (u"D", "(?P\d)", + "%(day_of_week)01d", "day_of_week"), + (u"z", "(?P\d)", + "%(year_of_decade)01d", "year_of_decade"), + (u"^---", "(?P---)", + "---", None), + (u"^--", "(?P--)", + "--", None), + (u"^-", "(?P-)", + "-", None) ] _TIME_TRANSLATE_INFO = [ - (u"(?<=^hh)mm", "(?P\d\d)", "%(minute_of_hour)02d", - "minute_of_hour"), - (u"(?<=^hh:)mm", "(?P\d\d)", "%(minute_of_hour)02d", - "minute_of_hour"), - (u"(?<=^-)mm", "(?P\d\d)", "%(minute_of_hour)02d", - "minute_of_hour"), - (u"^hh", "(?P\d\d)", "%(hour_of_day)02d", "hour_of_day"), + (u"(?<=^hh)mm", "(?P\d\d)", + "%(minute_of_hour)02d", "minute_of_hour"), + (u"(?<=^hh:)mm", "(?P\d\d)", + "%(minute_of_hour)02d", "minute_of_hour"), + (u"(?<=^-)mm", "(?P\d\d)", + "%(minute_of_hour)02d", "minute_of_hour"), + (u"^hh", "(?P\d\d)", + "%(hour_of_day)02d", "hour_of_day"), (u",ii", "[,.](?P\d+)", "%(hour_of_day_decimal_string)s", "hour_of_day_decimal_string"), (u",nn", "[,.](?P\d+)", "%(minute_of_hour_decimal_string)s", "minute_of_hour_decimal_string"), - (u"ss", "(?P\d\d)", "%(second_of_minute)02d", - "second_of_minute"), + (u"ss", "(?P\d\d)", + "%(second_of_minute)02d", "second_of_minute"), (u",tt", "[,.](?P\d+)", - "%(second_of_minute_decimal_string)s", "second_of_minute_decimal_string"), - (u"^--", "(?P--)", "--", None), - (u"^-", "(?P-)", "-", None) + "%(second_of_minute_decimal_string)s", + "second_of_minute_decimal_string"), + (u"^--", "(?P--)", + "--", None), + (u"^-", "(?P-)", + "-", None) ] _TIMEZONE_TRANSLATE_INFO = [ (u"(?<=±hh)mm", "(?P\d\d)", @@ -180,8 +205,10 @@ "%(time_zone_minute_abs)02d", "time_zone_minute_abs"), (u"(?<=±)hh", "(?P\d\d)", "%(time_zone_hour_abs)02d", "time_zone_hour_abs"), - (u"±", "(?P[-+])", "%(time_zone_sign)s", "time_zone_sign"), - (u"Z", "(?PZ)", "Z", None) + (u"±", "(?P[-+])", + "%(time_zone_sign)s", "time_zone_sign"), + (u"Z", "(?PZ)", + "Z", None) ] diff --git a/isodatetime/parsers.py b/isodatetime/parsers.py index 89cbeaa..deb4fe7 100644 --- a/isodatetime/parsers.py +++ b/isodatetime/parsers.py @@ -26,7 +26,7 @@ -class ISOSyntaxError(ValueError): +class ISO8601SyntaxError(ValueError): """An error denoting invalid input syntax.""" @@ -93,7 +93,7 @@ def parse(self, expression): end_point=end_point, interval=interval ) - raise ISOSyntaxError("recurrence", expression) + raise ISO8601SyntaxError("recurrence", expression) __call__ = parse @@ -258,7 +258,7 @@ def parse(self, timepoint_string, dump_format=None): timezone, bad_formats=bad_formats ) - except ISOSyntaxError: + except ISO8601SyntaxError: time = time_timezone timezone = None else: @@ -361,7 +361,7 @@ def get_date_info(self, date_string, bad_types=None): result = regex.match(date_string) if result: return (format_key, type_key), result.groupdict() - raise ISOSyntaxError("date", date_string) + raise ISO8601SyntaxError("date", date_string) def get_time_info(self, time_string, bad_formats=None, bad_types=None): """Return the properties from a time string.""" @@ -379,7 +379,7 @@ def get_time_info(self, time_string, bad_formats=None, bad_types=None): result = regex.match(time_string) if result: return result.groupdict() - raise ISOSyntaxError("time", time_string) + raise ISO8601SyntaxError("time", time_string) def get_timezone_info(self, timezone_string, bad_formats=None): """Return the properties from a timezone string.""" @@ -392,7 +392,7 @@ def get_timezone_info(self, timezone_string, bad_formats=None): result = regex.match(timezone_string) if result: return result.groupdict() - raise ISOSyntaxError("timezone", timezone_string) + raise ISO8601SyntaxError("timezone", timezone_string) class TimeIntervalParser(object): @@ -431,7 +431,7 @@ def parse(self, expression): value = float(value) result_map[key] = value return data.TimeInterval(**result_map) - raise ISOSyntaxError("duration", expression) + raise ISO8601SyntaxError("duration", expression) def parse_timepoint_expression(timepoint_expression, **kwargs): From eb26066eaa6772a9b1f9836a8a311414102e9d16 Mon Sep 17 00:00:00 2001 From: benfitzpatrick Date: Mon, 24 Feb 2014 15:30:32 +0000 Subject: [PATCH 14/14] centralise caching limit --- isodatetime/util.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/isodatetime/util.py b/isodatetime/util.py index 01fc8cc..99a26a3 100644 --- a/isodatetime/util.py +++ b/isodatetime/util.py @@ -19,12 +19,15 @@ """Provide an optimisation decorator and other utilities.""" +MAX_CACHE_SIZE = 100000 + + def cache_results(func): """Decorator to store results for given inputs. func is the decorated function. - A maximum of 100000 arg-value pairs are stored. + A maximum of MAX_CACHE_SIZE arg-value pairs are stored. """ cache = {} @@ -35,7 +38,7 @@ def wrap_func(*args, **kwargs): return cache[key] else: results = func(*args, **kwargs) - if len(cache) > 100000: + if len(cache) > MAX_CACHE_SIZE: cache.popitem() cache[key] = results return results