diff --git a/src/omi/dialects/oep/compiler.py b/src/omi/dialects/oep/compiler.py index d79ee4c..1dbcbe4 100644 --- a/src/omi/dialects/oep/compiler.py +++ b/src/omi/dialects/oep/compiler.py @@ -1,21 +1,23 @@ -import json -from collections import OrderedDict -from datetime import datetime +import datetime from omi import structure from omi.dialects.base.compiler import Compiler from omi.oem_structures import oem_v15 +def compile_date_or_none(x, format=None): + if isinstance(x, (datetime.datetime, datetime.date)): + if format: + return x.strftime(format) + else: + return x.isoformat() + else: + return x + + class JSONCompiler(Compiler): __METADATA_VERSION = "OEP-1.4.0" - def _compile_date(self, date: datetime, format): - if date: - return date.strftime(format) - else: - return None - def _construct_dict(self, *args, omit_none=True, **kwargs): """ Accepts a list of arguments of shape (name: str, field: Compileable) and returns a dictionary that maps @@ -61,7 +63,7 @@ def visit_contribution(self, contribution: structure.Contribution, *args, **kwar ("email", contribution.contributor.email), ("object", contribution.object), ("comment", contribution.comment), - ("date", self._compile_date(contribution.date, "%Y-%m-%d")), + ("date", compile_date_or_none(contribution.date, "%Y-%m-%d")), ) def visit_language(self, language: structure.Language, *args, **kwargs): @@ -90,11 +92,14 @@ def visit_temporal(self, temporal: structure.Temporal, *args, **kwargs): start = None end = None if temporal.ts_start is not None: - start = self._compile_date(temporal.ts_start, "%Y-%m-%dT%H:%M%z")[:-2] + start = compile_date_or_none(temporal.ts_start) if temporal.ts_end is not None: - end = self._compile_date(temporal.ts_end, "%Y-%m-%dT%H:%M%z")[:-2] + end = compile_date_or_none(temporal.ts_end) return self._construct_dict( - ("referenceDate", self._compile_date(temporal.reference_date, "%Y-%m-%d")), + ( + "referenceDate", + compile_date_or_none(temporal.reference_date, "%Y-%m-%d"), + ), timeseries=self._construct_dict( ("start", start), ("end", end), @@ -202,7 +207,9 @@ def visit_meta_comment(self, comment: structure.MetaComment, *args, **kwargs): def visit_metadata(self, metadata: structure.OEPMetadata, *args, **kwargs): publication_date = None if metadata.publication_date is not None: - publication_date = self._compile_date(metadata.publication_date, "%Y-%m-%d") + publication_date = compile_date_or_none( + metadata.publication_date, "%Y-%m-%d" + ) return self._construct_dict( ("name", metadata.name), ("title", metadata.title), @@ -286,9 +293,9 @@ def visit_timeseries(self, timeseries: oem_v15.Timeseries, *args, **kwargs): start = None end = None if timeseries.ts_start is not None: - start = self._compile_date(timeseries.ts_start, "%Y-%m-%dT%H:%M%z")[:-2] + start = compile_date_or_none(timeseries.ts_start) if timeseries.ts_end is not None: - end = self._compile_date(timeseries.ts_end, "%Y-%m-%dT%H:%M%z")[:-2] + end = compile_date_or_none(timeseries.ts_end) return self._construct_dict( ("start", start), ("end", end), @@ -299,10 +306,13 @@ def visit_timeseries(self, timeseries: oem_v15.Timeseries, *args, **kwargs): def visit_temporal(self, temporal: oem_v15.Temporal, *args, **kwargs): return self._construct_dict( - ("referenceDate", self._compile_date(temporal.reference_date, "%Y-%m-%d")), + ( + "referenceDate", + compile_date_or_none(temporal.reference_date, "%Y-%m-%d"), + ), ("timeseries", temporal.timeseries_collection), ) - + def visit_license(self, lic: oem_v15.License, *args, **kwargs): return self._construct_dict( ("name", lic.name), @@ -347,7 +357,9 @@ def visit_meta_comment(self, comment: oem_v15.MetaComment, *args, **kwargs): def visit_metadata(self, metadata: oem_v15.OEPMetadata, *args, **kwargs): publication_date = None if metadata.publication_date is not None: - publication_date = self._compile_date(metadata.publication_date, "%Y-%m-%d") + publication_date = compile_date_or_none( + metadata.publication_date, "%Y-%m-%d" + ) return self._construct_dict( ("name", metadata.name), ("title", metadata.title), diff --git a/src/omi/dialects/oep/parser.py b/src/omi/dialects/oep/parser.py index 7e701ea..8378fa4 100644 --- a/src/omi/dialects/oep/parser.py +++ b/src/omi/dialects/oep/parser.py @@ -4,9 +4,10 @@ import json import logging import pathlib +import re +import dateutil import jsonschema -from dateutil.parser import parse as parse_date from jsonschema import ValidationError # oemetadata from metadata.latest.schema import OEMETADATA_LATEST_SCHEMA @@ -31,15 +32,42 @@ ] -def parse_date_or_none(x, *args, **kwargs): +def parse_date_or_none(x): if x is None: - return None + pass + elif type(x) == int: + # e.g just a year or a unix timestamp + # NOTE: isinstance(x, int) is also True for a bool, + # which we dont want + pass + elif isinstance(x, str): + # IMPORTANT NOTE: only use dateutil.parser if date part is complete + # if you parse something like '2020' or '2020-01', it will silently + # fill in the missing month/day from the current date! + # in this case, we keep the string, if it is at least is the correct pattern + + if re.match("^[123][0-9]{3}(|-[0-9]{1,2})$", x): + # only year or year-month: keep string + pass + elif re.match("^[123][0-9]{3}-[0-9]{1,2}-[0-9]{1,2}", x): + try: + date_time = dateutil.parser.parse(x) + except Exception: + raise ParserException(f"invalid value for date: {x}") + if re.match("^[123][0-9]{3}-[0-9]{1,2}-[0-9]{1,2}$", x): + # date only + x = date_time.date() + else: + x = date_time + else: + raise ParserException(f"invalid value for date: {x}") else: - return parse_date(x, *args, **kwargs) + raise ParserException(f"invalid type for date: {type(x)}") + return x def create_report_json( - error_data, # type list[dict] + error_data, # type list[dict] save_at: pathlib.Path = "reports/", filename: str = "report.json", ): @@ -54,7 +82,6 @@ def create_report_json( class JSONParser(Parser): - def normalize_key_names_of_input(iput: dict): pass @@ -215,7 +242,6 @@ def is_valid(self, inp: dict, schema=OEMETADATA_V130_SCHEMA): except ValidationError: return False - def parse(self, json_old, *args, **kwargs): # context section context = None @@ -795,7 +821,10 @@ def parse_from_string( ) def get_any_value_not_none( - self, element: dict, keys, get_return_default=None #keys: list[str] - reove as not support by py3.8 + self, + element: dict, + keys, + get_return_default=None, # keys: list[str] - reove as not support by py3.8 ): """ Get the value for a key in a dict - but try multiple key names, in @@ -1145,7 +1174,7 @@ def parse_licence_including_former_structure(licences_element): primary_key=resource["schema"].get("primaryKey"), foreign_keys=foreign_keys, ) - + old_dialect = resource.get("dialect") if old_dialect is None: dialect = None diff --git a/tests/data/metadata_v14.json b/tests/data/metadata_v14.json index b32323b..ca773ce 100644 --- a/tests/data/metadata_v14.json +++ b/tests/data/metadata_v14.json @@ -32,8 +32,8 @@ "temporal": { "referenceDate": "2016-01-01", "timeseries": { - "start": "2017-01-01T00:00+01", - "end": "2017-12-31T23:00+01", + "start": "2017-01-01T00:00:00+01:00", + "end": "2017-12-31T23:00:00+01:00", "resolution": "1 h", "alignment": "left", "aggregationType": "sum" diff --git a/tests/data/metadata_v15.json b/tests/data/metadata_v15.json index 81893a8..68d455d 100644 --- a/tests/data/metadata_v15.json +++ b/tests/data/metadata_v15.json @@ -44,15 +44,15 @@ "referenceDate": "2016-01-01", "timeseries": [ { - "start": "2017-01-01T00:00+01", - "end": "2017-12-31T23:00+01", + "start": "2017-01-01T00:00:00+01:00", + "end": "2017-12-31T23:00:00+01:00", "resolution": "1 h", "alignment": "left", "aggregationType": "sum" }, { - "start": "2018-01-01T00:00+01", - "end": "2019-06-01T23:00+01", + "start": "2018-01-01T00:00:00+01:00", + "end": "2019-06-01T23:00:00+01:00", "resolution": "15 min", "alignment": "right", "aggregationType": "sum" @@ -126,12 +126,10 @@ "description": "Unique identifier", "type": "serial", "isAbout": [ - { - } + {} ], "valueReference": [ - { - } + {} ] }, { @@ -145,8 +143,7 @@ } ], "valueReference": [ - { - } + {} ] }, { @@ -183,8 +180,7 @@ } ], "valueReference": [ - { - } + {} ] }, { @@ -199,8 +195,7 @@ } ], "valueReference": [ - { - } + {} ] }, { @@ -214,8 +209,7 @@ } ], "valueReference": [ - { - } + {} ] } ], diff --git a/tests/test_dialects/test_oep/test_regression/__init__.py b/tests/test_dialects/test_oep/test_regression/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_dialects/test_oep/test_regression/test_issue86_datetime.py b/tests/test_dialects/test_oep/test_regression/test_issue86_datetime.py new file mode 100644 index 0000000..f701b2e --- /dev/null +++ b/tests/test_dialects/test_oep/test_regression/test_issue86_datetime.py @@ -0,0 +1,105 @@ +import json +from unittest import SkipTest +from unittest import TestCase + +from omi.dialects.oep import OEP_V_1_3_Dialect +from omi.dialects.oep import OEP_V_1_4_Dialect +from omi.dialects.oep import OEP_V_1_5_Dialect +from omi.dialects.oep.compiler import compile_date_or_none +from omi.dialects.oep.parser import ParserException +from omi.dialects.oep.parser import parse_date_or_none + +# in the metadata, for some values we return the date,not the full datetime + + +class TestIssue86Datetime(TestCase): + + BAD_VALUES = [True, {}, "", "not a date", "200", "2020-30-40", "2020-01-01 WTF"] + OK_VALUES = { + None: None, + 2020: 2020, + "2020": "2020", + "2020-12": "2020-12", + "2020-12-02": "2020-12-02", + "2020-12-2": "2020-12-02", + "2020-10-01T10:12:13": "2020-10-01T10:12:13", + "2020-10-01 10:12": "2020-10-01T10:12:00", + "2020-10-01T10:12:13+0200": "2020-10-01T10:12:13+02:00", + } + + def roundtrip_value(self, value): + value = parse_date_or_none(value) + value = compile_date_or_none(value) + return value + + def test_datetime_roundtrip(self): + for bad_value in self.BAD_VALUES: + self.assertRaises(ParserException, self.roundtrip_value, bad_value) + for ok_value, exp_value in self.OK_VALUES.items(): + self.assertEqual(self.roundtrip_value(ok_value), exp_value) + + +class TestIssue86Metadata(TestIssue86Datetime): + """test roundtrip in OEP_V_1_5_Dialect""" + + dialect = None + OK_VALUES = { + None: (None, None), + 2020: (2020, 2020), + "2020": ("2020", "2020"), + "2020-12": ("2020-12", "2020-12"), + "2020-12-02": ("2020-12-02", "2020-12-02"), + "2020-12-2": ("2020-12-02", "2020-12-02"), + "2020-10-01T10:12:13": ("2020-10-01T10:12:13", "2020-10-01"), + "2020-10-01 10:12": ("2020-10-01T10:12:00", "2020-10-01"), + "2020-10-01T10:12:13+0200": ("2020-10-01T10:12:13+02:00", "2020-10-01"), + } + + def test_datetime_roundtrip(self): + # only actually run tests in subclasses + if self.dialect: + return super().test_datetime_roundtrip() + + def roundtrip_value(self, value): + metadata_in = {"id": "test"} + self.set_date_datetime_values(metadata_in, value) + metadata_str = json.dumps(metadata_in) + metadata_obj = self.dialect.parse(metadata_str) + metadata_out = self.dialect.compile(metadata_obj) + return self.get_date_datetime_values(metadata_out) + + def set_date_datetime_values(self, metadata, value): + raise NotImplementedError() + + def get_date_datetime_values(self, metadata): + raise NotImplementedError() + + +class TestIssue86Datetime_V_1_5(TestIssue86Metadata): + """test roundtrip in OEP_V_1_5_Dialect""" + + dialect = OEP_V_1_5_Dialect() + + def set_date_datetime_values(self, metadata, value): + metadata["publicationDate"] = value + metadata["temporal"] = {"timeseries": [{"start": value}]} + + def get_date_datetime_values(self, metadata): + v_datetime = metadata["temporal"]["timeseries"][0].get("start") + v_date = metadata.get("publicationDate") + return (v_datetime, v_date) + + +class TestIssue86Datetime_V_1_4(TestIssue86Metadata): + """test roundtrip in OEP_V_1_4_Dialect""" + + dialect = OEP_V_1_4_Dialect() + + def set_date_datetime_values(self, metadata, value): + metadata["publicationDate"] = value + metadata["temporal"] = {"timeseries": {"start": value}} + + def get_date_datetime_values(self, metadata): + v_datetime = metadata["temporal"]["timeseries"].get("start") + v_date = metadata.get("publicationDate") + return (v_datetime, v_date)