Skip to content

Commit

Permalink
Merge pull request #87 from OpenEnergyPlatform/fix/issue-86-datetime-…
Browse files Browse the repository at this point in the history
…parser

Fix/issue 86 datetime parser
  • Loading branch information
wingechr authored Jan 25, 2023
2 parents 4e61adb + d6270ce commit 78e1f70
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 46 deletions.
50 changes: 31 additions & 19 deletions src/omi/dialects/oep/compiler.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
import json
from collections import OrderedDict
from datetime import datetime
import datetime

from omi import structure
from omi.dialects.base.compiler import Compiler
from omi.oem_structures import oem_v15


def compile_date_or_none(x, format=None):
    """Serialize a date-like value to a string; pass anything else through.

    Args:
        x: Value to compile. ``datetime.datetime`` and ``datetime.date``
            instances are converted to strings. Any other value (for example
            ``None``, an ``int`` or an already formatted string) is returned
            unchanged.
        format: Optional ``strftime`` format string. When it is falsy, the
            result of ``isoformat()`` is used instead.

    Returns:
        The formatted string for date-like input, otherwise ``x`` itself.
    """
    # Non-date values are handed back untouched.
    if not isinstance(x, (datetime.datetime, datetime.date)):
        return x
    if format:
        return x.strftime(format)
    return x.isoformat()


class JSONCompiler(Compiler):
__METADATA_VERSION = "OEP-1.4.0"

def _compile_date(self, date: datetime, format):
if date:
return date.strftime(format)
else:
return None

def _construct_dict(self, *args, omit_none=True, **kwargs):
"""
Accepts a list of arguments of shape (name: str, field: Compileable) and returns a dictionary that maps
Expand Down Expand Up @@ -61,7 +63,7 @@ def visit_contribution(self, contribution: structure.Contribution, *args, **kwar
("email", contribution.contributor.email),
("object", contribution.object),
("comment", contribution.comment),
("date", self._compile_date(contribution.date, "%Y-%m-%d")),
("date", compile_date_or_none(contribution.date, "%Y-%m-%d")),
)

def visit_language(self, language: structure.Language, *args, **kwargs):
Expand Down Expand Up @@ -90,11 +92,14 @@ def visit_temporal(self, temporal: structure.Temporal, *args, **kwargs):
start = None
end = None
if temporal.ts_start is not None:
start = self._compile_date(temporal.ts_start, "%Y-%m-%dT%H:%M%z")[:-2]
start = compile_date_or_none(temporal.ts_start)
if temporal.ts_end is not None:
end = self._compile_date(temporal.ts_end, "%Y-%m-%dT%H:%M%z")[:-2]
end = compile_date_or_none(temporal.ts_end)
return self._construct_dict(
("referenceDate", self._compile_date(temporal.reference_date, "%Y-%m-%d")),
(
"referenceDate",
compile_date_or_none(temporal.reference_date, "%Y-%m-%d"),
),
timeseries=self._construct_dict(
("start", start),
("end", end),
Expand Down Expand Up @@ -202,7 +207,9 @@ def visit_meta_comment(self, comment: structure.MetaComment, *args, **kwargs):
def visit_metadata(self, metadata: structure.OEPMetadata, *args, **kwargs):
publication_date = None
if metadata.publication_date is not None:
publication_date = self._compile_date(metadata.publication_date, "%Y-%m-%d")
publication_date = compile_date_or_none(
metadata.publication_date, "%Y-%m-%d"
)
return self._construct_dict(
("name", metadata.name),
("title", metadata.title),
Expand Down Expand Up @@ -286,9 +293,9 @@ def visit_timeseries(self, timeseries: oem_v15.Timeseries, *args, **kwargs):
start = None
end = None
if timeseries.ts_start is not None:
start = self._compile_date(timeseries.ts_start, "%Y-%m-%dT%H:%M%z")[:-2]
start = compile_date_or_none(timeseries.ts_start)
if timeseries.ts_end is not None:
end = self._compile_date(timeseries.ts_end, "%Y-%m-%dT%H:%M%z")[:-2]
end = compile_date_or_none(timeseries.ts_end)
return self._construct_dict(
("start", start),
("end", end),
Expand All @@ -299,10 +306,13 @@ def visit_timeseries(self, timeseries: oem_v15.Timeseries, *args, **kwargs):

def visit_temporal(self, temporal: oem_v15.Temporal, *args, **kwargs):
return self._construct_dict(
("referenceDate", self._compile_date(temporal.reference_date, "%Y-%m-%d")),
(
"referenceDate",
compile_date_or_none(temporal.reference_date, "%Y-%m-%d"),
),
("timeseries", temporal.timeseries_collection),
)

def visit_license(self, lic: oem_v15.License, *args, **kwargs):
return self._construct_dict(
("name", lic.name),
Expand Down Expand Up @@ -347,7 +357,9 @@ def visit_meta_comment(self, comment: oem_v15.MetaComment, *args, **kwargs):
def visit_metadata(self, metadata: oem_v15.OEPMetadata, *args, **kwargs):
publication_date = None
if metadata.publication_date is not None:
publication_date = self._compile_date(metadata.publication_date, "%Y-%m-%d")
publication_date = compile_date_or_none(
metadata.publication_date, "%Y-%m-%d"
)
return self._construct_dict(
("name", metadata.name),
("title", metadata.title),
Expand Down
47 changes: 38 additions & 9 deletions src/omi/dialects/oep/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
import json
import logging
import pathlib
import re

import dateutil
import jsonschema
from dateutil.parser import parse as parse_date
from jsonschema import ValidationError
# oemetadata
from metadata.latest.schema import OEMETADATA_LATEST_SCHEMA
Expand All @@ -31,15 +32,42 @@
]


def parse_date_or_none(x):
    """Normalize a raw date value.

    Args:
        x: ``None``, an ``int`` (e.g. a year or a unix timestamp) or a string
            starting with ``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD``.

    Returns:
        * ``None`` or the ``int`` unchanged,
        * the unchanged string for year-only / year-month values,
        * a ``datetime.date`` for a string that is a complete date only,
        * a ``datetime.datetime`` for a complete date followed by more text
          that the dateutil parser accepts (e.g. a time).

    Raises:
        ParserException: if ``x`` has an unsupported type (including ``bool``)
            or is a string that does not describe a valid date.
    """
    # Import the submodule explicitly: a bare ``import dateutil`` does not
    # guarantee that ``dateutil.parser`` is available as an attribute. Doing
    # this outside the ``try`` below keeps an import problem from being
    # reported as an "invalid value for date".
    from dateutil.parser import parse as parse_datetime

    if x is None:
        return None
    if type(x) is int:
        # e.g. just a year or a unix timestamp.
        # NOTE: an exact type check is used on purpose, because
        # isinstance(x, int) is also True for a bool, which we don't want.
        return x
    if not isinstance(x, str):
        raise ParserException(f"invalid type for date: {type(x)}")

    # IMPORTANT NOTE: only use the dateutil parser if the date part is
    # complete. If you parse something like '2020' or '2020-01', it will
    # silently fill in the missing month/day from the current date!
    # In that case we keep the string, provided it matches the pattern.
    if re.match("^[123][0-9]{3}(|-[0-9]{1,2})$", x):
        # only year or year-month: keep string
        return x
    if not re.match("^[123][0-9]{3}-[0-9]{1,2}-[0-9]{1,2}", x):
        raise ParserException(f"invalid value for date: {x}")

    try:
        date_time = parse_datetime(x)
    except Exception as err:
        # Any parser failure is reported uniformly; keep the cause attached.
        raise ParserException(f"invalid value for date: {x}") from err

    if re.match("^[123][0-9]{3}-[0-9]{1,2}-[0-9]{1,2}$", x):
        # date only
        return date_time.date()
    return date_time


def create_report_json(
error_data, # type list[dict]
error_data, # type list[dict]
save_at: pathlib.Path = "reports/",
filename: str = "report.json",
):
Expand All @@ -54,7 +82,6 @@ def create_report_json(


class JSONParser(Parser):

def normalize_key_names_of_input(iput: dict):
pass

Expand Down Expand Up @@ -215,7 +242,6 @@ def is_valid(self, inp: dict, schema=OEMETADATA_V130_SCHEMA):
except ValidationError:
return False


def parse(self, json_old, *args, **kwargs):
# context section
context = None
Expand Down Expand Up @@ -795,7 +821,10 @@ def parse_from_string(
)

def get_any_value_not_none(
self, element: dict, keys, get_return_default=None #keys: list[str] - reove as not support by py3.8
self,
element: dict,
keys,
get_return_default=None, # keys: list[str] - reove as not support by py3.8
):
"""
Get the value for a key in a dict - but try multiple key names, in
Expand Down Expand Up @@ -1145,7 +1174,7 @@ def parse_licence_including_former_structure(licences_element):
primary_key=resource["schema"].get("primaryKey"),
foreign_keys=foreign_keys,
)

old_dialect = resource.get("dialect")
if old_dialect is None:
dialect = None
Expand Down
4 changes: 2 additions & 2 deletions tests/data/metadata_v14.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
"temporal": {
"referenceDate": "2016-01-01",
"timeseries": {
"start": "2017-01-01T00:00+01",
"end": "2017-12-31T23:00+01",
"start": "2017-01-01T00:00:00+01:00",
"end": "2017-12-31T23:00:00+01:00",
"resolution": "1 h",
"alignment": "left",
"aggregationType": "sum"
Expand Down
26 changes: 10 additions & 16 deletions tests/data/metadata_v15.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,15 @@
"referenceDate": "2016-01-01",
"timeseries": [
{
"start": "2017-01-01T00:00+01",
"end": "2017-12-31T23:00+01",
"start": "2017-01-01T00:00:00+01:00",
"end": "2017-12-31T23:00:00+01:00",
"resolution": "1 h",
"alignment": "left",
"aggregationType": "sum"
},
{
"start": "2018-01-01T00:00+01",
"end": "2019-06-01T23:00+01",
"start": "2018-01-01T00:00:00+01:00",
"end": "2019-06-01T23:00:00+01:00",
"resolution": "15 min",
"alignment": "right",
"aggregationType": "sum"
Expand Down Expand Up @@ -126,12 +126,10 @@
"description": "Unique identifier",
"type": "serial",
"isAbout": [
{
}
{}
],
"valueReference": [
{
}
{}
]
},
{
Expand All @@ -145,8 +143,7 @@
}
],
"valueReference": [
{
}
{}
]
},
{
Expand Down Expand Up @@ -183,8 +180,7 @@
}
],
"valueReference": [
{
}
{}
]
},
{
Expand All @@ -199,8 +195,7 @@
}
],
"valueReference": [
{
}
{}
]
},
{
Expand All @@ -214,8 +209,7 @@
}
],
"valueReference": [
{
}
{}
]
}
],
Expand Down
Empty file.
105 changes: 105 additions & 0 deletions tests/test_dialects/test_oep/test_regression/test_issue86_datetime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import json
from unittest import SkipTest
from unittest import TestCase

from omi.dialects.oep import OEP_V_1_3_Dialect
from omi.dialects.oep import OEP_V_1_4_Dialect
from omi.dialects.oep import OEP_V_1_5_Dialect
from omi.dialects.oep.compiler import compile_date_or_none
from omi.dialects.oep.parser import ParserException
from omi.dialects.oep.parser import parse_date_or_none

# In the metadata, some values are returned as a date only, not as a full datetime.


class TestIssue86Datetime(TestCase):
    """Regression test for issue 86: roundtrip of date and datetime values.

    Every value is parsed with ``parse_date_or_none`` and compiled back with
    ``compile_date_or_none``.
    """

    # Inputs that must be rejected with a ParserException.
    BAD_VALUES = [True, {}, "", "not a date", "200", "2020-30-40", "2020-01-01 WTF"]
    # Accepted input value -> expected value after the roundtrip.
    OK_VALUES = {
        None: None,
        2020: 2020,
        "2020": "2020",
        "2020-12": "2020-12",
        "2020-12-02": "2020-12-02",
        "2020-12-2": "2020-12-02",
        "2020-10-01T10:12:13": "2020-10-01T10:12:13",
        "2020-10-01 10:12": "2020-10-01T10:12:00",
        "2020-10-01T10:12:13+0200": "2020-10-01T10:12:13+02:00",
    }

    def roundtrip_value(self, value):
        """Parse ``value`` and compile the parsed result again."""
        value = parse_date_or_none(value)
        value = compile_date_or_none(value)
        return value

    def test_datetime_roundtrip(self):
        """Check that bad values raise and good values roundtrip as expected."""
        # subTest makes a failure report name the offending input value
        # instead of only pointing at the loop.
        for bad_value in self.BAD_VALUES:
            with self.subTest(bad_value=bad_value):
                self.assertRaises(ParserException, self.roundtrip_value, bad_value)
        for ok_value, exp_value in self.OK_VALUES.items():
            with self.subTest(ok_value=ok_value):
                self.assertEqual(self.roundtrip_value(ok_value), exp_value)


class TestIssue86Metadata(TestIssue86Datetime):
    """Base class for roundtrip tests through a complete metadata dialect.

    Subclasses set ``dialect`` and implement ``set_date_datetime_values`` and
    ``get_date_datetime_values``. Without a dialect the inherited test is
    skipped.
    """

    dialect = None
    # Accepted input value -> expected tuple as returned by
    # ``get_date_datetime_values`` after the roundtrip.
    OK_VALUES = {
        None: (None, None),
        2020: (2020, 2020),
        "2020": ("2020", "2020"),
        "2020-12": ("2020-12", "2020-12"),
        "2020-12-02": ("2020-12-02", "2020-12-02"),
        "2020-12-2": ("2020-12-02", "2020-12-02"),
        "2020-10-01T10:12:13": ("2020-10-01T10:12:13", "2020-10-01"),
        "2020-10-01 10:12": ("2020-10-01T10:12:00", "2020-10-01"),
        "2020-10-01T10:12:13+0200": ("2020-10-01T10:12:13+02:00", "2020-10-01"),
    }

    def test_datetime_roundtrip(self):
        """Run the inherited roundtrip test, but only in subclasses with a dialect."""
        if not self.dialect:
            # Report the dialect-less base class as skipped rather than as a
            # passing test that asserted nothing.
            self.skipTest("no dialect configured; only subclasses are tested")
        return super().test_datetime_roundtrip()

    def roundtrip_value(self, value):
        """Roundtrip ``value`` through ``dialect.parse`` and ``dialect.compile``."""
        metadata_in = {"id": "test"}
        self.set_date_datetime_values(metadata_in, value)
        metadata_str = json.dumps(metadata_in)
        metadata_obj = self.dialect.parse(metadata_str)
        metadata_out = self.dialect.compile(metadata_obj)
        return self.get_date_datetime_values(metadata_out)

    def set_date_datetime_values(self, metadata, value):
        """Write ``value`` into the dialect-specific fields of ``metadata``."""
        raise NotImplementedError()

    def get_date_datetime_values(self, metadata):
        """Read the dialect-specific values back from compiled ``metadata``."""
        raise NotImplementedError()


class TestIssue86Datetime_V_1_5(TestIssue86Metadata):
    """Run the date/datetime roundtrip against OEP_V_1_5_Dialect."""

    dialect = OEP_V_1_5_Dialect()

    def set_date_datetime_values(self, metadata, value):
        """Store ``value`` as publication date and as start of the first timeseries."""
        metadata["publicationDate"] = value
        first_series = {"start": value}
        metadata["temporal"] = {"timeseries": [first_series]}

    def get_date_datetime_values(self, metadata):
        """Return ``(first timeseries start, publication date)`` from ``metadata``."""
        first_series = metadata["temporal"]["timeseries"][0]
        return (first_series.get("start"), metadata.get("publicationDate"))


class TestIssue86Datetime_V_1_4(TestIssue86Metadata):
    """Run the date/datetime roundtrip against OEP_V_1_4_Dialect."""

    dialect = OEP_V_1_4_Dialect()

    def set_date_datetime_values(self, metadata, value):
        """Store ``value`` as publication date and as timeseries start."""
        metadata["publicationDate"] = value
        series = {"start": value}
        metadata["temporal"] = {"timeseries": series}

    def get_date_datetime_values(self, metadata):
        """Return ``(timeseries start, publication date)`` from ``metadata``."""
        series = metadata["temporal"]["timeseries"]
        return (series.get("start"), metadata.get("publicationDate"))

0 comments on commit 78e1f70

Please sign in to comment.