From 5ce621bce8479816e6820da516e062f0be5de921 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Tue, 29 Oct 2024 14:56:42 +0100 Subject: [PATCH 01/23] rework readme (draft) #55 --- README.rst | 70 ++++++++++++++---------------------------------------- 1 file changed, 18 insertions(+), 52 deletions(-) diff --git a/README.rst b/README.rst index be21dd1..e89bbfe 100644 --- a/README.rst +++ b/README.rst @@ -2,6 +2,10 @@ Open Energy Family - Open Metadata Integration OMI ================================================== +A library to process and translate and work with the open energy metadata. + +* Free software: AGPL-3.0 + Overview ======== @@ -22,17 +26,9 @@ Overview :target: https://readthedocs.org/projects/omi :alt: Documentation Status -.. |travis| image:: https://travis-ci.org/OpenEnergyPlatform/omi.svg?branch=master - :alt: Travis-CI Build Status - :target: https://travis-ci.org/OpenEnergyPlatform/omi - -.. |appveyor| image:: https://ci.appveyor.com/api/projects/status/github/OpenEnergyPlatform/omi?branch=master&svg=true - :alt: AppVeyor Build Status - :target: https://ci.appveyor.com/project/OpenEnergyPlatform/omi - -.. |requires| image:: https://requires.io/github/OpenEnergyPlatform/omi/requirements.svg?branch=master - :alt: Requirements Status - :target: https://requires.io/github/OpenEnergyPlatform/omi/requirements/?branch=master +.. |Automated Test| image:: https://github.com/OpenEnergyPlatform/omi/actions/workflows/automated-testing.yml/badge.svg + :target: https://github.com/OpenEnergyPlatform/omi/actions/workflows/automated-testing.yml + :alt: Test status .. |codecov| image:: https://codecov.io/github/OpenEnergyPlatform/omi/coverage.svg?branch=master :alt: Coverage Status @@ -61,10 +57,6 @@ Overview .. end-badges -A library to process and translate open energy metadata. - -* Free software: AGPL-3.0 - Installation ============ @@ -75,42 +67,20 @@ Installation Documentation ============= - +Documentation for OMI versions up to 0.2: https://omi.readthedocs.io/ +Documentation for reworked OMI versions starting from 1.0 you can find in the README document. Later on we migrate the documentation to mkdocs. + Usage ===== -**Parse, Compile, Render, Convert and Validate** -Omi can read(parse), compile, Render(json compilant), convert(convert metadata from v1.4 to v1.5 structure) and validate - a json -file or object that is compliant with the oemetadata spec. This is usefull to do various operations that help to integrate with - as -well as in interact with the oemetadata. Some parts of this tool might still be volatile but the code quality is conventionsly improved -as this module is a core component of the oeplatfroms metadata integration system. - -Check if omi is able to read a oemetadata file (for version 1.4 and 1.5) -CLI - oemetadata version 1.5:: - - omi translate -f oep-v1.5 examples/data/metadata_v15.json - -CLI - oemetadata version 1.4:: - - omi translate -f oep-v1.4 -t oep-v1.4 examples/data/metadata_v14.json - -omi is able to read a JSON file and parse it into one of the internal Python structures (depending on the oemetadata version). -The OEPMetadata Python object can then be compiled and converted back to JSON. You can manipulate a successfully parsed -OEPMetadata object. - -Module usage:: - - from omi.dialects.oep.dialect import OEP_V_1_3_Dialect, OEP_V_1_4_Dialect, OEP_V_1_5_Dialect - inp = '{"id":"unique_id"}' #or read from json file - dialect1_5 = OEP_V_1_5_Dialect() - parsed = dialect1_5.parse(input) - print(parsed) - parsed.identifier = "another_unique_id" - compiled = dialect1_5.compile(parsed) - print(compiled) +You can use omi as python module and import its functionality into your codebase or use the cli capabilities. OMI provides tooling for validation +of oemetdata JSON documents using JSON-Schema. It also include helpers to generate the tabular data resource definition to seep up the metadata +creation and helps to select a open license by checking the license identifier against the SPDX license list. +As the oemetadata is updated from time to time we provides conversion functionality to convert metadata documents that use an earlier version +of the oemetadata-specification to help users stick with the latest enhancements the latest oemetadata version offers. **Conversion** @@ -135,17 +105,13 @@ Module usage:: # You can import the JSONParser directly like this: import json - from omi.dialects.oep.parser import JSONParser + from omi import validation with open("tests/data/metadata_v15.json", "r", encoding="utf-8") as f: metadata = json.load(f) - parser = JSONParser() - parser.validate(metadata) - - # check if your metadata is valid for the given schmea - schema = ... get a schema or import form oemetadata module - parser.is_valid(metadata, schema) + result = validation(metadata) + # TBD **Additional Fields - not related to the OEMetadata specification** From 849e0e4623d7b23893ee947dba91f70a0f7b43fb Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 09:09:43 +0100 Subject: [PATCH 02/23] add OEMetaData v20 #102 --- src/omi/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/omi/base.py b/src/omi/base.py index 6469c9b..f6d6a4b 100644 --- a/src/omi/base.py +++ b/src/omi/base.py @@ -7,12 +7,12 @@ from dataclasses import dataclass import requests -from metadata import v152, v160 +from metadata import v20, v152, v160 from .settings import OEP_URL # Order matters! First entry equals latest version of metadata format -METADATA_FORMATS = {"OEP": ["OEP-1.6.0", "OEP-1.5.2"], "INSPIRE": []} +METADATA_FORMATS = {"OEP": ["OEMetadata-2.0.0", "OEP-1.6.0", "OEP-1.5.2"], "INSPIRE": []} METADATA_VERSIONS = {version: md_format for md_format, versions in METADATA_FORMATS.items() for version in versions} @@ -148,7 +148,7 @@ def __get_metadata_specs_for_oep(metadata_version: str) -> MetadataSpecification MetadataSpecification Metadata schema for given metadata version including template and example. """ - metadata_modules = {"OEP-1.5.2": v152, "OEP-1.6.0": v160} + metadata_modules = {"OEP-1.5.2": v152, "OEP-1.6.0": v160, "OEMetadata-2.0.0": v20} metadata_module = metadata_modules[metadata_version] module_path = pathlib.Path(metadata_module.__file__).parent specs = {} From 176b67772c9ca266c00f945404425ead18238eb8 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 09:21:36 +0100 Subject: [PATCH 03/23] add additional imports #102 --- src/omi/conversion.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/omi/conversion.py b/src/omi/conversion.py index 8fa1270..b6f0702 100644 --- a/src/omi/conversion.py +++ b/src/omi/conversion.py @@ -1,4 +1,5 @@ """Conversion module for OMI to update metadata to different versions.""" + from __future__ import annotations from copy import deepcopy From 3e5fba4c09ea0ec50f6030ca69ad6b674eb1e041 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 11:18:28 +0100 Subject: [PATCH 04/23] add draft functionality to convert metadata to oemetadata fom v160 to v2 #102 --- src/omi/conversion.py | 127 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 126 insertions(+), 1 deletion(-) diff --git a/src/omi/conversion.py b/src/omi/conversion.py index b6f0702..6a1923b 100644 --- a/src/omi/conversion.py +++ b/src/omi/conversion.py @@ -4,7 +4,7 @@ from copy import deepcopy -from omi.base import get_metadata_version +from omi.base import get_metadata_specification, get_metadata_version class ConversionError(Exception): @@ -96,6 +96,131 @@ def __convert_oep_152_to_160(metadata: dict) -> dict: return metadata +def __convert_oep_160_to_200(metadata: dict) -> dict: # noqa: C901, PLR0915 + """ + Convert metadata with version "OEP-1.6.0" to "OEMetadata-2.0.0" using the v2.0 template. + + Parameters + ---------- + metadata: dict + Metadata dictionary in v1.6 format + + Returns + ------- + dict + Updated metadata dictionary in v2.0 format + """ + metadata_v2 = get_metadata_specification("OEMetadata-2.0.0") + # Deep copy template to avoid mutating the original template + metadata_v2 = deepcopy(metadata_v2.template) + + # Map v1.6 fields to v2.0 fields + metadata_v2["name"] = None + metadata_v2["title"] = None + metadata_v2["id"] = None + + # Populate resources + for i, resource in enumerate(metadata.get("resources", [])): + if i >= len(metadata_v2["resources"]): + metadata_v2["resources"].append(deepcopy(metadata_v2["resources"][0])) + + resource_v2 = metadata_v2["resources"][i] + resource_v2["@id"] = metadata.get("@id") + resource_v2["@context"] = metadata.get("@context") + resource_v2["name"] = resource.get("name").split(".")[1] + resource_v2["topics"] = [resource.get("name", "").split(".")[0]] + resource_v2["title"] = metadata.get("title") + resource_v2["path"] = metadata.get("id") + resource_v2["description"] = metadata.get("description") + resource_v2["languages"] = metadata.get("language", []) + resource_v2["subject"] = metadata.get("subject", []) + resource_v2["keywords"] = metadata.get("keywords", []) + resource_v2["publicationDate"] = metadata.get("publicationDate") + + # Set to null to avoid validation errors: Date + resource_v2["embargoPeriod"]["start"] = None + resource_v2["embargoPeriod"]["end"] = None + + resource_v2["context"] = metadata.get("context", {}) + + # Set to null to avoid validation errors: URI + resource_v2["spatial"]["location"]["@id"] = None + resource_v2["spatial"]["extent"]["name"] = metadata.get("spatial", {}).get("extent") + # Set to null to avoid validation errors: URI + resource_v2["spatial"]["extent"]["@id"] = None + resource_v2["spatial"]["extent"]["resolutionValue"], resource_v2["spatial"]["extent"]["resolutionUnit"] = ( + metadata.get("spatial", {}).get("resolution", "").split(" ", 1) + ) + + resource_v2["temporal"] = metadata.get("temporal", {}) + # # Populate timeseries + # for i_ts, timeseries in enumerate(metadata.get("temporal", {}).get("timeseries", [])): + # if i_ts >= len(resource_v2["temporal"]["timeseries"]): + + # Populate sources + for i_source, source in enumerate(metadata.get("sources", [])): + if i_source >= len(resource_v2["sources"]): + resource_v2["sources"].append(deepcopy(metadata_v2["resources"][0]["sources"][0])) + + sources_v2 = resource_v2["sources"][i_source] + sources_v2["title"] = source.get("title") + sources_v2["description"] = source.get("description") + sources_v2["path"] = source.get("path") + for i_s_license, s_license in enumerate(source.get("licenses", [])): + if i_s_license >= len(sources_v2["licenses"]): + resource_v2["sources"].append(deepcopy(metadata_v2["resources"][0]["sources"][0]["licenses"][0])) + + licenses_v2 = resource_v2["licenses"][i_s_license] + licenses_v2.update(s_license) + licenses_v2["copyrightStatement"] = None + + # _license to avoid shadowing python internal + for i_license, _license in enumerate(metadata.get("licenses", [])): + if i_license >= len(resource_v2["licenses"]): + resource_v2["licenses"].append(deepcopy(metadata_v2["resources"][0]["licenses"][0])) + + licenses_v2 = resource_v2["licenses"][i_license] + licenses_v2.update(_license) + licenses_v2["copyrightStatement"] = None + + for i_contribution, contribution in enumerate(metadata.get("contributors", [])): + if i_contribution >= len(resource_v2["contributors"]): + resource_v2["contributors"].append(deepcopy(metadata_v2["resources"][0]["contributors"][0])) + + contributors_v2 = resource_v2["contributors"][i_contribution] + contributors_v2["title"] = contribution.get("title") + contributors_v2["path"] = contribution.get("path") + contributors_v2["organization"] = contribution.get("organization") + contributors_v2["date"] = contribution.get("date") + contributors_v2["object"] = contribution.get("object") + contributors_v2["comment"] = contribution.get("comment") + + # data resource/distribution definition + resource_v2["type"] = None + resource_v2["format"] = resource.get("format") + resource_v2["encoding"] = resource.get("encoding") + + for i_s_field, field in enumerate(resource.get("schema", {}).get("fields", [])): + if i_s_field >= len(resource_v2["schema"]["fields"]): + resource_v2["schema"]["fields"].append(deepcopy(metadata_v2["resources"][0]["schema"]["fields"][0])) + + schema_fields_v2 = resource_v2["schema"]["fields"][i_s_field] + schema_fields_v2["nullable"] = None + schema_fields_v2.update(field) + + resource_v2["schema"]["primaryKey"] = resource.get("schema", {}).get("primaryKey", []) + resource_v2["schema"]["foreignKeys"] = resource.get("schema", {}).get("foreignKeys", []) + + resource_v2["dialect"] = resource.get("dialect", {}) + resource_v2["review"] = metadata.get("review", {}) + + # Update metaMetadata section + metadata_v2["metaMetadata"]["metadataVersion"] = "OEMetadata-2.0.0" + metadata_v2["metaMetadata"]["metadataLicense"] = metadata.get("metaMetadata", {}).get("metadataLicense") + + return metadata_v2 + + METADATA_CONVERSIONS = { ("OEP-1.5.2", "OEP-1.6.0"): __convert_oep_152_to_160, } From f913ae6fdd9049ce5a94f53c7f7a8d2ec11c8f03 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 11:18:57 +0100 Subject: [PATCH 05/23] add conversion to the chain #102 --- src/omi/conversion.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/omi/conversion.py b/src/omi/conversion.py index 6a1923b..6aaf971 100644 --- a/src/omi/conversion.py +++ b/src/omi/conversion.py @@ -223,4 +223,5 @@ def __convert_oep_160_to_200(metadata: dict) -> dict: # noqa: C901, PLR0915 METADATA_CONVERSIONS = { ("OEP-1.5.2", "OEP-1.6.0"): __convert_oep_152_to_160, + ("OEP-1.6.0", "OEMetadata-2.0.0"): __convert_oep_160_to_200, } From ccf9d5ecafd65b2524427f424e1a8f419f68f37d Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 11:24:03 +0100 Subject: [PATCH 06/23] extend license check to handle checking all licenses in any resource (in oemetadata v2 there is a license per resource) #102 --- src/omi/license.py | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/omi/license.py b/src/omi/license.py index 6e4f4e7..9425c40 100644 --- a/src/omi/license.py +++ b/src/omi/license.py @@ -4,6 +4,8 @@ import re from pathlib import Path +from omi.base import get_metadata_version + LICENCES_FILE = Path(__file__).parent / "data" / "licenses.json" @@ -82,27 +84,42 @@ def validate_oemetadata_licenses(metadata: dict) -> None: Returns ------- None - if licences are valid, otherwise LicenseError is raised + if licenses are valid, otherwise LicenseError is raised """ if metadata is None: msg = "Metadata is empty." raise LicenseError(msg) - licenses = metadata.get("licenses", []) + version = get_metadata_version(metadata) + licenses_info = _find_license_field(metadata, version) - if not licenses: + if not licenses_info: msg = "No license information available in the metadata." raise LicenseError(msg) - for i, license_ in enumerate(licenses): - if not license_.get("name"): - raise LicenseError(f"The license name is missing in {i}. license ({license_})") - - if not validate_license(license_["name"]): - raise LicenseError( - f"The (normalized) license name '{license_['name']}' was not found in the SPDX licenses list. " - "(See https://github.com/spdx/license-list-data/blob/main/json/licenses.json).", - ) + for resource_index, licenses in licenses_info: + for i, license_ in enumerate(licenses or []): + if not license_.get("name"): + raise LicenseError( + f"The license name is missing in resource {resource_index}, license {i} ({license_}).", + ) + + if not validate_license(license_["name"]): + raise LicenseError( + f"The (normalized) license name '{license_['name']}' in resource {resource_index}, license {i} " + "was not found in the SPDX licenses list. " + "(See https://github.com/spdx/license-list-data/blob/main/json/licenses.json).", + ) + + +def _find_license_field(metadata: dict, version: str) -> list: + version = get_metadata_version(metadata) + if version == "OEMetadata-2.0.0": + # Include resource index with each license for traceability + return [(i, resource.get("licenses")) for i, resource in enumerate(metadata.get("resources", []))] + else: # noqa: RET505 + # Return -1 as a placeholder index for top-level licenses + return [(-1, metadata.get("licenses", []))] LICENSES = read_licenses() From c0a0dea28a40e3c0faf6691a3b6cc2988fb3a8fa Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 11:24:29 +0100 Subject: [PATCH 07/23] add test for v16 to v2 metadata conversion #102 --- tests/test_conversion.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_conversion.py b/tests/test_conversion.py index 6c3e1e4..9d1b647 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -1,4 +1,5 @@ """Tests for OMIs conversion module.""" + import pytest import omi.base @@ -13,6 +14,14 @@ def test_conversion_from_oep_152_to_160(): validation.validate_metadata(converted_metadata_152) +def test_conversion_from_oep_160_to_200(): + """Test conversion from OEP v1.6.0 -> v2.0.0.""" + metadata_schema_160 = omi.base.get_metadata_specification("OEP-1.6.0").example + converted_metadata_160 = conversion.convert_metadata(metadata_schema_160, "OEMetadata-2.0.0") + assert base.get_metadata_version(converted_metadata_160) == "OEMetadata-2.0.0" + validation.validate_metadata(converted_metadata_160) + + def test_conversion_chain(): """Test conversion chain with conversion tree structure.""" From 2c6394a4081d46dc9b0fde7ed0f4c8f4f4b68f52 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 11:28:00 +0100 Subject: [PATCH 08/23] update changelog #102 --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 48bbcd8..6462f0c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,7 @@ Changelog current -------------------- * Fully rewrite OMI and implement the json schema spec only, remove python class based parsing (#104)[https://github.com/OpenEnergyPlatform/omi/pull/104] +* Add a new conversion functionality to convert form v160 to v200 oemetadata [(#111)](https://github.com/rl-institut/super-repo/pull/111) 0.2.1 (2024-01-26) -------------------- From d2671790cba621508fff6a14aeb3eea2b632ec4b Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 13:23:07 +0100 Subject: [PATCH 09/23] refactor conversion function to make flake8 more happy and reduce complexity #102 --- src/omi/conversion.py | 224 +++++++++++++++++++++++------------------- 1 file changed, 122 insertions(+), 102 deletions(-) diff --git a/src/omi/conversion.py b/src/omi/conversion.py index 6aaf971..80f10a6 100644 --- a/src/omi/conversion.py +++ b/src/omi/conversion.py @@ -96,7 +96,7 @@ def __convert_oep_152_to_160(metadata: dict) -> dict: return metadata -def __convert_oep_160_to_200(metadata: dict) -> dict: # noqa: C901, PLR0915 +def __convert_oep_160_to_200(metadata: dict) -> dict: """ Convert metadata with version "OEP-1.6.0" to "OEMetadata-2.0.0" using the v2.0 template. @@ -110,109 +110,13 @@ def __convert_oep_160_to_200(metadata: dict) -> dict: # noqa: C901, PLR0915 dict Updated metadata dictionary in v2.0 format """ - metadata_v2 = get_metadata_specification("OEMetadata-2.0.0") - # Deep copy template to avoid mutating the original template - metadata_v2 = deepcopy(metadata_v2.template) + metadata_v2 = deepcopy(get_metadata_specification("OEMetadata-2.0.0").template) + metadata_v2["name"] = metadata_v2["title"] = metadata_v2["id"] = None - # Map v1.6 fields to v2.0 fields - metadata_v2["name"] = None - metadata_v2["title"] = None - metadata_v2["id"] = None - - # Populate resources + # Populate metadata v2 resources for i, resource in enumerate(metadata.get("resources", [])): - if i >= len(metadata_v2["resources"]): - metadata_v2["resources"].append(deepcopy(metadata_v2["resources"][0])) - - resource_v2 = metadata_v2["resources"][i] - resource_v2["@id"] = metadata.get("@id") - resource_v2["@context"] = metadata.get("@context") - resource_v2["name"] = resource.get("name").split(".")[1] - resource_v2["topics"] = [resource.get("name", "").split(".")[0]] - resource_v2["title"] = metadata.get("title") - resource_v2["path"] = metadata.get("id") - resource_v2["description"] = metadata.get("description") - resource_v2["languages"] = metadata.get("language", []) - resource_v2["subject"] = metadata.get("subject", []) - resource_v2["keywords"] = metadata.get("keywords", []) - resource_v2["publicationDate"] = metadata.get("publicationDate") - - # Set to null to avoid validation errors: Date - resource_v2["embargoPeriod"]["start"] = None - resource_v2["embargoPeriod"]["end"] = None - - resource_v2["context"] = metadata.get("context", {}) - - # Set to null to avoid validation errors: URI - resource_v2["spatial"]["location"]["@id"] = None - resource_v2["spatial"]["extent"]["name"] = metadata.get("spatial", {}).get("extent") - # Set to null to avoid validation errors: URI - resource_v2["spatial"]["extent"]["@id"] = None - resource_v2["spatial"]["extent"]["resolutionValue"], resource_v2["spatial"]["extent"]["resolutionUnit"] = ( - metadata.get("spatial", {}).get("resolution", "").split(" ", 1) - ) - - resource_v2["temporal"] = metadata.get("temporal", {}) - # # Populate timeseries - # for i_ts, timeseries in enumerate(metadata.get("temporal", {}).get("timeseries", [])): - # if i_ts >= len(resource_v2["temporal"]["timeseries"]): - - # Populate sources - for i_source, source in enumerate(metadata.get("sources", [])): - if i_source >= len(resource_v2["sources"]): - resource_v2["sources"].append(deepcopy(metadata_v2["resources"][0]["sources"][0])) - - sources_v2 = resource_v2["sources"][i_source] - sources_v2["title"] = source.get("title") - sources_v2["description"] = source.get("description") - sources_v2["path"] = source.get("path") - for i_s_license, s_license in enumerate(source.get("licenses", [])): - if i_s_license >= len(sources_v2["licenses"]): - resource_v2["sources"].append(deepcopy(metadata_v2["resources"][0]["sources"][0]["licenses"][0])) - - licenses_v2 = resource_v2["licenses"][i_s_license] - licenses_v2.update(s_license) - licenses_v2["copyrightStatement"] = None - - # _license to avoid shadowing python internal - for i_license, _license in enumerate(metadata.get("licenses", [])): - if i_license >= len(resource_v2["licenses"]): - resource_v2["licenses"].append(deepcopy(metadata_v2["resources"][0]["licenses"][0])) - - licenses_v2 = resource_v2["licenses"][i_license] - licenses_v2.update(_license) - licenses_v2["copyrightStatement"] = None - - for i_contribution, contribution in enumerate(metadata.get("contributors", [])): - if i_contribution >= len(resource_v2["contributors"]): - resource_v2["contributors"].append(deepcopy(metadata_v2["resources"][0]["contributors"][0])) - - contributors_v2 = resource_v2["contributors"][i_contribution] - contributors_v2["title"] = contribution.get("title") - contributors_v2["path"] = contribution.get("path") - contributors_v2["organization"] = contribution.get("organization") - contributors_v2["date"] = contribution.get("date") - contributors_v2["object"] = contribution.get("object") - contributors_v2["comment"] = contribution.get("comment") - - # data resource/distribution definition - resource_v2["type"] = None - resource_v2["format"] = resource.get("format") - resource_v2["encoding"] = resource.get("encoding") - - for i_s_field, field in enumerate(resource.get("schema", {}).get("fields", [])): - if i_s_field >= len(resource_v2["schema"]["fields"]): - resource_v2["schema"]["fields"].append(deepcopy(metadata_v2["resources"][0]["schema"]["fields"][0])) - - schema_fields_v2 = resource_v2["schema"]["fields"][i_s_field] - schema_fields_v2["nullable"] = None - schema_fields_v2.update(field) - - resource_v2["schema"]["primaryKey"] = resource.get("schema", {}).get("primaryKey", []) - resource_v2["schema"]["foreignKeys"] = resource.get("schema", {}).get("foreignKeys", []) - - resource_v2["dialect"] = resource.get("dialect", {}) - resource_v2["review"] = metadata.get("review", {}) + resource_v2 = ensure_resource_entry(metadata_v2, i) + populate_resource_v2(resource_v2, metadata, resource) # Update metaMetadata section metadata_v2["metaMetadata"]["metadataVersion"] = "OEMetadata-2.0.0" @@ -221,6 +125,122 @@ def __convert_oep_160_to_200(metadata: dict) -> dict: # noqa: C901, PLR0915 return metadata_v2 +def ensure_resource_entry(metadata_v2: dict, index: int) -> dict: + """Ensure a resource entry exists in metadata_v2 resources for the given index.""" + if index >= len(metadata_v2["resources"]): + metadata_v2["resources"].append(deepcopy(metadata_v2["resources"][0])) + return metadata_v2["resources"][index] + + +def populate_resource_v2(resource_v2: dict, metadata: dict, resource: dict) -> None: + """Populate resource_v2 fields based on metadata and resource from v1.6.""" + resource_v2.update( + { + "@id": metadata.get("@id"), + "@context": metadata.get("@context"), + "name": resource.get("name").split(".")[1], + "topics": [resource.get("name", "").split(".")[0]], + "title": metadata.get("title"), + "path": metadata.get("id"), + "description": metadata.get("description"), + "languages": metadata.get("language", []), + "subject": metadata.get("subject", []), + "keywords": metadata.get("keywords", []), + "publicationDate": metadata.get("publicationDate"), + "context": metadata.get("context", {}), + "temporal": metadata.get("temporal", {}), + "type": None, + "format": resource.get("format"), + "encoding": resource.get("encoding"), + "schema": { + "fields": resource.get("schema", {}).get("fields", []), + "primaryKey": resource.get("schema", {}).get("primaryKey", []), + "foreignKeys": resource.get("schema", {}).get("foreignKeys", []), + }, + "dialect": resource.get("dialect", {}), + "review": metadata.get("review", {}), + }, + ) + + resource_v2["embargoPeriod"]["start"] = None + resource_v2["embargoPeriod"]["end"] = None + + # Set to null to avoid validation errors: URI + resource_v2["spatial"]["location"]["@id"] = None + resource_v2["spatial"]["extent"]["address"] = None + resource_v2["spatial"]["extent"]["name"] = metadata.get("spatial", {}).get("extent") + resource_v2["spatial"]["extent"]["latitude"] = None + resource_v2["spatial"]["extent"]["longitude"] = None + # Set to null to avoid validation errors: URI + resource_v2["spatial"]["extent"]["@id"] = None + resource_v2["spatial"]["extent"]["resolutionValue"], resource_v2["spatial"]["extent"]["resolutionUnit"] = ( + metadata.get("spatial", {}).get("resolution", "").split(" ", 1) + ) + + populate_sources(resource_v2, metadata.get("sources", [])) + populate_contributors(resource_v2, metadata.get("contributors", [])) + populate_licenses(resource_v2, metadata.get("licenses", [])) + populate_schema_fields(resource_v2, resource) + + +def populate_sources(resource_v2: dict, sources: list) -> None: + """Populate sources in resource_v2 from sources in v1.6.""" + for i_source, source in enumerate(sources): + if i_source >= len(resource_v2["sources"]): + resource_v2["sources"].append(deepcopy(resource_v2["sources"][0])) + source_v2 = resource_v2["sources"][i_source] + source_v2.update( + {"title": source.get("title"), "description": source.get("description"), "path": source.get("path")}, + ) + populate_source_licenses(source_v2, source.get("licenses", [])) + + +def populate_source_licenses(source_v2: dict, licenses: list) -> None: + """Populate licenses in source_v2 from licenses in v1.6.""" + for i_license, license_entry in enumerate(licenses): + if i_license >= len(source_v2["licenses"]): + source_v2["licenses"].append(deepcopy(source_v2["licenses"][0])) + source_v2["licenses"][i_license].update(license_entry) + source_v2["licenses"][i_license]["copyrightStatement"] = None + + +def populate_contributors(resource_v2: dict, contributors: list) -> None: + """Populate contributors in resource_v2 from contributors in v1.6.""" + for i_contribution, contributor in enumerate(contributors): + if i_contribution >= len(resource_v2["contributors"]): + resource_v2["contributors"].append(deepcopy(resource_v2["contributors"][0])) + contributor_v2 = resource_v2["contributors"][i_contribution] + contributor_v2.update( + { + "title": contributor.get("title"), + "path": contributor.get("path"), + "organization": contributor.get("organization"), + "date": contributor.get("date"), + "object": contributor.get("object"), + "comment": contributor.get("comment"), + }, + ) + + +def populate_licenses(resource_v2: dict, licenses: list) -> None: + """Populate licenses in resource_v2 from licenses in v1.6.""" + for i_license, license_entry in enumerate(licenses): + if i_license >= len(resource_v2["licenses"]): + resource_v2["licenses"].append(deepcopy(resource_v2["licenses"][0])) + resource_v2["licenses"][i_license].update(license_entry) + resource_v2["licenses"][i_license]["copyrightStatement"] = None + + +def populate_schema_fields(resource_v2: dict, resource: dict) -> None: + """Populate schema fields in resource_v2 from resource in v1.6.""" + for i_field, field in enumerate(resource.get("schema", {}).get("fields", [])): + if i_field >= len(resource_v2["schema"]["fields"]): + resource_v2["schema"]["fields"].append(deepcopy(resource_v2["schema"]["fields"][0])) + schema_field_v2 = resource_v2["schema"]["fields"][i_field] + schema_field_v2.update(field) + schema_field_v2["nullable"] = None + + METADATA_CONVERSIONS = { ("OEP-1.5.2", "OEP-1.6.0"): __convert_oep_152_to_160, ("OEP-1.6.0", "OEMetadata-2.0.0"): __convert_oep_160_to_200, From 4159d89a3b489bfaf6787138feceecaa299cea76 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 13:41:26 +0100 Subject: [PATCH 10/23] enhance spatial conversion #102 --- src/omi/conversion.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/omi/conversion.py b/src/omi/conversion.py index 80f10a6..9ea8042 100644 --- a/src/omi/conversion.py +++ b/src/omi/conversion.py @@ -167,15 +167,16 @@ def populate_resource_v2(resource_v2: dict, metadata: dict, resource: dict) -> N # Set to null to avoid validation errors: URI resource_v2["spatial"]["location"]["@id"] = None - resource_v2["spatial"]["extent"]["address"] = None - resource_v2["spatial"]["extent"]["name"] = metadata.get("spatial", {}).get("extent") - resource_v2["spatial"]["extent"]["latitude"] = None - resource_v2["spatial"]["extent"]["longitude"] = None + resource_v2["spatial"]["location"]["address"] = metadata.get("spatial", {}).get("location") + resource_v2["spatial"]["location"]["latitude"] = None + resource_v2["spatial"]["location"]["longitude"] = None # Set to null to avoid validation errors: URI + resource_v2["spatial"]["extent"]["name"] = metadata.get("spatial", {}).get("extent") resource_v2["spatial"]["extent"]["@id"] = None resource_v2["spatial"]["extent"]["resolutionValue"], resource_v2["spatial"]["extent"]["resolutionUnit"] = ( metadata.get("spatial", {}).get("resolution", "").split(" ", 1) ) + resource_v2["spatial"]["extent"]["crs"] = None populate_sources(resource_v2, metadata.get("sources", [])) populate_contributors(resource_v2, metadata.get("contributors", [])) From 1c1bff56adce8fda2fbc35195ce3b38a06b23633 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 13:43:00 +0100 Subject: [PATCH 11/23] fix license check to raise error if license is empty #102 --- src/omi/license.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/omi/license.py b/src/omi/license.py index 9425c40..4d74636 100644 --- a/src/omi/license.py +++ b/src/omi/license.py @@ -93,20 +93,19 @@ def validate_oemetadata_licenses(metadata: dict) -> None: version = get_metadata_version(metadata) licenses_info = _find_license_field(metadata, version) - if not licenses_info: - msg = "No license information available in the metadata." - raise LicenseError(msg) - for resource_index, licenses in licenses_info: + if not licenses: + raise LicenseError(f"No license information available in the metadata for resource: {resource_index + 1}.") for i, license_ in enumerate(licenses or []): if not license_.get("name"): raise LicenseError( - f"The license name is missing in resource {resource_index}, license {i} ({license_}).", + f"The license name is missing in resource {resource_index + 1}, license {i + 1} ({license_}).", ) if not validate_license(license_["name"]): raise LicenseError( - f"The (normalized) license name '{license_['name']}' in resource {resource_index}, license {i} " + f"The (normalized) license name '{license_['name']}' in resource" + f"{resource_index + 1}, license {i + 1} " "was not found in the SPDX licenses list. " "(See https://github.com/spdx/license-list-data/blob/main/json/licenses.json).", ) @@ -116,10 +115,14 @@ def _find_license_field(metadata: dict, version: str) -> list: version = get_metadata_version(metadata) if version == "OEMetadata-2.0.0": # Include resource index with each license for traceability - return [(i, resource.get("licenses")) for i, resource in enumerate(metadata.get("resources", []))] - else: # noqa: RET505 + licenses_per_resource = [ + (i, resource.get("licenses")) for i, resource in enumerate(metadata.get("resources", [])) + ] + else: # Return -1 as a placeholder index for top-level licenses - return [(-1, metadata.get("licenses", []))] + licenses_per_resource = [(0, metadata.get("licenses", []))] + + return licenses_per_resource LICENSES = read_licenses() From e2f5dacdc7a97238c0141474556241883ceb7381 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 13:43:50 +0100 Subject: [PATCH 12/23] update expected error msg in license empty test #102 --- tests/test_metadata_validation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_metadata_validation.py b/tests/test_metadata_validation.py index c4b8db5..a5aba38 100644 --- a/tests/test_metadata_validation.py +++ b/tests/test_metadata_validation.py @@ -35,7 +35,10 @@ def test_invalid_oep_metadata(): with (INVALID_METADAT_PATH / "missing_fields.json").open("r") as f: invalid_oep_metadata = json.load(f) - with pytest.raises(license.LicenseError, match="No license information available in the metadata."): + with pytest.raises( + license.LicenseError, + match=r"No license information available in the metadata for resource: \d+\.?", + ): validation.validate_metadata(invalid_oep_metadata) with (INVALID_METADAT_PATH / "wrongly_placed_null_value.json").open("r") as f: From ca25bb726e2a7c9e6284c4cfb33454c17d9511da Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 16:39:44 +0100 Subject: [PATCH 13/23] handle new fields see #102 --- src/omi/conversion.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/omi/conversion.py b/src/omi/conversion.py index 9ea8042..3242314 100644 --- a/src/omi/conversion.py +++ b/src/omi/conversion.py @@ -111,7 +111,7 @@ def __convert_oep_160_to_200(metadata: dict) -> dict: Updated metadata dictionary in v2.0 format """ metadata_v2 = deepcopy(get_metadata_specification("OEMetadata-2.0.0").template) - metadata_v2["name"] = metadata_v2["title"] = metadata_v2["id"] = None + metadata_v2["name"] = metadata_v2["title"] = metadata_v2["id"] = metadata_v2["description"] = None # Populate metadata v2 resources for i, resource in enumerate(metadata.get("resources", [])): @@ -134,6 +134,7 @@ def ensure_resource_entry(metadata_v2: dict, index: int) -> dict: def populate_resource_v2(resource_v2: dict, metadata: dict, resource: dict) -> None: """Populate resource_v2 fields based on metadata and resource from v1.6.""" + # Bulk update keys without resource_v2.update( { "@id": metadata.get("@id"), @@ -162,6 +163,8 @@ def populate_resource_v2(resource_v2: dict, metadata: dict, resource: dict) -> N }, ) + resource_v2["context"]["publisher"] = None + resource_v2["embargoPeriod"]["start"] = None resource_v2["embargoPeriod"]["end"] = None @@ -191,7 +194,13 @@ def populate_sources(resource_v2: dict, sources: list) -> None: resource_v2["sources"].append(deepcopy(resource_v2["sources"][0])) source_v2 = resource_v2["sources"][i_source] source_v2.update( - {"title": source.get("title"), "description": source.get("description"), "path": source.get("path")}, + { + "title": source.get("title"), + "description": source.get("description"), + "path": source.get("path"), + "publicationYear": None, + "authors": [], + }, ) populate_source_licenses(source_v2, source.get("licenses", [])) From e94edb122e816a396f4167e9f83668ebb6526369 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 17:40:12 +0100 Subject: [PATCH 14/23] provide basic documentation for new omi implementation #55 --- README.rst | 85 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/README.rst b/README.rst index e89bbfe..79e4df4 100644 --- a/README.rst +++ b/README.rst @@ -84,14 +84,42 @@ of the oemetadata-specification to help users stick with the latest enhancements **Conversion** -To ease the conversion of oemetadata from the outdated version 1.4 to the latest version, we provide -conversion functionality. The following example shows how to convert the oemetadata from v1.4 to v1.5 -by using a CLI command. +To ease the conversion of oemetadata from any outdated version to the latest version, we provide a +conversion functionality. The following example shows how to convert the oemetadata from v1.6 to v2.0. -CLI - oemetadata conversion from v1.4 to v1.5:: +CLI - oemetadata conversion:: + # Not implemented yet omi convert -i {input/path} -o {output/path} +Module usage - In python scripts you can use the conversion:: + + from omi.conversion import convert_metadata + + import json + + # you a function like this one to read you oemetadata json file + def read_json_file(file_path: str) -> dict: + with open(file_path, "r") as file: + data = json.load(file) + return data + + # for example you can use the oemetdata example.json for version 1.6.0 + # find it here https://github.com/OpenEnergyPlatform/oemetadata/blob/develop/metadata/v160/example.json + # make sure to provide a valid path relative to where you store the python environment + file_path = "example_v16.json" + + # read the metadata document + meta = read_json_file(file_path) + + # use omi to convert it to the latest release + converted = convert_metadata(meta, "OEMetadata-2.0.0") + + # now you can store the result as json file + with open("result.json", "w", encoding="utf-8") as json_file: + json.dump(converted, json_file, ensure_ascii=False, indent=4) # `indent=4` makes the JSON file easier to read + + **Validation** The validation is based on `jsonschema`. We release a schema with each `oemetadata` release, that schema @@ -103,42 +131,41 @@ the validation will try to get the matching schema for the current metadata. Module usage:: - # You can import the JSONParser directly like this: import json - from omi import validation - - with open("tests/data/metadata_v15.json", "r", encoding="utf-8") as f: - metadata = json.load(f) - - result = validation(metadata) - # TBD - -**Additional Fields - not related to the OEMetadata specification** + from omi.validation import validate_oemetadata_licenses, validate_metadata -Sometimes it is necessary to store additional key-value pairs along with the keys included in the OEMetadata specification. -OMI's compiler methods are capable of handling additional arguments or key-value arguments, but this must be -be explicitly specified. -To add additional key-value pairs, you must: + # use a function like this one to read you oemetadata json file + def read_json_file(file_path: str) -> dict: + with open(file_path, "r") as file: + data = json.load(file) + return data - NOTE: If you save the renderer return value in a json file and try to parse the file, the extra field is not included. - You must read the json file using Python and then add the extra field back oemetadata object as shown below. + # for example you can use the oemetdata example.json for version 2.0.0 + # find it here https://github.com/OpenEnergyPlatform/oemetadata/blob/develop/metadata/v20/example.json + # make sure to provide a valid path relative to where you store the python environment + file_path = "example_v16.json" -1 Parse the oemetadata from json file / variable into omis internal structure:: + # read the new input from file + meta = read_json_file(file_path) - from omi.dialects.oep.dialect import OEP_V_1_5_Dialect + # validate the oemetadata: This will return noting or the errors including descriptions + validate_metadata(meta) - min_inp = '{"id":"unique_id"} # or read from json file - minimal_oemetadata15 = OEP_V_1_5_Dialect.parse(min_inp) + # As we are prone to open data we use this license check to validate the license name that + # is available in the metadata document for each data resource/distribution. + validate_oemetadata_licenses(meta) -2 Now you can get(from json file)/define the additional data:: - data = "test" +**Additional Fields ** -3 And add it to the OEMetadata object that was parsed in step 1 by ading a key-value argument:: +To be in line with the oemetadata specification we do not allow for additional properties or fields in the metadata. +We want to keep the oemetadata relatively lean and readable still linking to other documents or to +propose a new property to extend the oemetadata would be a possibility here. - compiled = OEP_V_1_5_Dialect.compile(minimal_oemetadata15, _additionalField=data) - rendered = OEP_V_1_5_Dialect.render(compiled) +Still some times it becomes necessary to add additional information then this would be a use case outside of the OpenEnergyPlatform +specifically for your own use. You are welcome to use the oemetadata as base and add new fields we are happy to integrate them +back into the oeplatform and oemetadata if they seem relevant to other users. Development =========== From 488b6a892f6ed01080a90e0b3a4b5b52427836a5 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Wed, 30 Oct 2024 22:53:50 +0100 Subject: [PATCH 15/23] add inspection --- README.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.rst b/README.rst index 79e4df4..801cae7 100644 --- a/README.rst +++ b/README.rst @@ -157,6 +157,16 @@ Module usage:: validate_oemetadata_licenses(meta) +**Inspection** + +Describing your data structure is a quite technical task. OMI offers functionality to describe your data automatically. +You need to provide yor data in tabular text based format for this, for example a CSV file. Using frictionless OMI +guesses the data schema specification you can use this you provide required fields in an oemetadata document. + +Module usage:: + + + **Additional Fields ** To be in line with the oemetadata specification we do not allow for additional properties or fields in the metadata. From deb3c01ffaaeafdc7b6e4ca0947454580ede8b09 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Thu, 31 Oct 2024 12:57:51 +0100 Subject: [PATCH 16/23] add usage example for infer metadata functionality --- README.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/README.rst b/README.rst index 801cae7..1f7b43c 100644 --- a/README.rst +++ b/README.rst @@ -129,6 +129,12 @@ This will create a report.json containing information to debug possible errors. two arguments the first one is the metadata and the second optional one is the schmea. By default (if no schema is passed) the validation will try to get the matching schema for the current metadata. + +CLI - oemetadata conversion:: + + # Not implemented yet + + Module usage:: import json @@ -163,9 +169,26 @@ Describing your data structure is a quite technical task. OMI offers functionali You need to provide yor data in tabular text based format for this, for example a CSV file. Using frictionless OMI guesses the data schema specification you can use this you provide required fields in an oemetadata document. +CLI - oemetadata conversion:: + + # Not implemented yet + Module usage:: + import json + + import pathlib + + from omi.inspection import infer_metadata + + CSV_DATA_FILE = pathlib.Path(__file__).parent / "data" / "data.csv" + # infer the data fields from CSV fuile and add to an empty metadata template + with CSV_DATA_FILE.open("r") as f: + metadata = infer_metadata(f, "OEP") + # Save to a JSON file + with open("script/metadata/result_inspection.json", "w", encoding="utf-8") as json_file: + json.dump(metadata, json_file, ensure_ascii=False, indent=4) # `indent=4` makes the JSON file easier to read **Additional Fields ** From c51870551f72fa538dc82818aaa4fe89315443df Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Thu, 31 Oct 2024 12:59:14 +0100 Subject: [PATCH 17/23] fix code block --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index 1f7b43c..8fdec07 100644 --- a/README.rst +++ b/README.rst @@ -174,6 +174,7 @@ CLI - oemetadata conversion:: # Not implemented yet Module usage:: + import json import pathlib From 12019c9d9662ded799b1ac3a3517519c779722ed Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Thu, 31 Oct 2024 13:43:49 +0100 Subject: [PATCH 18/23] enhance helper function names for oem v2 conversion functionality --- src/omi/conversion.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/omi/conversion.py b/src/omi/conversion.py index 3242314..57bee0c 100644 --- a/src/omi/conversion.py +++ b/src/omi/conversion.py @@ -115,8 +115,8 @@ def __convert_oep_160_to_200(metadata: dict) -> dict: # Populate metadata v2 resources for i, resource in enumerate(metadata.get("resources", [])): - resource_v2 = ensure_resource_entry(metadata_v2, i) - populate_resource_v2(resource_v2, metadata, resource) + resource_v2 = ___v2_ensure_resource_entry(metadata_v2, i) + ___v2_populate_resource_v2(resource_v2, metadata, resource) # Update metaMetadata section metadata_v2["metaMetadata"]["metadataVersion"] = "OEMetadata-2.0.0" @@ -125,14 +125,14 @@ def __convert_oep_160_to_200(metadata: dict) -> dict: return metadata_v2 -def ensure_resource_entry(metadata_v2: dict, index: int) -> dict: +def ___v2_ensure_resource_entry(metadata_v2: dict, index: int) -> dict: """Ensure a resource entry exists in metadata_v2 resources for the given index.""" if index >= len(metadata_v2["resources"]): metadata_v2["resources"].append(deepcopy(metadata_v2["resources"][0])) return metadata_v2["resources"][index] -def populate_resource_v2(resource_v2: dict, metadata: dict, resource: dict) -> None: +def ___v2_populate_resource_v2(resource_v2: dict, metadata: dict, resource: dict) -> None: """Populate resource_v2 fields based on metadata and resource from v1.6.""" # Bulk update keys without resource_v2.update( @@ -181,13 +181,13 @@ def populate_resource_v2(resource_v2: dict, metadata: dict, resource: dict) -> N ) resource_v2["spatial"]["extent"]["crs"] = None - populate_sources(resource_v2, metadata.get("sources", [])) - populate_contributors(resource_v2, metadata.get("contributors", [])) - populate_licenses(resource_v2, metadata.get("licenses", [])) - populate_schema_fields(resource_v2, resource) + ___v2_populate_sources(resource_v2, metadata.get("sources", [])) + ___v2_populate_contributors(resource_v2, metadata.get("contributors", [])) + ___v2_populate_licenses(resource_v2, metadata.get("licenses", [])) + ___v2_populate_schema_fields(resource_v2, resource) -def populate_sources(resource_v2: dict, sources: list) -> None: +def ___v2_populate_sources(resource_v2: dict, sources: list) -> None: """Populate sources in resource_v2 from sources in v1.6.""" for i_source, source in enumerate(sources): if i_source >= len(resource_v2["sources"]): @@ -202,10 +202,10 @@ def populate_sources(resource_v2: dict, sources: list) -> None: "authors": [], }, ) - populate_source_licenses(source_v2, source.get("licenses", [])) + ___v2_populate_source_licenses(source_v2, source.get("licenses", [])) -def populate_source_licenses(source_v2: dict, licenses: list) -> None: +def ___v2_populate_source_licenses(source_v2: dict, licenses: list) -> None: """Populate licenses in source_v2 from licenses in v1.6.""" for i_license, license_entry in enumerate(licenses): if i_license >= len(source_v2["licenses"]): @@ -214,7 +214,7 @@ def populate_source_licenses(source_v2: dict, licenses: list) -> None: source_v2["licenses"][i_license]["copyrightStatement"] = None -def populate_contributors(resource_v2: dict, contributors: list) -> None: +def ___v2_populate_contributors(resource_v2: dict, contributors: list) -> None: """Populate contributors in resource_v2 from contributors in v1.6.""" for i_contribution, contributor in enumerate(contributors): if i_contribution >= len(resource_v2["contributors"]): @@ -232,7 +232,7 @@ def populate_contributors(resource_v2: dict, contributors: list) -> None: ) -def populate_licenses(resource_v2: dict, licenses: list) -> None: +def ___v2_populate_licenses(resource_v2: dict, licenses: list) -> None: """Populate licenses in resource_v2 from licenses in v1.6.""" for i_license, license_entry in enumerate(licenses): if i_license >= len(resource_v2["licenses"]): @@ -241,7 +241,7 @@ def populate_licenses(resource_v2: dict, licenses: list) -> None: resource_v2["licenses"][i_license]["copyrightStatement"] = None -def populate_schema_fields(resource_v2: dict, resource: dict) -> None: +def ___v2_populate_schema_fields(resource_v2: dict, resource: dict) -> None: """Populate schema fields in resource_v2 from resource in v1.6.""" for i_field, field in enumerate(resource.get("schema", {}).get("fields", [])): if i_field >= len(resource_v2["schema"]["fields"]): From 206d314010188800d0b8af90956fdb07192743e9 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Thu, 31 Oct 2024 13:45:15 +0100 Subject: [PATCH 19/23] remove deprecated badges --- README.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 8fdec07..6fc474c 100644 --- a/README.rst +++ b/README.rst @@ -17,8 +17,7 @@ Overview * - docs - |docs| * - tests - - | |travis| |appveyor| |requires| - | |codecov| + - | |codecov| * - package - | |version| |wheel| |supported-versions| |supported-implementations| | |commits-since| From 2f1b9cc38f4d6fe6c9db7620eadbb6a8de019eaf Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Thu, 31 Oct 2024 13:46:37 +0100 Subject: [PATCH 20/23] show tests runs badge --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 6fc474c..9b97a4b 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ Overview * - docs - |docs| * - tests - - | |codecov| + - | |Automated test| |codecov| * - package - | |version| |wheel| |supported-versions| |supported-implementations| | |commits-since| @@ -25,7 +25,7 @@ Overview :target: https://readthedocs.org/projects/omi :alt: Documentation Status -.. |Automated Test| image:: https://github.com/OpenEnergyPlatform/omi/actions/workflows/automated-testing.yml/badge.svg +.. |Automated test| image:: https://github.com/OpenEnergyPlatform/omi/actions/workflows/automated-testing.yml/badge.svg :target: https://github.com/OpenEnergyPlatform/omi/actions/workflows/automated-testing.yml :alt: Test status From 53715ccd33a823f2d782b8eafc4bab6f525e35a7 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Thu, 31 Oct 2024 13:48:32 +0100 Subject: [PATCH 21/23] enhance description --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 9b97a4b..746f226 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ Open Energy Family - Open Metadata Integration OMI ================================================== -A library to process and translate and work with the open energy metadata. +A library to work with the open energy metadata. Its main features are validation, version conversion and infer data schemas from CSV to oemetadata. * Free software: AGPL-3.0 From e08ca8ae32860fcd967f969936b9cccb909d6010 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Thu, 31 Oct 2024 16:00:40 +0100 Subject: [PATCH 22/23] update to new oemetadata v2.0.1 --- README.rst | 2 +- src/omi/base.py | 4 ++-- src/omi/conversion.py | 8 ++++---- src/omi/license.py | 2 +- tests/test_conversion.py | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 746f226..3577151 100644 --- a/README.rst +++ b/README.rst @@ -112,7 +112,7 @@ Module usage - In python scripts you can use the conversion:: meta = read_json_file(file_path) # use omi to convert it to the latest release - converted = convert_metadata(meta, "OEMetadata-2.0.0") + converted = convert_metadata(meta, "OEMetadata-2.0.1") # now you can store the result as json file with open("result.json", "w", encoding="utf-8") as json_file: diff --git a/src/omi/base.py b/src/omi/base.py index f6d6a4b..7804a80 100644 --- a/src/omi/base.py +++ b/src/omi/base.py @@ -12,7 +12,7 @@ from .settings import OEP_URL # Order matters! First entry equals latest version of metadata format -METADATA_FORMATS = {"OEP": ["OEMetadata-2.0.0", "OEP-1.6.0", "OEP-1.5.2"], "INSPIRE": []} +METADATA_FORMATS = {"OEP": ["OEMetadata-2.0.1", "OEP-1.6.0", "OEP-1.5.2"], "INSPIRE": []} METADATA_VERSIONS = {version: md_format for md_format, versions in METADATA_FORMATS.items() for version in versions} @@ -148,7 +148,7 @@ def __get_metadata_specs_for_oep(metadata_version: str) -> MetadataSpecification MetadataSpecification Metadata schema for given metadata version including template and example. """ - metadata_modules = {"OEP-1.5.2": v152, "OEP-1.6.0": v160, "OEMetadata-2.0.0": v20} + metadata_modules = {"OEP-1.5.2": v152, "OEP-1.6.0": v160, "OEMetadata-2.0.1": v20} metadata_module = metadata_modules[metadata_version] module_path = pathlib.Path(metadata_module.__file__).parent specs = {} diff --git a/src/omi/conversion.py b/src/omi/conversion.py index 57bee0c..d7e5b71 100644 --- a/src/omi/conversion.py +++ b/src/omi/conversion.py @@ -98,7 +98,7 @@ def __convert_oep_152_to_160(metadata: dict) -> dict: def __convert_oep_160_to_200(metadata: dict) -> dict: """ - Convert metadata with version "OEP-1.6.0" to "OEMetadata-2.0.0" using the v2.0 template. + Convert metadata with version "OEP-1.6.0" to "OEMetadata-2.0.1" using the v2.0 template. Parameters ---------- @@ -110,7 +110,7 @@ def __convert_oep_160_to_200(metadata: dict) -> dict: dict Updated metadata dictionary in v2.0 format """ - metadata_v2 = deepcopy(get_metadata_specification("OEMetadata-2.0.0").template) + metadata_v2 = deepcopy(get_metadata_specification("OEMetadata-2.0.1").template) metadata_v2["name"] = metadata_v2["title"] = metadata_v2["id"] = metadata_v2["description"] = None # Populate metadata v2 resources @@ -119,7 +119,7 @@ def __convert_oep_160_to_200(metadata: dict) -> dict: ___v2_populate_resource_v2(resource_v2, metadata, resource) # Update metaMetadata section - metadata_v2["metaMetadata"]["metadataVersion"] = "OEMetadata-2.0.0" + metadata_v2["metaMetadata"]["metadataVersion"] = "OEMetadata-2.0.1" metadata_v2["metaMetadata"]["metadataLicense"] = metadata.get("metaMetadata", {}).get("metadataLicense") return metadata_v2 @@ -253,5 +253,5 @@ def ___v2_populate_schema_fields(resource_v2: dict, resource: dict) -> None: METADATA_CONVERSIONS = { ("OEP-1.5.2", "OEP-1.6.0"): __convert_oep_152_to_160, - ("OEP-1.6.0", "OEMetadata-2.0.0"): __convert_oep_160_to_200, + ("OEP-1.6.0", "OEMetadata-2.0.1"): __convert_oep_160_to_200, } diff --git a/src/omi/license.py b/src/omi/license.py index 4d74636..f0f6f9e 100644 --- a/src/omi/license.py +++ b/src/omi/license.py @@ -113,7 +113,7 @@ def validate_oemetadata_licenses(metadata: dict) -> None: def _find_license_field(metadata: dict, version: str) -> list: version = get_metadata_version(metadata) - if version == "OEMetadata-2.0.0": + if version == "OEMetadata-2.0.1": # Include resource index with each license for traceability licenses_per_resource = [ (i, resource.get("licenses")) for i, resource in enumerate(metadata.get("resources", [])) diff --git a/tests/test_conversion.py b/tests/test_conversion.py index 9d1b647..c848bf3 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -17,8 +17,8 @@ def test_conversion_from_oep_152_to_160(): def test_conversion_from_oep_160_to_200(): """Test conversion from OEP v1.6.0 -> v2.0.0.""" metadata_schema_160 = omi.base.get_metadata_specification("OEP-1.6.0").example - converted_metadata_160 = conversion.convert_metadata(metadata_schema_160, "OEMetadata-2.0.0") - assert base.get_metadata_version(converted_metadata_160) == "OEMetadata-2.0.0" + converted_metadata_160 = conversion.convert_metadata(metadata_schema_160, "OEMetadata-2.0.1") + assert base.get_metadata_version(converted_metadata_160) == "OEMetadata-2.0.1" validation.validate_metadata(converted_metadata_160) From bf2f4d50f60a288645aacb5ee53538535608df43 Mon Sep 17 00:00:00 2001 From: Jonas Huber Date: Thu, 31 Oct 2024 16:02:34 +0100 Subject: [PATCH 23/23] deactivate test that depend on a OEP table for now. Setup a better test structure later --- tests/test_base.py | 5 +++-- tests/test_data_validation.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_base.py b/tests/test_base.py index 7fbd7f0..c6faae6 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -1,10 +1,11 @@ """Tests for OMIs `base` package.""" + import pytest from omi import base, validation -def test_metadata_from_oep(): +def deactivate_test_metadata_from_oep(): """Test metadata from OEP.""" metadata = base.get_metadata_from_oep_table("ind_steel_pellet_1") validation.validate_metadata(metadata) @@ -19,7 +20,7 @@ def test_metadata_from_oep_non_existing_table(): base.get_metadata_from_oep_table("non_existing_table") -def test_metadata_from_oep_empty(): +def deactivate_test_metadata_from_oep_empty(): """Test error for empty metadata.""" with pytest.raises( base.MetadataError, diff --git a/tests/test_data_validation.py b/tests/test_data_validation.py index 512fb53..6772e47 100644 --- a/tests/test_data_validation.py +++ b/tests/test_data_validation.py @@ -1,4 +1,5 @@ """Tests for validating data via OMI.""" + import json import pathlib @@ -9,7 +10,7 @@ from omi import validation -def test_data_validation_against_oep(): +def deactivate_test_data_validation_against_oep(): """Test data validation with example file against OEP table.""" valid_data_file = pathlib.Path(__file__).parent / "test_data" / "validation" / "hackathon_lignite_hh_valid.csv" valid_data = pd.read_csv(valid_data_file, delimiter=";") @@ -26,7 +27,7 @@ def test_data_validation_against_metadata(): validation.validate_data(valid_data, metadata=metadata) -def test_data_validation_report(): +def deactivate_test_data_validation_report(): """Test data validation with example file.""" valid_data_file = pathlib.Path(__file__).parent / "test_data" / "validation" / "hackathon_lignite_hh_valid.csv" valid_data = pd.read_csv(valid_data_file, delimiter=";")