diff --git a/.bumpversion.cfg b/.bumpversion.cfg index eeaf51a9..3b512055 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.13.0 +current_version = 0.13.1 parse = (?P\d+)\.(?P\d+)\.(?P\d+)((?P(a|na))+(?P\d+))? serialize = {major}.{minor}.{patch}{release}{build} diff --git a/.github/workflows/ci-production.yml b/.github/workflows/ci-production.yml index c2b1e226..4b0275f4 100644 --- a/.github/workflows/ci-production.yml +++ b/.github/workflows/ci-production.yml @@ -34,7 +34,7 @@ jobs: - name: create package run: python setup.py sdist - name: import open-mastr - run: python -m pip install ./dist/open_mastr-0.13.0.tar.gz + run: python -m pip install ./dist/open_mastr-0.13.1.tar.gz - name: Create credentials file env: MASTR_TOKEN: ${{ secrets.MASTR_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 07c916a4..7802c0f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,10 @@ For each version important additions, changes and removals are listed here. The format is inspired from [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [v0.13.1] Hotfix - 2023-04-11 +### Added +- Add new table and new columns to the data model [#440](https://github.com/OpenEnergyPlatform/open-MaStR/pull/440) ## [v0.13.0] Maintenance release - 2023-02-16 ### Added - Add a `workflow_dispatch` to run CI pipelines from a button click [#389](https://github.com/OpenEnergyPlatform/open-MaStR/pull/389) diff --git a/CITATION.cff b/CITATION.cff index 55898bed..8ccfb590 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -28,7 +28,7 @@ authors: title: "open-MaStR" type: software license: AGPL-3.0 -version: 0.13.0 +version: 0.13.1 doi: -date-released: 2023-02-16 +date-released: 2023-04-11 url: "https://github.com/OpenEnergyPlatform/open-MaStR/" diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index 6d13e95a..266e87a6 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -134,6 +134,7 @@ def download( "balancing_area", "Yes", "No" "permit", "Yes", "Yes" "deleted_units", "Yes", "No" + "retrofit_units", "Yes", "No" date: None or :class:`datetime.datetime` or str, optional For bulk method: diff --git a/open_mastr/soap_api/download.py b/open_mastr/soap_api/download.py index 0b1eff07..17879d12 100644 --- a/open_mastr/soap_api/download.py +++ b/open_mastr/soap_api/download.py @@ -479,7 +479,7 @@ def __init__(self, parallel_processes=None): "permit_data": "GetEinheitGenehmigung", }, "gsgk": { - "unit_data": "GetEinheitGeoSolarthermieGrubenKlaerschlammDruckentspannung", + "unit_data": "GetEinheitGeothermieGrubengasDruckentspannung", "energietraeger": [ "Geothermie", "Solarthermie", @@ -487,7 +487,7 @@ def __init__(self, parallel_processes=None): "Klaerschlamm", ], "kwk_data": "GetAnlageKwk", - "eeg_data": "GetAnlageEegGeoSolarthermieGrubenKlaerschlammDruckentspannung", + "eeg_data": "GetAnlageEegGeothermieGrubengasDruckentspannung", "permit_data": "GetEinheitGenehmigung", }, "nuclear": { diff --git a/open_mastr/soap_api/mirror.py b/open_mastr/soap_api/mirror.py index fc4b407b..70755621 100644 --- a/open_mastr/soap_api/mirror.py +++ b/open_mastr/soap_api/mirror.py @@ -318,7 +318,7 @@ def retrieve_additional_data(self, data, data_type, limit=10 ** 8, chunksize=100 data, requested_ids, download_functions[data_type] ) - unit_data = flatten_dict(unit_data) + unit_data = flatten_dict(unit_data, serialize_with_json=False) number_units_merged = 0 # Prepare data and add to database table @@ -839,20 +839,21 @@ def _preprocess_additional_data_entry(self, unit_dat, technology, data_type): ertuechtigung["ProzentualeErhoehungDesLv"] = float( ertuechtigung["ProzentualeErhoehungDesLv"] ) - # The NetzbetreiberMastrNummer is handed over as type:list, hence - # non-compatible with sqlite) - # This replaces the list with the first (string)element in the list - # to make it sqlite compatible - if ( - "NetzbetreiberMastrNummer" in unit_dat - and type(unit_dat["NetzbetreiberMastrNummer"]) == list - ): - if len(unit_dat["NetzbetreiberMastrNummer"]) > 0: - unit_dat["NetzbetreiberMastrNummer"] = unit_dat[ - "NetzbetreiberMastrNummer" - ][0] - else: - unit_dat["NetzbetreiberMastrNummer"] = None + # Some data (data_in_list) is handed over as type:list, hence + # non-compatible with sqlite or postgresql + # This replaces the list with the first element in the list + + data_as_list = ["NetzbetreiberMastrNummer","Netzbetreiberzuordnungen"] + + for dat in data_as_list: + if ( + dat in unit_dat + and type(unit_dat[dat]) == list + ): + if len(unit_dat[dat]) > 0: + unit_dat[dat] = f"{unit_dat[dat][0]}" + else: + unit_dat[dat] = None # Rename the typo in column zugeordneteWirkleistungWechselrichter if "zugeordneteWirkleistungWechselrichter" in unit_dat.keys(): diff --git a/open_mastr/utils/constants.py b/open_mastr/utils/constants.py index 0b2f3c08..e9507708 100644 --- a/open_mastr/utils/constants.py +++ b/open_mastr/utils/constants.py @@ -16,6 +16,7 @@ "balancing_area", "permit", "deleted_units", + "retrofit_units", ] # Possible values for parameter 'data' with API download method @@ -59,7 +60,8 @@ "market_actors", "market_roles", "permit", - "deleted_units" + "deleted_units", + "retrofit_units", ] # Possible data types for API download @@ -99,6 +101,7 @@ "balancing_area": ["bilanzierungsgebiete"], "permit": ["einheitengenehmigung"], "deleted_units": ["geloeschteunddeaktivierteeinheiten"], + "retrofit_units": ["ertuechtigungen"], } # Map bulk data to database table names, for csv export @@ -116,6 +119,7 @@ "balancing_area": ["balancing_area"], "permit": ["permit"], "deleted_units": ["deleted_units"], + "retrofit_units": ["retrofit_units"], } # used to map the parameter options in open-mastr to the exact table class names in orm.py @@ -152,10 +156,7 @@ "eeg_data": "HydroEeg", "permit_data": "Permit", }, - "nuclear": { - "unit_data": "NuclearExtended", - "permit_data": "Permit" - }, + "nuclear": {"unit_data": "NuclearExtended", "permit_data": "Permit"}, "storage": { "unit_data": "StorageExtended", "eeg_data": "StorageEeg", @@ -173,11 +174,11 @@ "grids": "Grids", "balancing_area": "BalancingArea", "permit": "Permit", - "deleted_units": "DeletedUnits" + "deleted_units": "DeletedUnits", + "retrofit_units": "RetrofitUnits", } - UNIT_TYPE_MAP = { "Windeinheit": "wind", "Solareinheit": "solar", diff --git a/open_mastr/utils/orm.py b/open_mastr/utils/orm.py index 0d39a740..6557fb7d 100644 --- a/open_mastr/utils/orm.py +++ b/open_mastr/utils/orm.py @@ -42,6 +42,7 @@ class BasicUnit(Base): GenMastrNummer = Column(String) BestandsanlageMastrNummer = Column(String) NichtVorhandenInMigriertenEinheiten = Column(String) + EinheitSystemstatus = Column(String) class AdditionalDataRequested(Base): @@ -134,6 +135,7 @@ class Extended(object): Einspeisungsart = Column(String) PraequalifiziertFuerRegelenergie = Column(Boolean) GenMastrNummer = Column(String) + Netzbetreiberzuordnungen = Column(String) # from bulk download Hausnummer_nv = Column(Boolean) Weic_nv = Column(Boolean) @@ -227,6 +229,7 @@ class CombustionExtended(Extended, ParentAllTables, Base): Einsatzort = Column(String) KwkMastrNummer = Column(String) Technologie = Column(String) + AusschliesslicheVerwendungImKombibetrieb = Column(Boolean) class GsgkExtended(Extended, ParentAllTables, Base): @@ -292,6 +295,7 @@ class Eeg(object): AusschreibungZuschlag = Column(Boolean) AnlagenkennzifferAnlagenregister = Column(String) AnlagenkennzifferAnlagenregister_nv = Column(Boolean) + Netzbetreiberzuordnungen = Column(String) class WindEeg(Eeg, ParentAllTables, Base): @@ -385,6 +389,7 @@ class Kwk(ParentAllTables, Base): VerknuepfteEinheiten = Column(String) AnlageBetriebsstatus = Column(String) AusschreibungZuschlag = Column(Boolean) + Netzbetreiberzuordnungen = Column(String) class Permit(ParentAllTables, Base): @@ -404,6 +409,7 @@ class Permit(ParentAllTables, Base): VerknuepfteEinheiten = Column(String) Frist_nv = Column(Boolean) WasserrechtAblaufdatum_nv = Column(Boolean) + Netzbetreiberzuordnungen = Column(String) class LocationBasic(Base): @@ -741,6 +747,7 @@ class Grids(ParentAllTables, Base): GeschlossenesVerteilnetz = Column(String) Bezeichnung = Column(String) Marktgebiet = Column(String) + Bundesland = Column(String) class GridConnections(ParentAllTables, Base): @@ -774,6 +781,18 @@ class DeletedUnits(ParentAllTables, Base): EinheitBetriebsstatus = Column(String) +class RetrofitUnits(ParentAllTables, Base): + __tablename__ = "retrofit_units" + + Id = Column(Integer, primary_key=True) + EegMastrNummer = Column(String) + Leistungserhoehung = Column(Float) + WiederinbetriebnahmeDatum = Column(Date) + DatumLetzteAktualisierung = Column(DateTime(timezone=True)) + Ertuechtigungsart = Column(String) + ErtuechtigungIstZulassungspflichtig = Column(Boolean) + + tablename_mapping = { "anlageneegbiomasse": { "__name__": BiomassEeg.__tablename__, @@ -793,7 +812,7 @@ class DeletedUnits(ParentAllTables, Base): "LokationMaStRNummer": "LokationMastrNummer", }, }, - "anlageneeggeosolarthermiegrubenklaerschlammdruckentspannung": { + "anlageneeggeothermiegrubengasdruckentspannung": { "__name__": GsgkEeg.__tablename__, "__class__": GsgkEeg, "replace_column_names": { @@ -801,7 +820,7 @@ class DeletedUnits(ParentAllTables, Base): "VerknuepfteEinheitenMaStRNummern": "VerknuepfteEinheit", }, }, - "einheitengeosolarthermiegrubenklaerschlammdruckentspannung": { + "einheitengeothermiegrubengasdruckentspannung": { "__name__": GsgkExtended.__tablename__, "__class__": GsgkExtended, "replace_column_names": { @@ -958,6 +977,11 @@ class DeletedUnits(ParentAllTables, Base): "KwkMaStRNummer": "KwkMastrNummer", }, }, + "ertuechtigungen": { + "__name__": RetrofitUnits.__tablename__, + "__class__": RetrofitUnits, + "replace_column_names": None, + }, "geloeschteunddeaktivierteeinheiten": { "__name__": DeletedUnits.__tablename__, "__class__": DeletedUnits, diff --git a/open_mastr/xml_download/utils_cleansing_bulk.py b/open_mastr/xml_download/utils_cleansing_bulk.py index c0d857b4..14be3418 100644 --- a/open_mastr/xml_download/utils_cleansing_bulk.py +++ b/open_mastr/xml_download/utils_cleansing_bulk.py @@ -34,7 +34,6 @@ def replace_mastr_katalogeintraege( """Replaces the IDs from the mastr database by its mapped string values from the table katalogwerte""" katalogwerte = create_katalogwerte_from_bulk_download(zipped_xml_file_path) - for column_name in df.columns: if column_name in columns_replace_list: if df[column_name].dtype == "O": @@ -43,6 +42,7 @@ def replace_mastr_katalogeintraege( df[column_name] .str.split(",", expand=True) .apply(lambda x: x.str.strip()) + .replace("", None) .astype("Int64") .applymap(katalogwerte.get) .agg(lambda d: ",".join(i for i in d if isinstance(i, str)), axis=1) diff --git a/open_mastr/xml_download/utils_write_to_database.py b/open_mastr/xml_download/utils_write_to_database.py index f29c1b0e..8a9f9be8 100644 --- a/open_mastr/xml_download/utils_write_to_database.py +++ b/open_mastr/xml_download/utils_write_to_database.py @@ -285,9 +285,16 @@ def write_single_entries_until_not_unique_comes_up( key_list = ( pd.read_sql(sql=select(primary_key), con=con).values.squeeze().tolist() ) - df = df.set_index(primary_key.name) + len_df_before = len(df) - df = df.drop(labels=key_list, errors="ignore") + df = df.drop_duplicates( + subset=[primary_key.name] + ) # drop all entries with duplicated primary keys in the dataframe + df = df.set_index(primary_key.name) + + df = df.drop( + labels=key_list, errors="ignore" + ) # drop primary keys that already exist in the table df = df.reset_index() print(f"{len_df_before-len(df)} entries already existed in the database.") @@ -378,5 +385,3 @@ def handle_xml_syntax_error(data: bytes, err: Error) -> pd.DataFrame: df = pd.read_xml(decoded_data) print("One invalid xml expression was deleted.") return df - - diff --git a/setup.py b/setup.py index 5c32b215..1725a370 100644 --- a/setup.py +++ b/setup.py @@ -18,14 +18,14 @@ "open_mastr.utils.config", "open_mastr.xml_download", ], - version="0.13.0", + version="0.13.1", description="A package that provides an interface for downloading and" " processing the data of the Marktstammdatenregister (MaStR)", long_description=long_description, long_description_content_type="text/x-rst", url="https://github.com/OpenEnergyPlatform/open-MaStR", download_url="https://github.com/OpenEnergyPlatform/open-MaStR/archive" - "/refs/tags/v0.13.0.tar.gz", + "/refs/tags/v0.13.1.tar.gz", author="Open Energy Family", author_email="datenzentrum@rl-institut.de", maintainer="Ludwig Hülk", diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 337ba165..9b2a8ec2 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -65,6 +65,8 @@ def parameter_dict_working_list(): "grid", "balancing_area", "permit", + "deleted_units", + "retrofit_units", None, ["wind", "solar"], ],