Skip to content

Commit

Permalink
Merge pull request #440 from OpenEnergyPlatform/hotfix-439-missing-table
Browse files Browse the repository at this point in the history
Hotfix: orm model change - API & bulk
  • Loading branch information
chrwm authored Apr 11, 2023
2 parents 883bb53 + 7e6565f commit 2c7de28
Show file tree
Hide file tree
Showing 13 changed files with 74 additions and 37 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.13.0
current_version = 0.13.1
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)((?P<release>(a|na))+(?P<build>\d+))?
serialize =
{major}.{minor}.{patch}{release}{build}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-production.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
- name: create package
run: python setup.py sdist
- name: import open-mastr
run: python -m pip install ./dist/open_mastr-0.13.0.tar.gz
run: python -m pip install ./dist/open_mastr-0.13.1.tar.gz
- name: Create credentials file
env:
MASTR_TOKEN: ${{ secrets.MASTR_TOKEN }}
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ For each version important additions, changes and removals are listed here.
The format is inspired by [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [v0.13.1] Hotfix - 2023-04-11

### Added
- Add new table and new columns to the data model [#440](https://github.com/OpenEnergyPlatform/open-MaStR/pull/440)
## [v0.13.0] Maintenance release - 2023-02-16
### Added
- Add a `workflow_dispatch` to run CI pipelines from a button click [#389](https://github.com/OpenEnergyPlatform/open-MaStR/pull/389)
Expand Down
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ authors:
title: "open-MaStR"
type: software
license: AGPL-3.0
version: 0.13.0
version: 0.13.1
doi:
date-released: 2023-02-16
date-released: 2023-04-11
url: "https://github.com/OpenEnergyPlatform/open-MaStR/"
1 change: 1 addition & 0 deletions open_mastr/mastr.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def download(
"balancing_area", "Yes", "No"
"permit", "Yes", "Yes"
"deleted_units", "Yes", "No"
"retrofit_units", "Yes", "No"
date: None or :class:`datetime.datetime` or str, optional
For bulk method:
Expand Down
4 changes: 2 additions & 2 deletions open_mastr/soap_api/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,15 +479,15 @@ def __init__(self, parallel_processes=None):
"permit_data": "GetEinheitGenehmigung",
},
"gsgk": {
"unit_data": "GetEinheitGeoSolarthermieGrubenKlaerschlammDruckentspannung",
"unit_data": "GetEinheitGeothermieGrubengasDruckentspannung",
"energietraeger": [
"Geothermie",
"Solarthermie",
"Grubengas",
"Klaerschlamm",
],
"kwk_data": "GetAnlageKwk",
"eeg_data": "GetAnlageEegGeoSolarthermieGrubenKlaerschlammDruckentspannung",
"eeg_data": "GetAnlageEegGeothermieGrubengasDruckentspannung",
"permit_data": "GetEinheitGenehmigung",
},
"nuclear": {
Expand Down
31 changes: 16 additions & 15 deletions open_mastr/soap_api/mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def retrieve_additional_data(self, data, data_type, limit=10 ** 8, chunksize=100
data, requested_ids, download_functions[data_type]
)

unit_data = flatten_dict(unit_data)
unit_data = flatten_dict(unit_data, serialize_with_json=False)
number_units_merged = 0

# Prepare data and add to database table
Expand Down Expand Up @@ -839,20 +839,21 @@ def _preprocess_additional_data_entry(self, unit_dat, technology, data_type):
ertuechtigung["ProzentualeErhoehungDesLv"] = float(
ertuechtigung["ProzentualeErhoehungDesLv"]
)
# The NetzbetreiberMastrNummer is handed over as type:list, hence
# non-compatible with sqlite)
# This replaces the list with the first (string)element in the list
# to make it sqlite compatible
if (
"NetzbetreiberMastrNummer" in unit_dat
and type(unit_dat["NetzbetreiberMastrNummer"]) == list
):
if len(unit_dat["NetzbetreiberMastrNummer"]) > 0:
unit_dat["NetzbetreiberMastrNummer"] = unit_dat[
"NetzbetreiberMastrNummer"
][0]
else:
unit_dat["NetzbetreiberMastrNummer"] = None
# Some fields (those named in data_as_list) are handed over as type list and
# are hence not compatible with sqlite or postgresql.
# This replaces each such list with its first element, converted to a string,
# or with None if the list is empty.

data_as_list = ["NetzbetreiberMastrNummer","Netzbetreiberzuordnungen"]

for dat in data_as_list:
if (
dat in unit_dat
and type(unit_dat[dat]) == list
):
if len(unit_dat[dat]) > 0:
unit_dat[dat] = f"{unit_dat[dat][0]}"
else:
unit_dat[dat] = None

# Rename the typo in column zugeordneteWirkleistungWechselrichter
if "zugeordneteWirkleistungWechselrichter" in unit_dat.keys():
Expand Down
15 changes: 8 additions & 7 deletions open_mastr/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"balancing_area",
"permit",
"deleted_units",
"retrofit_units",
]

# Possible values for parameter 'data' with API download method
Expand Down Expand Up @@ -59,7 +60,8 @@
"market_actors",
"market_roles",
"permit",
"deleted_units"
"deleted_units",
"retrofit_units",
]

# Possible data types for API download
Expand Down Expand Up @@ -99,6 +101,7 @@
"balancing_area": ["bilanzierungsgebiete"],
"permit": ["einheitengenehmigung"],
"deleted_units": ["geloeschteunddeaktivierteeinheiten"],
"retrofit_units": ["ertuechtigungen"],
}

# Map bulk data to database table names, for csv export
Expand All @@ -116,6 +119,7 @@
"balancing_area": ["balancing_area"],
"permit": ["permit"],
"deleted_units": ["deleted_units"],
"retrofit_units": ["retrofit_units"],
}

# used to map the parameter options in open-mastr to the exact table class names in orm.py
Expand Down Expand Up @@ -152,10 +156,7 @@
"eeg_data": "HydroEeg",
"permit_data": "Permit",
},
"nuclear": {
"unit_data": "NuclearExtended",
"permit_data": "Permit"
},
"nuclear": {"unit_data": "NuclearExtended", "permit_data": "Permit"},
"storage": {
"unit_data": "StorageExtended",
"eeg_data": "StorageEeg",
Expand All @@ -173,11 +174,11 @@
"grids": "Grids",
"balancing_area": "BalancingArea",
"permit": "Permit",
"deleted_units": "DeletedUnits"
"deleted_units": "DeletedUnits",
"retrofit_units": "RetrofitUnits",
}



UNIT_TYPE_MAP = {
"Windeinheit": "wind",
"Solareinheit": "solar",
Expand Down
28 changes: 26 additions & 2 deletions open_mastr/utils/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class BasicUnit(Base):
GenMastrNummer = Column(String)
BestandsanlageMastrNummer = Column(String)
NichtVorhandenInMigriertenEinheiten = Column(String)
EinheitSystemstatus = Column(String)


class AdditionalDataRequested(Base):
Expand Down Expand Up @@ -134,6 +135,7 @@ class Extended(object):
Einspeisungsart = Column(String)
PraequalifiziertFuerRegelenergie = Column(Boolean)
GenMastrNummer = Column(String)
Netzbetreiberzuordnungen = Column(String)
# from bulk download
Hausnummer_nv = Column(Boolean)
Weic_nv = Column(Boolean)
Expand Down Expand Up @@ -227,6 +229,7 @@ class CombustionExtended(Extended, ParentAllTables, Base):
Einsatzort = Column(String)
KwkMastrNummer = Column(String)
Technologie = Column(String)
AusschliesslicheVerwendungImKombibetrieb = Column(Boolean)


class GsgkExtended(Extended, ParentAllTables, Base):
Expand Down Expand Up @@ -292,6 +295,7 @@ class Eeg(object):
AusschreibungZuschlag = Column(Boolean)
AnlagenkennzifferAnlagenregister = Column(String)
AnlagenkennzifferAnlagenregister_nv = Column(Boolean)
Netzbetreiberzuordnungen = Column(String)


class WindEeg(Eeg, ParentAllTables, Base):
Expand Down Expand Up @@ -385,6 +389,7 @@ class Kwk(ParentAllTables, Base):
VerknuepfteEinheiten = Column(String)
AnlageBetriebsstatus = Column(String)
AusschreibungZuschlag = Column(Boolean)
Netzbetreiberzuordnungen = Column(String)


class Permit(ParentAllTables, Base):
Expand All @@ -404,6 +409,7 @@ class Permit(ParentAllTables, Base):
VerknuepfteEinheiten = Column(String)
Frist_nv = Column(Boolean)
WasserrechtAblaufdatum_nv = Column(Boolean)
Netzbetreiberzuordnungen = Column(String)


class LocationBasic(Base):
Expand Down Expand Up @@ -741,6 +747,7 @@ class Grids(ParentAllTables, Base):
GeschlossenesVerteilnetz = Column(String)
Bezeichnung = Column(String)
Marktgebiet = Column(String)
Bundesland = Column(String)


class GridConnections(ParentAllTables, Base):
Expand Down Expand Up @@ -774,6 +781,18 @@ class DeletedUnits(ParentAllTables, Base):
EinheitBetriebsstatus = Column(String)


# ORM model for the "retrofit_units" database table.
# The bulk-download key "ertuechtigungen" in tablename_mapping points at this
# class and applies no column renames ("replace_column_names" is None).
class RetrofitUnits(ParentAllTables, Base):
__tablename__ = "retrofit_units"

# Integer primary key of the table.
Id = Column(Integer, primary_key=True)
# NOTE(review): presumably the MaStR number of the associated EEG
# installation -- confirm against the MaStR data model.
EegMastrNummer = Column(String)
# NOTE(review): presumably the capacity increase achieved by the retrofit;
# the unit is not stated here -- TODO confirm.
Leistungserhoehung = Column(Float)
# Stored as a plain date (no time component).
WiederinbetriebnahmeDatum = Column(Date)
# Stored as a timezone-aware timestamp.
DatumLetzteAktualisierung = Column(DateTime(timezone=True))
Ertuechtigungsart = Column(String)
ErtuechtigungIstZulassungspflichtig = Column(Boolean)


tablename_mapping = {
"anlageneegbiomasse": {
"__name__": BiomassEeg.__tablename__,
Expand All @@ -793,15 +812,15 @@ class DeletedUnits(ParentAllTables, Base):
"LokationMaStRNummer": "LokationMastrNummer",
},
},
"anlageneeggeosolarthermiegrubenklaerschlammdruckentspannung": {
"anlageneeggeothermiegrubengasdruckentspannung": {
"__name__": GsgkEeg.__tablename__,
"__class__": GsgkEeg,
"replace_column_names": {
"EegMaStRNummer": "EegMastrNummer",
"VerknuepfteEinheitenMaStRNummern": "VerknuepfteEinheit",
},
},
"einheitengeosolarthermiegrubenklaerschlammdruckentspannung": {
"einheitengeothermiegrubengasdruckentspannung": {
"__name__": GsgkExtended.__tablename__,
"__class__": GsgkExtended,
"replace_column_names": {
Expand Down Expand Up @@ -958,6 +977,11 @@ class DeletedUnits(ParentAllTables, Base):
"KwkMaStRNummer": "KwkMastrNummer",
},
},
"ertuechtigungen": {
"__name__": RetrofitUnits.__tablename__,
"__class__": RetrofitUnits,
"replace_column_names": None,
},
"geloeschteunddeaktivierteeinheiten": {
"__name__": DeletedUnits.__tablename__,
"__class__": DeletedUnits,
Expand Down
2 changes: 1 addition & 1 deletion open_mastr/xml_download/utils_cleansing_bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def replace_mastr_katalogeintraege(
"""Replaces the IDs from the mastr database by its mapped string values from
the table katalogwerte"""
katalogwerte = create_katalogwerte_from_bulk_download(zipped_xml_file_path)

for column_name in df.columns:
if column_name in columns_replace_list:
if df[column_name].dtype == "O":
Expand All @@ -43,6 +42,7 @@ def replace_mastr_katalogeintraege(
df[column_name]
.str.split(",", expand=True)
.apply(lambda x: x.str.strip())
.replace("", None)
.astype("Int64")
.applymap(katalogwerte.get)
.agg(lambda d: ",".join(i for i in d if isinstance(i, str)), axis=1)
Expand Down
13 changes: 9 additions & 4 deletions open_mastr/xml_download/utils_write_to_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,16 @@ def write_single_entries_until_not_unique_comes_up(
key_list = (
pd.read_sql(sql=select(primary_key), con=con).values.squeeze().tolist()
)
df = df.set_index(primary_key.name)

len_df_before = len(df)
df = df.drop(labels=key_list, errors="ignore")
df = df.drop_duplicates(
subset=[primary_key.name]
) # drop all entries with duplicated primary keys in the dataframe
df = df.set_index(primary_key.name)

df = df.drop(
labels=key_list, errors="ignore"
) # drop primary keys that already exist in the table
df = df.reset_index()
print(f"{len_df_before-len(df)} entries already existed in the database.")

Expand Down Expand Up @@ -378,5 +385,3 @@ def handle_xml_syntax_error(data: bytes, err: Error) -> pd.DataFrame:
df = pd.read_xml(decoded_data)
print("One invalid xml expression was deleted.")
return df


4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@
"open_mastr.utils.config",
"open_mastr.xml_download",
],
version="0.13.0",
version="0.13.1",
description="A package that provides an interface for downloading and"
" processing the data of the Marktstammdatenregister (MaStR)",
long_description=long_description,
long_description_content_type="text/x-rst",
url="https://github.com/OpenEnergyPlatform/open-MaStR",
download_url="https://github.com/OpenEnergyPlatform/open-MaStR/archive"
"/refs/tags/v0.13.0.tar.gz",
"/refs/tags/v0.13.1.tar.gz",
author="Open Energy Family",
author_email="datenzentrum@rl-institut.de",
maintainer="Ludwig Hülk",
Expand Down
2 changes: 2 additions & 0 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ def parameter_dict_working_list():
"grid",
"balancing_area",
"permit",
"deleted_units",
"retrofit_units",
None,
["wind", "solar"],
],
Expand Down

0 comments on commit 2c7de28

Please sign in to comment.