Skip to content

Commit

Permalink
Make holiday features configurable (#569)
Browse files Browse the repository at this point in the history
* add bidding zone config

* format black

* fix unit tests

* disable dutch school holidays for other countries than NL

* add license info

* ran black

* Formatting

Signed-off-by: Clara De Smet <clara.de.smet@alliander.com>

* Applied pydocstyle

Signed-off-by: Clara De Smet <clara.de.smet@alliander.com>

---------

Signed-off-by: Clara De Smet <clara.de.smet@alliander.com>
Co-authored-by: Clara De Smet <clara.de.smet@alliander.com>
Co-authored-by: lschilders <lars.schilders@alliander.com>
  • Loading branch information
3 people authored Nov 29, 2024
1 parent 4661304 commit 6272e5d
Show file tree
Hide file tree
Showing 7 changed files with 270 additions and 27 deletions.
4 changes: 3 additions & 1 deletion openstef/data_classes/prediction_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from openstef.data_classes.data_prep import DataPrepDataClass
from openstef.data_classes.model_specifications import ModelSpecificationDataClass
from openstef.data_classes.split_function import SplitFuncDataClass
from openstef.enums import PipelineType
from openstef.enums import PipelineType, BiddingZone


class PredictionJobDataClass(BaseModel):
Expand Down Expand Up @@ -54,6 +54,8 @@ class PredictionJobDataClass(BaseModel):
lon: Optional[float] = 5.291266
"""Longitude of the forecasted location in degrees. Used for fetching weather data in tasks, calculating derrived features and component splitting."""
name: str
"""Bidding zone is used to determine the electricity price. It is also used to determine the holidays that should be used. Currently only ENTSO-E bidding zones are supported."""
electricity_bidding_zone: Optional[BiddingZone] = BiddingZone.NL
"""Name of the forecast, e.g. the location name."""
train_components: Optional[bool]
"""Whether splitting the forecasts in wind, solar, rest is desired."""
Expand Down
104 changes: 104 additions & 0 deletions openstef/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,110 @@
from enum import Enum


class BiddingZone(Enum):
    """Electricity bidding zones that can be configured on a prediction job.

    The prediction job documents these as ENTSO-E bidding zones. Each member's
    value is identical to its name. The member *name* is used as the lookup key
    into ``BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING`` when deriving the country
    whose holidays are used for feature engineering, so every member added here
    needs a matching key in that mapping.
    """

    DE_50HZ = "DE_50HZ"
    AL = "AL"
    DE_AMPRION = "DE_AMPRION"
    AT = "AT"
    BY = "BY"
    BE = "BE"
    BA = "BA"
    BG = "BG"
    CZ_DE_SK = "CZ_DE_SK"
    HR = "HR"
    CWE = "CWE"
    CY = "CY"
    CZ = "CZ"
    DE_AT_LU = "DE_AT_LU"
    DE_LU = "DE_LU"
    DK = "DK"
    DK_1 = "DK_1"
    DK_1_NO_1 = "DK_1_NO_1"
    DK_2 = "DK_2"
    DK_CA = "DK_CA"
    EE = "EE"
    FI = "FI"
    MK = "MK"
    FR = "FR"
    DE = "DE"
    GR = "GR"
    HU = "HU"
    IS = "IS"
    IE_SEM = "IE_SEM"
    IE = "IE"
    IT = "IT"
    IT_SACO_AC = "IT_SACO_AC"
    IT_CALA = "IT_CALA"
    IT_SACO_DC = "IT_SACO_DC"
    IT_BRNN = "IT_BRNN"
    IT_CNOR = "IT_CNOR"
    IT_CSUD = "IT_CSUD"
    IT_FOGN = "IT_FOGN"
    IT_GR = "IT_GR"
    IT_MACRO_NORTH = "IT_MACRO_NORTH"
    IT_MACRO_SOUTH = "IT_MACRO_SOUTH"
    IT_MALTA = "IT_MALTA"
    IT_NORD = "IT_NORD"
    IT_NORD_AT = "IT_NORD_AT"
    IT_NORD_CH = "IT_NORD_CH"
    IT_NORD_FR = "IT_NORD_FR"
    IT_NORD_SI = "IT_NORD_SI"
    IT_PRGP = "IT_PRGP"
    IT_ROSN = "IT_ROSN"
    IT_SARD = "IT_SARD"
    IT_SICI = "IT_SICI"
    IT_SUD = "IT_SUD"
    RU_KGD = "RU_KGD"
    LV = "LV"
    LT = "LT"
    LU = "LU"
    LU_BZN = "LU_BZN"
    MT = "MT"
    ME = "ME"
    GB = "GB"
    GE = "GE"
    GB_IFA = "GB_IFA"
    GB_IFA2 = "GB_IFA2"
    GB_ELECLINK = "GB_ELECLINK"
    UK = "UK"
    NL = "NL"
    NO_1 = "NO_1"
    NO_1A = "NO_1A"
    NO_2 = "NO_2"
    NO_2_NSL = "NO_2_NSL"
    NO_2A = "NO_2A"
    NO_3 = "NO_3"
    NO_4 = "NO_4"
    NO_5 = "NO_5"
    NO = "NO"
    PL_CZ = "PL_CZ"
    PL = "PL"
    PT = "PT"
    MD = "MD"
    RO = "RO"
    RU = "RU"
    SE_1 = "SE_1"
    SE_2 = "SE_2"
    SE_3 = "SE_3"
    SE_4 = "SE_4"
    RS = "RS"
    SK = "SK"
    SI = "SI"
    GB_NIR = "GB_NIR"
    ES = "ES"
    SE = "SE"
    CH = "CH"
    DE_TENNET = "DE_TENNET"
    DE_TRANSNET = "DE_TRANSNET"
    TR = "TR"
    UA = "UA"
    UA_DOBTPP = "UA_DOBTPP"
    UA_BEI = "UA_BEI"
    UA_IPS = "UA_IPS"
    XK = "XK"
    DE_AMP_LU = "DE_AMP_LU"


class ModelType(Enum):
XGB = "xgb"
XGB_QUANTILE = "xgb_quantile"
Expand Down
16 changes: 15 additions & 1 deletion openstef/feature_engineering/apply_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@
import pandas as pd

from openstef.data_classes.prediction_job import PredictionJobDataClass
from openstef.enums import BiddingZone
from openstef.feature_engineering.holiday_features import (
generate_holiday_feature_functions,
)
from openstef.feature_engineering.lag_features import generate_lag_feature_functions
from openstef.feature_engineering.bidding_zone_to_country_mapping import (
BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING,
)
from openstef.feature_engineering.weather_features import (
add_additional_solar_features,
add_additional_wind_features,
Expand Down Expand Up @@ -58,6 +62,7 @@ def apply_features(
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
index = pd.date_range(start = "2017-01-01 09:00:00",
freq = '15T', periods = 200)
data = pd.DataFrame(index = index,
Expand All @@ -66,6 +71,9 @@ def apply_features(
np.random.uniform(0.7,1.7, 200)))
"""
if pj is None:
pj = {"electricity_bidding_zone": BiddingZone.NL}

# Get lag feature functions
feature_functions = generate_lag_feature_functions(feature_names, horizon)

Expand All @@ -80,8 +88,14 @@ def apply_features(
}
)

# Get country code from bidding zone if available
electricity_bidding_zone = pj.get("electricity_bidding_zone", BiddingZone.NL)
country_code = BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING[electricity_bidding_zone.name]

# Get holiday feature functions
feature_functions.update(generate_holiday_feature_functions())
feature_functions.update(
generate_holiday_feature_functions(country_code=country_code)
)

# Add the features to the dataframe using previously defined feature functions
for key, featfunc in feature_functions.items():
Expand Down
106 changes: 106 additions & 0 deletions openstef/feature_engineering/bidding_zone_to_country_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
#
# SPDX-License-Identifier: MPL-2.0
# Lookup table from bidding zone name to the country code used for holiday
# features. Keys are the names of the BiddingZone enum members (the caller
# indexes this dict with ``electricity_bidding_zone.name``); values are passed
# on as ``country_code`` to ``generate_holiday_feature_functions``.
#
# Zones that are a sub-region of a country map to that country (e.g. "IT_NORD"
# -> "IT"); zones spanning several countries are assigned a single one
# (e.g. "CZ_DE_SK" -> "CZ", "DE_AT_LU" -> "DE").
BIDDING_ZONE_TO_COUNTRY_CODE_MAPPING = {
    "DE_50HZ": "DE",
    "AL": "AL",
    "DE_AMPRION": "DE",
    "AT": "AT",
    "BY": "BY",
    "BE": "BE",
    "BA": "BA",
    "BG": "BG",
    "CZ_DE_SK": "CZ",
    "HR": "HR",
    # NOTE(review): "CWE" maps to itself rather than to a country code --
    # confirm that the holiday lookup accepts this value.
    "CWE": "CWE",
    "CY": "CY",
    "CZ": "CZ",
    "DE_AT_LU": "DE",
    "DE_LU": "DE",
    "DK": "DK",
    "DK_1": "DK",
    "DK_1_NO_1": "DK",
    "DK_2": "DK",
    "DK_CA": "DK",
    "EE": "EE",
    "FI": "FI",
    "MK": "MK",
    "FR": "FR",
    "DE": "DE",
    "GR": "GR",
    "HU": "HU",
    "IS": "IS",
    "IE_SEM": "IE",
    "IE": "IE",
    "IT": "IT",
    "IT_SACO_AC": "IT",
    "IT_CALA": "IT",
    "IT_SACO_DC": "IT",
    "IT_BRNN": "IT",
    "IT_CNOR": "IT",
    "IT_CSUD": "IT",
    "IT_FOGN": "IT",
    "IT_GR": "IT",
    "IT_MACRO_NORTH": "IT",
    "IT_MACRO_SOUTH": "IT",
    "IT_MALTA": "IT",
    "IT_NORD": "IT",
    "IT_NORD_AT": "IT",
    "IT_NORD_CH": "IT",
    "IT_NORD_FR": "IT",
    "IT_NORD_SI": "IT",
    "IT_PRGP": "IT",
    "IT_ROSN": "IT",
    "IT_SARD": "IT",
    "IT_SICI": "IT",
    "IT_SUD": "IT",
    "RU_KGD": "RU",
    "LV": "LV",
    "LT": "LT",
    "LU": "LU",
    "LU_BZN": "LU",
    "MT": "MT",
    "ME": "ME",
    "GB": "GB",
    "GE": "GE",
    "GB_IFA": "GB",
    "GB_IFA2": "GB",
    "GB_ELECLINK": "GB",
    "UK": "UK",
    "NL": "NL",
    "NO_1": "NO",
    "NO_1A": "NO",
    "NO_2": "NO",
    "NO_2_NSL": "NO",
    "NO_2A": "NO",
    "NO_3": "NO",
    "NO_4": "NO",
    "NO_5": "NO",
    "NO": "NO",
    "PL_CZ": "PL",
    "PL": "PL",
    "PT": "PT",
    "MD": "MD",
    "RO": "RO",
    "RU": "RU",
    "SE_1": "SE",
    "SE_2": "SE",
    "SE_3": "SE",
    "SE_4": "SE",
    "RS": "RS",
    "SK": "SK",
    "SI": "SI",
    "GB_NIR": "GB",
    "ES": "ES",
    "SE": "SE",
    "CH": "CH",
    "DE_TENNET": "DE",
    "DE_TRANSNET": "DE",
    "TR": "TR",
    "UA": "UA",
    "UA_DOBTPP": "UA",
    "UA_BEI": "UA",
    "UA_IPS": "UA",
    "XK": "XK",
    "DE_AMP_LU": "DE",
}
55 changes: 32 additions & 23 deletions openstef/feature_engineering/holiday_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


def generate_holiday_feature_functions(
country: str = "NL",
country_code: str = "NL",
years: list = None,
path_to_school_holidays_csv: str = HOLIDAY_CSV_PATH,
) -> dict:
Expand Down Expand Up @@ -69,7 +69,7 @@ def generate_holiday_feature_functions(
now.year + 1,
]

country_holidays = holidays.country_holidays(country, years=years)
country_holidays = holidays.country_holidays(country_code, years=years)

# Make holiday function dict
holiday_functions = {}
Expand All @@ -96,41 +96,50 @@ def make_holiday_func(requested_date):

# Check for bridge day
holiday_functions, bridge_days = check_for_bridge_day(
date, holiday_name, country, years, holiday_functions, bridge_days
date, holiday_name, country_code, years, holiday_functions, bridge_days
)

# Add feature function that includes all bridgedays
holiday_functions.update(
{"is_bridgeday": lambda x: np.isin(x.index.date, np.array(list(bridge_days)))}
)

# Manually generated csv including all Dutch school holidays for different regions
df_holidays = pd.read_csv(path_to_school_holidays_csv, index_col=None)
df_holidays["datum"] = pd.to_datetime(df_holidays.datum).apply(lambda x: x.date())

# Add check function that includes all holidays of the provided csv
holiday_functions.update(
{"is_schoolholiday": lambda x: np.isin(x.index.date, df_holidays.datum.values)}
)

# Loop over list of holidays names
for holiday_name in list(set(df_holidays.name)):
# Define function explicitly to mitigate 'late binding' problem
def make_holiday_func(holidayname=holiday_name):
return lambda x: np.isin(
x.index.date, df_holidays.datum[df_holidays.name == holidayname].values
)
# Add school holidays if country is NL
if country_code == "NL":
# Manually generated csv including all Dutch school holidays for different regions
df_holidays = pd.read_csv(path_to_school_holidays_csv, index_col=None)
df_holidays["datum"] = pd.to_datetime(df_holidays.datum).apply(
lambda x: x.date()
)

# Create lag function for each holiday
# Add check function that includes all holidays of the provided csv
holiday_functions.update(
{
"is_"
+ holiday_name.replace(" ", "_").lower(): make_holiday_func(
holidayname=holiday_name
"is_schoolholiday": lambda x: np.isin(
x.index.date, df_holidays.datum.values
)
}
)

# Loop over list of holidays names
for holiday_name in list(set(df_holidays.name)):
# Define function explicitly to mitigate 'late binding' problem
def make_holiday_func(holidayname=holiday_name):
return lambda x: np.isin(
x.index.date,
df_holidays.datum[df_holidays.name == holidayname].values,
)

# Create lag function for each holiday
holiday_functions.update(
{
"is_"
+ holiday_name.replace(" ", "_").lower(): make_holiday_func(
holidayname=holiday_name
)
}
)

return holiday_functions


Expand Down
10 changes: 9 additions & 1 deletion test/unit/feature_engineering/test_apply_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# SPDX-License-Identifier: MPL-2.0

import unittest
from openstef.enums import BiddingZone
from test.unit.utils.base import BaseTestCase
from test.unit.utils.data import TestData

Expand Down Expand Up @@ -187,8 +188,15 @@ def test_apply_holiday_features(self):
"pressure": [3, 4, 5, 6],
},
)
pj = {
"model": "proleaf",
"lat": 52.132633,
"lon": 5.291266,
"electricity_bidding_zone": BiddingZone.NL,
}

input_data_with_features = apply_features.apply_features(
data=input_data, horizon=24
pj=pj, data=input_data, horizon=24
)

expected = TestData.load("../data/input_data_with_holiday_features.csv")
Expand Down
Loading

0 comments on commit 6272e5d

Please sign in to comment.