Skip to content

Commit

Permalink
feature(KTP-890): Added flatliner model for when only zeros need to b…
Browse files Browse the repository at this point in the history
…e detected. (#537)

* feature(KTP-890): Added flatliner model for when only zeros need to be detected.

Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com>

* Format Python code with Black

Signed-off-by: black <action@github.com>

* feature(KTP-890): Added flatliner model for when only zeros need to be detected.

Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com>

* feature(KTP-890): Added zero output check to tests based on the review feedback.

Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com>

* fix: Fixed failing test regarding quantile feature.

Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com>

* fix: Added licenses for dazls models.

Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com>

---------

Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com>
Signed-off-by: black <action@github.com>
Co-authored-by: black <action@github.com>
  • Loading branch information
egordm and actions-user authored May 31, 2024
1 parent 570fd31 commit c2c88d6
Show file tree
Hide file tree
Showing 12 changed files with 211 additions and 22 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,4 @@ dmypy.json

# Cython debug symbols
cython_debug/
test/test/
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com>

SPDX-License-Identifier: MPL-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com>

SPDX-License-Identifier: MPL-2.0
1 change: 1 addition & 0 deletions openstef/data_classes/prediction_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class PredictionJobDataClass(BaseModel):
- ``"linear"``
- ``"linear_quantile"``
- ``"xgb_multioutput_quantile"``
- ``"flatliner"``
If unsure what to pick, choose ``"xgb"``.
Expand Down
1 change: 1 addition & 0 deletions openstef/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class MLModelType(Enum):
LINEAR = "linear"
LINEAR_QUANTILE = "linear_quantile"
ARIMA = "arima"
FLATLINER = "flatliner"


class ForecastType(Enum):
Expand Down
7 changes: 5 additions & 2 deletions openstef/model/confidence_interval_applicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,11 @@ def add_confidence_interval(
result = self._add_quantiles_to_forecast_quantile_regression(
temp_forecast, self.model.quantiles
)
self.logger.warning('Quantiles are requested the model was not trained on. Using the quantiles the model was trained on',
requested_quantiles=pj["quantiles"], trained_quantiles=self.model.quantiles)
self.logger.warning(
"Quantiles are requested the model was not trained on. Using the quantiles the model was trained on",
requested_quantiles=pj["quantiles"],
trained_quantiles=self.model.quantiles,
)
return result

return self._add_quantiles_to_forecast_default(temp_forecast, pj["quantiles"])
Expand Down
5 changes: 5 additions & 0 deletions openstef/model/model_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from openstef.model.regressors.linear import LinearOpenstfRegressor
from openstef.model.regressors.linear_quantile import LinearQuantileOpenstfRegressor
from openstef.model.regressors.regressor import OpenstfRegressor
from openstef.model.regressors.flatliner import FlatlinerRegressor
from openstef.model.regressors.xgb import XGBOpenstfRegressor
from openstef.model.regressors.xgb_quantile import XGBQuantileOpenstfRegressor
from openstef.model.regressors.xgb_multioutput_quantile import (
Expand Down Expand Up @@ -105,6 +106,9 @@
"imputation_strategy",
"fill_value",
],
MLModelType.FLATLINER: [
"quantiles",
],
MLModelType.LINEAR_QUANTILE: [
"alpha",
"quantiles",
Expand Down Expand Up @@ -134,6 +138,7 @@ class ModelCreator:
MLModelType.LINEAR: LinearOpenstfRegressor,
MLModelType.LINEAR_QUANTILE: LinearQuantileOpenstfRegressor,
MLModelType.ARIMA: ARIMAOpenstfRegressor,
MLModelType.FLATLINER: FlatlinerRegressor,
}

@staticmethod
Expand Down
4 changes: 4 additions & 0 deletions openstef/model/regressors/dazls.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def fit(self, features, target):
Args:
features: inputs for domain and adaptation model (domain_model_input, adaptation_model_input)
target: the expected output (y_train)
"""
x, y = (
features.loc[:, self.baseline_input_columns],
Expand All @@ -76,6 +77,7 @@ def predict(self, x: np.array):
Returns:
prediction: The output prediction after both models.
"""
model_test_data = x.loc[:, self.baseline_input_columns]

Expand All @@ -90,6 +92,7 @@ def score(self, truth, prediction):
Returns:
RMSE and R2 scores
"""
rmse = (mean_squared_error(truth, prediction)) ** 0.5
r2_score_value = r2_score(truth, prediction)
Expand All @@ -100,6 +103,7 @@ def __str__(self):
Returns:
Summary represented by a string
"""
summary_str = (
f"{self.__name__} model summary:\n\n"
Expand Down
100 changes: 100 additions & 0 deletions openstef/model/regressors/flatliner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
#
# SPDX-License-Identifier: MPL-2.0
import re
from typing import Dict, Union, Set, Optional, List

import numpy as np
import pandas as pd
from sklearn.base import RegressorMixin
from sklearn.linear_model import QuantileRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils.validation import check_is_fitted

from openstef.feature_engineering.missing_values_transformer import (
MissingValuesTransformer,
)
from openstef.model.regressors.regressor import OpenstfRegressor


class FlatlinerRegressor(OpenstfRegressor, RegressorMixin):
    """Regressor that forecasts 0.0 for every input row.

    Meant for flatliner locations that still expect a prediction: nothing is
    learned from the data, but the usual ``fit`` / ``predict`` interface is kept.
    """

    # Class-level fallback so ``feature_names`` can be read before ``fit`` is called.
    feature_names_: List[str] = []

    def __init__(self, quantiles=None):
        """Initialize FlatlinerRegressor.

        Args:
            quantiles: Stored as-is on the instance; ``fit`` and ``predict`` do
                not read it.

        """
        super().__init__()
        self.quantiles = quantiles

    @property
    def feature_names(self) -> list:
        """Return the column names recorded by the most recent ``fit`` call."""
        check_is_fitted(self)
        return self.feature_names_

    @staticmethod
    def _get_importance_names():
        """Return the mapping of importance-kind keys to importance names."""
        return dict(
            gain_importance_name="total_gain",
            weight_importance_name="weight",
        )

    @property
    def can_predict_quantiles(self) -> bool:
        """Flag telling callers this model accepts a ``quantile`` argument."""
        return True

    def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
        """Record the feature names; no actual training takes place.

        Args:
            x: Feature matrix, only its column names are used.
            y: Labels (ignored).

        Returns:
            This FlatlinerRegressor instance.

        """
        columns = list(x.columns)
        n_features = len(columns)

        self.feature_names_ = columns
        # Uniform importance over all features; ``or 1.0`` avoids a division by
        # zero when the feature matrix has no columns.
        self.feature_importances_ = np.ones(n_features) / (n_features or 1.0)

        return self

    def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.array:
        """Predict zero for every row of ``x``.

        Args:
            x: Feature matrix, only its number of rows is used.
            quantile: Accepted for interface compatibility with quantile models;
                the output is the same for every quantile.

        Returns:
            One-dimensional array of zeros with one entry per row of ``x``.

        """
        check_is_fitted(self)

        n_rows = x.shape[0]
        return np.zeros(n_rows)

    def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.array:
        """Return an all-zero importance vector, one entry per feature name."""
        check_is_fitted(self)
        return np.zeros(len(self.feature_names_))

    @classmethod
    def _get_param_names(cls):
        """Return the names of the constructor parameters of this estimator."""
        return ["quantiles"]

    def __sklearn_is_fitted__(self) -> bool:
        """Always report the estimator as fitted, since there is nothing to train."""
        return True
66 changes: 66 additions & 0 deletions test/unit/model/regressors/test_flatliner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501>
#
# SPDX-License-Identifier: MPL-2.0
import unittest
from unittest.mock import MagicMock

import numpy as np
import pandas as pd
import sklearn
from sklearn.utils.estimator_checks import check_estimator

from openstef.feature_engineering.apply_features import apply_features
from openstef.model.regressors.flatliner import FlatlinerRegressor
from test.unit.utils.base import BaseTestCase
from test.unit.utils.data import TestData

train_input = TestData.load("reference_sets/307-train-data.csv")


class MockModel:
    """Bare-bones mock object that only exposes a fixed ``coef_`` array."""

    coef_ = np.asarray([1, 1, 3])


class TestLinearQuantile(BaseTestCase):
    """Unit tests for ``FlatlinerRegressor``."""

    def setUp(self) -> None:
        # Quantiles handed to the regressor in the sklearn compliance check.
        self.quantiles = [0.9, 0.5, 0.6, 0.1]

    @unittest.skip(
        "Use this during development; this test requires not allowing nan values, "
        "which we explicitly do allow."
    )
    def test_sklearn_compliant(self):
        # Use sklearn's built-in check, this will raise an exception if some check fails.
        # During these tests the fit and predict methods are elaborately tested.
        # More info: https://scikit-learn.org/stable/modules/generated/sklearn.utils.estimator_checks.check_estimator.html
        check_estimator(FlatlinerRegressor(quantiles=tuple(self.quantiles)))

    def test_quantile_fit(self):
        """Test happy flow of the training of model"""
        # Arrange
        model = FlatlinerRegressor()

        # Act
        model.fit(train_input.iloc[:, 1:], train_input.iloc[:, 0])

        # Assert
        # check if the model was fitted (raises NotFittedError when not fitted)
        self.assertIsNone(sklearn.utils.validation.check_is_fitted(model))

        # check if model is sklearn compatible
        self.assertIsInstance(model, sklearn.base.BaseEstimator)

        result: np.ndarray = model.predict(train_input.iloc[:, 1:])

        # ``assertEquals`` is a deprecated alias that was removed in Python 3.12;
        # ``assertEqual`` is the supported spelling.
        self.assertEqual(len(result), len(train_input.iloc[:, 1:]))
        # The flatliner must predict zero for every row.
        self.assertTrue((result == 0).all())

    def test_get_feature_names_from_linear(self):
        # Arrange
        model = FlatlinerRegressor()
        model.feature_names_ = ["a", "b", "c"]

        # Act
        feature_importance = model._get_feature_importance_from_linear(quantile=0.5)

        # Assert
        # One zero importance is expected per feature name.
        self.assertTrue(
            (feature_importance == np.array([0, 0, 0], dtype=np.float32)).all()
        )
40 changes: 20 additions & 20 deletions test/unit/model/test_confidence_interval_applicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,26 @@ class MockModel:
}
)

@staticmethod
def predict(input, quantile):
can_predict_quantiles_ = True

def predict(self, input, quantile):
if self.can_predict_quantiles and quantile not in self.quantiles:
# When model is trained on quantiles, it should fail if quantile is not in
# trained quantiles
raise ValueError("Quantile not in trained quantiles")

stdev_forecast = pd.DataFrame({"forecast": [5, 6, 7], "stdev": [0.5, 0.6, 0.7]})
return stdev_forecast["stdev"].rename(quantile)

@property
def can_predict_quantiles(self):
return True
return self.can_predict_quantiles_

@property
def quantiles(self):
return [0.01, 0.10, 0.25, 0.50, 0.75, 0.90, 0.99]


class MockNonQuantileModel(MockModel):
@property
def can_predict_quantiles(self):
Expand Down Expand Up @@ -182,23 +188,21 @@ def test_add_standard_deviation_to_forecast_in_past(self):
actual_stdev_forecast["stdev"].max(), 14
) # => MockModel.standard_deviation.stdev.max())


def test_add_quantiles_to_forecast_untrained_quantiles_with_quantile_model(self):
"""For quantile models, the trained quantiles can used if the quantiles of the pj are incompatible"""
# Set up
pj = {"quantiles": [0.12, 0.5, 0.65]} # numbers are arbitrary
pj = {"quantiles": [0.12, 0.5, 0.65]} # numbers are arbitrary
model = MockModel()
forecast = pd.DataFrame({"forecast": [5, 6, 7], "tAhead": [-1.0, 0.0, 1.0]})
forecast.index = [
pd.Timestamp(2012, 5, 1, 1, 30),
pd.Timestamp(2012, 5, 1, 1, 45),
pd.Timestamp(2012, 5, 1, 2, 00),
]
model.can_predict_quantiles_ = True
# Specify expectation
expected_quantiles = model.quantiles
expected_columns = [
f"quantile_P{int(q * 100):02d}" for q in expected_quantiles
]
expected_columns = [f"quantile_P{int(q * 100):02d}" for q in expected_quantiles]

# Act
pp_forecast = ConfidenceIntervalApplicator(
Expand All @@ -208,24 +212,22 @@ def test_add_quantiles_to_forecast_untrained_quantiles_with_quantile_model(self)
# Assert
for expected_column in expected_columns:
self.assertTrue(expected_column in pp_forecast.columns)



def test_add_quantiles_to_forecast_untrained_quantiles_with_nonquantile_model(self):
"""For nonquantile models, the quantiles of the pj should be used, also if the model was not trained on those"""
# Set up
pj = {"quantiles": [0.12, 0.5, 0.65]} # numbers are arbitrary
pj = {"quantiles": [0.12, 0.5, 0.65]} # numbers are arbitrary
model = MockModel()
forecast = pd.DataFrame({"forecast": [5, 6, 7], "tAhead": [-1.0, 0.0, 1.0]})
forecast.index = [
pd.Timestamp(2012, 5, 1, 1, 30),
pd.Timestamp(2012, 5, 1, 1, 45),
pd.Timestamp(2012, 5, 1, 2, 00),
]
model.can_predict_quantiles_ = False
# Specify expectation
expected_quantiles = pj['quantiles']
expected_columns = [
f"quantile_P{int(q * 100):02d}" for q in expected_quantiles
]
expected_quantiles = pj["quantiles"]
expected_columns = [f"quantile_P{int(q * 100):02d}" for q in expected_quantiles]

# Act
pp_forecast = ConfidenceIntervalApplicator(
Expand All @@ -235,5 +237,3 @@ def test_add_quantiles_to_forecast_untrained_quantiles_with_nonquantile_model(se
# Assert
for expected_column in expected_columns:
self.assertTrue(expected_column in pp_forecast.columns)


2 changes: 2 additions & 0 deletions test/unit/model/test_model_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def test_create_model_happy_flow(self):
MLModelType("linear_quantile"),
"xgb_multioutput_quantile",
MLModelType("xgb_multioutput_quantile"),
"flatliner",
MLModelType("flatliner"),
]:
self.assertTrue(model.can_predict_quantiles)
else:
Expand Down

0 comments on commit c2c88d6

Please sign in to comment.