-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feature(KTP-890): Added flatliner model for when only zeros need to b…
…e detected. (#537) * feature(KTP-890): Added flatliner model for when only zeros need to be detected. Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> * Format Python code with Black Signed-off-by: black <action@github.com> * feature(KTP-890): Added flatliner model for when only zeros need to be detected. Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> * feature(KTP-890): Added zero output check to tests based on the review feedback. Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> * fix: Fixed failing test regarding quantile feature. Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> * fix: Added licenses for dazls models. Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> --------- Signed-off-by: Egor Dmitriev <egor.dmitriev@alliander.com> Signed-off-by: black <action@github.com> Co-authored-by: black <action@github.com>
- Loading branch information
1 parent
570fd31
commit c2c88d6
Showing
12 changed files
with
211 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -171,3 +171,4 @@ dmypy.json | |
|
||
# Cython debug symbols | ||
cython_debug/ | ||
test/test/ |
3 changes: 3 additions & 0 deletions
3
openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_baseline_model.z.license
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> | ||
|
||
SPDX-License-Identifier: MPL-2.0 |
3 changes: 3 additions & 0 deletions
3
openstef/data/dazls_model_3.4.24/dazls_stored_3.4.24_model_card.md.license
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> | ||
|
||
SPDX-License-Identifier: MPL-2.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501> | ||
# | ||
# SPDX-License-Identifier: MPL-2.0 | ||
import re | ||
from typing import Dict, Union, Set, Optional, List | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from sklearn.base import RegressorMixin | ||
from sklearn.linear_model import QuantileRegressor | ||
from sklearn.preprocessing import MinMaxScaler | ||
from sklearn.utils.validation import check_is_fitted | ||
|
||
from openstef.feature_engineering.missing_values_transformer import ( | ||
MissingValuesTransformer, | ||
) | ||
from openstef.model.regressors.regressor import OpenstfRegressor | ||
|
||
|
||
class FlatlinerRegressor(OpenstfRegressor, RegressorMixin):
    """Regressor that ignores its input and always predicts zero.

    Meant for flatliner locations that still expect a prediction, so that the
    regular fit/predict interface is preserved.
    """

    # Class-level default; ``fit`` rebinds this on the instance with the
    # column names of the training frame.
    feature_names_: List[str] = []

    def __init__(self, quantiles=None):
        """Initialize FlatlinerRegressor.

        The model always predicts 0.0, regardless of the input features. The model is
        meant to be used for flatliner locations that still expect a prediction while
        preserving the prediction interface.

        Args:
            quantiles: Stored on the instance as-is; the predictions do not
                depend on it.

        """
        super().__init__()
        self.quantiles = quantiles

    @property
    def feature_names(self) -> list:
        """The names of the features used to train the model."""
        check_is_fitted(self)
        return self.feature_names_

    @staticmethod
    def _get_importance_names():
        """Return the mapping of importance kinds to their metric names."""
        return {
            "gain_importance_name": "total_gain",
            "weight_importance_name": "weight",
        }

    @property
    def can_predict_quantiles(self) -> bool:
        """Attribute that indicates if the model can predict particular quantiles."""
        return True

    def fit(self, x: pd.DataFrame, y: pd.Series, **kwargs) -> RegressorMixin:
        """Fits flatliner model.

        Nothing is learned from the data: only the feature names are recorded
        and every feature gets the same importance.

        Args:
            x: Feature matrix
            y: Labels (unused)

        Returns:
            Fitted flatliner model (``self``)

        """
        self.feature_names_ = list(x.columns)
        # Uniform importances that sum to 1; ``or 1.0`` avoids a division by
        # zero when the frame has no columns.
        self.feature_importances_ = np.ones(len(self.feature_names_)) / (
            len(self.feature_names_) or 1.0
        )

        return self

    def predict(self, x: pd.DataFrame, quantile: float = 0.5, **kwargs) -> np.ndarray:
        """Makes a prediction, which is zero for every row of ``x``.

        Args:
            x: Feature matrix
            quantile: Accepted for interface compatibility with the quantile
                models; the prediction is zero for every quantile.

        Returns:
            Array of zeros with one entry per row of ``x``

        """
        check_is_fitted(self)

        return np.zeros(x.shape[0])

    def _get_feature_importance_from_linear(self, quantile: float = 0.5) -> np.ndarray:
        """Return a zero importance for every recorded feature name.

        Args:
            quantile: Unused; accepted for interface compatibility.

        Returns:
            Array of zeros with one entry per feature name

        """
        check_is_fitted(self)
        return np.zeros(len(self.feature_names_))

    @classmethod
    def _get_param_names(cls):
        """Return the names of the constructor parameters."""
        return [
            "quantiles",
        ]

    def __sklearn_is_fitted__(self) -> bool:
        """Report the model as fitted, so ``check_is_fitted`` never raises."""
        # NOTE(review): this means predict() also works before fit(); this
        # looks deliberate since the model has no learned state - confirm.
        return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# SPDX-FileCopyrightText: 2017-2023 Contributors to the OpenSTEF project <korte.termijn.prognoses@alliander.com> # noqa E501> | ||
# | ||
# SPDX-License-Identifier: MPL-2.0 | ||
import unittest | ||
from unittest.mock import MagicMock | ||
|
||
import numpy as np | ||
import pandas as pd | ||
import sklearn | ||
from sklearn.utils.estimator_checks import check_estimator | ||
|
||
from openstef.feature_engineering.apply_features import apply_features | ||
from openstef.model.regressors.flatliner import FlatlinerRegressor | ||
from test.unit.utils.base import BaseTestCase | ||
from test.unit.utils.data import TestData | ||
|
||
# Reference training set loaded once at import time and shared by the tests
# in this module.
train_input = TestData.load("reference_sets/307-train-data.csv")
|
||
|
||
class MockModel:
    """Minimal stand-in for a fitted linear model exposing only ``coef_``."""

    coef_ = np.array([1, 1, 3])
|
||
|
||
class TestLinearQuantile(BaseTestCase):
    """Unit tests for ``FlatlinerRegressor``."""

    def setUp(self) -> None:
        self.quantiles = [0.9, 0.5, 0.6, 0.1]

    @unittest.skip(
        "Use this during development, this test requires not allowing nan "
        "values which we explicitly do allow."
    )
    def test_sklearn_compliant(self):
        # Use sklearn build in check, this will raise an exception if some check fails
        # During these tests the fit and predict methods are elaborately tested
        # More info: https://scikit-learn.org/stable/modules/generated/sklearn.utils.estimator_checks.check_estimator.html
        check_estimator(FlatlinerRegressor(quantiles=tuple(self.quantiles)))

    def test_quantile_fit(self):
        """Test happy flow of the training of model"""
        # Arrange
        model = FlatlinerRegressor()

        # Act
        model.fit(train_input.iloc[:, 1:], train_input.iloc[:, 0])

        # Assert
        # check if the model was fitted (raises NotFittedError when not fitted)
        self.assertIsNone(sklearn.utils.validation.check_is_fitted(model))

        # check if model is sklearn compatible
        self.assertIsInstance(model, sklearn.base.BaseEstimator)

        result: np.ndarray = model.predict(train_input.iloc[:, 1:])

        # assertEqual: the assertEquals alias was removed in Python 3.12
        self.assertEqual(len(result), len(train_input.iloc[:, 1:]))
        self.assertTrue((result == 0).all())

    def test_get_feature_names_from_linear(self):
        """Feature importances are zero for every feature name."""
        # Arrange
        model = FlatlinerRegressor()
        model.feature_names_ = ["a", "b", "c"]

        # Act
        feature_importance = model._get_feature_importance_from_linear(quantile=0.5)

        # Assert
        self.assertTrue(
            (feature_importance == np.array([0, 0, 0], dtype=np.float32)).all()
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters