From c36f8bbd798ea76444a9b2307555078322d90f92 Mon Sep 17 00:00:00 2001 From: Iskander Gaba Date: Sat, 16 Nov 2024 00:57:01 +0100 Subject: [PATCH] Improve docstrings and add Enum usage --- pyriodicity/detectors/robustperiod.py | 173 ++++++++++++++++++-------- 1 file changed, 118 insertions(+), 55 deletions(-) diff --git a/pyriodicity/detectors/robustperiod.py b/pyriodicity/detectors/robustperiod.py index c33964b..09e6c5b 100644 --- a/pyriodicity/detectors/robustperiod.py +++ b/pyriodicity/detectors/robustperiod.py @@ -1,4 +1,5 @@ import datetime +from enum import Enum, unique from typing import Union import numpy as np @@ -39,10 +40,38 @@ class RobustPeriod: array([12]) """ + @unique + class LambdaSelection(Enum): + """ + Enum for selecting the Hodrick-Prescott filter lambda parameter calculation + method. + + Attributes + ---------- + HODRICK_PRESCOTT : str + Use the Hodrick and Prescott method for lambda calculation [1]_. + RAVN_UHLIG : str + Use the Ravn and Uhlig method for lambda calculation [2]_. + + References + ---------- + .. [1] Hodrick, R. J., & Prescott, E. C. (1997). + Postwar US business cycles: an empirical investigation. + Journal of Money, Credit, and Banking, 1-16. + https://doi.org/10.2307/2953682 + .. [2] Ravn, M. O., & Uhlig, H. (2002). + On adjusting the Hodrick-Prescott filter for the frequency of observations. + Review of Economics and Statistics, 84(2), 371-376. + https://doi.org/10.1162/003465302317411604 + """ + + HODRICK_PRESCOTT = "hodrick-prescott" + RAVN_UHLIG = "ravn-uhlig" + @staticmethod def detect( x: ArrayLike, - lamb: Union[str, float] = "ravn-uhlig", + lamb: Union[float, str, LambdaSelection] = LambdaSelection.RAVN_UHLIG, c: float = 1.5, ) -> NDArray: """ @@ -52,13 +81,14 @@ def detect( ---------- x : array_like Data to be investigated. Must be squeezable to 1-d. - lamb : float, str, default = 'ravn-uhlig' + lamb : float, str, RobustPeriod.LambdaSelection, + default = RobustPeriod.LambdaSelection.RAVN_UHLIG The Hodrick-Prescott filter smoothing parameter. Possible values are either - a float value or one of the following string values: - ['hodrick-prescott', 'ravn-uhlig']. These represent the automatic lambda - parameter selection methods by Hodrick and Prescott [1]_ - and Ravn and Uhlig [2]_, respectively. If a string value is used, ``x`` - must contain an ``index`` attribute representing data point timestamps. + a float, a `RobustPeriod.LambdaSelection` value, or one of the following + string values: ['hodrick-prescott', 'ravn-uhlig']. These represent the + automatic lambda parameter selection methods by Hodrick and Prescott [1]_ + and Ravn and Uhlig [2]_, respectively. If lamb is not float value, then + ``x`` must be a data array with a datetime-like index. c : float, default = 1.5 The constant threshold that determines the robustness of the Huber function. A smaller value makes the Huber function more sensitive to outliers. Huber @@ -84,28 +114,29 @@ def detect( """ # Preprocess the data - y = RobustPeriod._preprocess(x, lamb, c) + y = RobustPeriod._preprocess( + x, RobustPeriod.LambdaSelection(lamb) if isinstance(lamb, str) else lamb, c + ) # TODO Decouple multiple periodicities # TODO Robust single periodicity detection @staticmethod - def _preprocess(x: ArrayLike, lamb: Union[str, float], c: float) -> NDArray: + def _preprocess( + x: ArrayLike, lamb: Union[float, LambdaSelection], c: float + ) -> NDArray: """ - Validate the period hint. + Apply the data preprocessing step of RobustPeriod. Parameters ---------- - x : array_like + x : ArrayLike Data to be preprocessed. Must be squeezable to 1-d. - lamb : float, str - The Hodrick-Prescott filter smoothing parameter. Possible values are either - a float value or one of the following string values: - ['hodrick-prescott', 'ravn-uhlig']. These represent the automatic lambda - parameter selection methods by Hodrick and Prescott and Ravn and Uhlig, - respectively. If a string value is used, ``x`` must contain an ``index`` - attribute representing data point timestamps. + lamb : float, LambdaSelection + The Hodrick-Prescott filter smoothing parameter. If a + `RobustPeriod.LambdaSelection` value is provided, then ``x`` must be a data + array with a datetime-like index. c : float The constant threshold that determines the robustness of the Huber function. A smaller value makes the Huber function more sensitive to outliers. Huber @@ -114,11 +145,11 @@ def _preprocess(x: ArrayLike, lamb: Union[str, float], c: float) -> NDArray: Returns ------- NDArray - Preprocessed series data. + Preprocessed data. """ - # Compute the lambda parameter if needed - if isinstance(lamb, str): + # Compute the lambda parameter if a lambda selection method is provided + if isinstance(lamb, RobustPeriod.LambdaSelection): lamb = RobustPeriod._compute_lambda(x, lamb) # Convert to one-dimensional array @@ -132,6 +163,51 @@ def _preprocess(x: ArrayLike, lamb: Union[str, float], c: float) -> NDArray: mad = np.mean(np.abs(y - mean)) return RobustPeriod._huber((y - mean) / mad, c) + @staticmethod + def _compute_lambda(x: ArrayLike, lambda_selection: LambdaSelection) -> float: + """ + Compute the lambda parameter for Hodrick-Prescott filter application on a time + series. + + Parameters + ---------- + x : ArrayLike + Input data array with a datetime-like index. + lambda_selection : LambdaSelection + The selection method for the lambda parameter. Must be one of the + `RobustPeriod.LambdaSelection` enum values. + + Returns + ------- + float + Computed lambda parameter value. + + Raises + ------ + AttributeError + If the input data does not have an 'index' attribute. + TypeError + If the index values are not of type 'numpy.datetime64' or 'datetime.date'. + ValueError + If an invalid lambda parameter value is provided. + """ + if not hasattr(x, "index"): + raise AttributeError("Data has no attribute 'index'.") + if not isinstance(x.index[0], (np.datetime64, datetime.date)): + raise TypeError( + "Index values are not of 'numpy.datetime64'" "or 'datetime.date' types." + ) + yearly_nobs = np.rint(np.timedelta64(365, "D") / np.diff(x.index.values).mean()) + if lambda_selection == RobustPeriod.LambdaSelection.HODRICK_PRESCOTT: + lambda_selection = 100 * yearly_nobs**2 + + elif lambda_selection == RobustPeriod.LambdaSelection.RAVN_UHLIG: + lambda_selection = 6.25 * yearly_nobs**4 + else: + raise ValueError( + "Invalid lamb parameter value: '{}'".format(lambda_selection) + ) + @staticmethod def _hpfilter(x: ArrayLike, lamb: float): """ @@ -139,16 +215,16 @@ def _hpfilter(x: ArrayLike, lamb: float): Parameters ---------- - x : array_like - The time series to be filtered. + x : ArrayLike + The series data to be filtered. lamb : float - The Hodrick-Prescott filter smoothing parameter. + The smoothing parameter for the Hodrick-Prescott filter. Returns ------- - cycle : ndarray + cycle : NDArray The cyclical component of the time series. - trend : ndarray + trend : NDArray The trend component of the time series. """ @@ -168,35 +244,22 @@ def _hpfilter(x: ArrayLike, lamb: float): cycle = y - trend return cycle, trend - @staticmethod - def _compute_lambda(x: ArrayLike, lamb_approach: str) -> float: - if isinstance(lamb_approach, str): - if not hasattr(x, "index"): - raise AttributeError("Data has no attribute 'index'.") - if not isinstance(x.index[0], (np.datetime64, datetime.date)): - raise TypeError( - "Index values are not of 'numpy.datetime64'" - "or 'datetime.date' types." - ) - yearly_nobs = np.rint( - np.timedelta64(365, "D") / np.diff(x.index.values).mean() - ) - if lamb_approach == "hodrick-prescott": - lamb_approach = 100 * yearly_nobs**2 - elif lamb_approach == "ravn-uhlig": - lamb_approach = 6.25 * yearly_nobs**4 - else: - raise ValueError( - "Invalid lamb parameter value: '{}'".format(lamb_approach) - ) - else: - raise TypeError( - "Invalid 'lambda_approach' parameter type: '{}'".format( - type(lamb_approach) - ) - ) - @staticmethod def _huber(x: ArrayLike, c: float) -> ArrayLike: - # TODO Research the choice of c + """ + Compute the Huber function for an array-like input. + + Parameters + ---------- + x : ArrayLike + Input array-like object containing numerical values. + c : float + Threshold parameter that determines the point where the function + transitions from quadratic to linear. + + Returns + ------- + ArrayLike + An array-like object with the Huber function applied element-wise. + """ return np.sign(x) * np.min(np.abs(x), c)