From 2ccd1fa2392a829b0b9d3c208d6a22b8f3440817 Mon Sep 17 00:00:00 2001 From: GUI Date: Sun, 18 Dec 2022 20:04:16 +0100 Subject: [PATCH 1/5] Update piecewise binning to be compatible with RoPWR>=1.0.0 --- README.rst | 2 +- doc/source/conf.py | 4 +-- optbinning/binning/piecewise/base.py | 30 ++++++++++++++----- optbinning/binning/piecewise/binning.py | 23 +++++++++----- .../binning/piecewise/continuous_binning.py | 30 +++++++++++-------- requirements.txt | 2 +- setup.py | 2 +- 7 files changed, 61 insertions(+), 32 deletions(-) diff --git a/README.rst b/README.rst index 3f46743..a60d4bd 100644 --- a/README.rst +++ b/README.rst @@ -74,7 +74,7 @@ OptBinning requires * numpy (>=1.16.1) * ortools (>=9.4) * pandas -* ropwr (>=0.4.0) +* ropwr (>=1.0.0) * scikit-learn (>=1.0.2) * scipy (>=1.6.0) diff --git a/doc/source/conf.py b/doc/source/conf.py index 3376ee3..2b23337 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -22,9 +22,9 @@ author = 'Guillermo Navas-Palencia' # The short X.Y version -version = '0.17.2' +version = '0.17.3' # The full version, including alpha/beta/rc tags -release = '0.17.2' +release = '0.17.3' # -- General configuration --------------------------------------------------- diff --git a/optbinning/binning/piecewise/base.py b/optbinning/binning/piecewise/base.py index 483f838..7302d6f 100644 --- a/optbinning/binning/piecewise/base.py +++ b/optbinning/binning/piecewise/base.py @@ -28,9 +28,9 @@ def _check_parameters(name, estimator, objective, degree, continuous, - prebinning_method, max_n_prebins, min_prebin_size, - min_n_bins, max_n_bins, min_bin_size, max_bin_size, - monotonic_trend, n_subsamples, max_pvalue, + continuous_deriv, prebinning_method, max_n_prebins, + min_prebin_size, min_n_bins, max_n_bins, min_bin_size, + max_bin_size, monotonic_trend, n_subsamples, max_pvalue, max_pvalue_policy, outlier_detector, outlier_params, user_splits, user_splits_fixed, special_codes, split_digits, solver, h_epsilon, quantile, @@ -64,6 +64,10 @@ def 
_check_parameters(name, estimator, objective, degree, continuous, raise TypeError("continuous must be a boolean; got {}." .format(verbose)) + if not isinstance(continuous_deriv, bool): + raise TypeError("continuous_deriv must be a boolean; got {}." + .format(verbose)) + if prebinning_method not in ("cart", "quantile", "uniform"): raise ValueError('Invalid value for prebinning_method. Allowed string ' 'values are "cart", "quantile" and "uniform".') @@ -209,7 +213,8 @@ def _check_parameters(name, estimator, objective, degree, continuous, class BasePWBinning(Base, BaseEstimator): def __init__(self, name="", estimator=None, objective="l2", degree=1, - continuous=True, prebinning_method="cart", max_n_prebins=20, + continuous=True, continuous_deriv=True, + prebinning_method="cart", max_n_prebins=20, min_prebin_size=0.05, min_n_bins=None, max_n_bins=None, min_bin_size=None, max_bin_size=None, monotonic_trend="auto", n_subsamples=None, max_pvalue=None, @@ -224,6 +229,7 @@ def __init__(self, name="", estimator=None, objective="l2", degree=1, self.objective = objective self.degree = degree self.continuous = continuous + self.continuous_deriv = continuous_deriv self.prebinning_method = prebinning_method self.max_n_prebins = max_n_prebins @@ -451,9 +457,19 @@ def _fit_binning(self, x, y, prediction, lb, ub): time_solver = time.perf_counter() optimizer = RobustPWRegression( - self.objective, self.degree, self.continuous, monotonic, - self.solver, self.h_epsilon, self.quantile, self.regularization, - self.reg_l1, self.reg_l1, self.verbose) + objective=self.objective, + degree=self.degree, + continuous=self.continuous, + continuous_deriv=self.continuous_deriv, + monotonic_trend=monotonic, + solver=self.solver, + h_epsilon=self.h_epsilon, + quantile=self.quantile, + regularization=self.regularization, + reg_l1=self.reg_l1, + reg_l2=self.reg_l2, + extrapolation="continue", + verbose=self.verbose) optimizer.fit(x_subsamples, pred_subsamples, splits, lb=lb, ub=ub) diff --git 
a/optbinning/binning/piecewise/binning.py b/optbinning/binning/piecewise/binning.py index 807ec24..73eb33b 100644 --- a/optbinning/binning/piecewise/binning.py +++ b/optbinning/binning/piecewise/binning.py @@ -55,6 +55,11 @@ class OptimalPWBinning(BasePWBinning): continuous : bool (default=True) Whether to fit a continuous or discontinuous piecewise regression. + continuous_deriv : bool (default=True) + Whether to fit a polynomial with continuous derivatives. This option + fits a smooth degree d-polynomial with d-1 continuity in derivatives + (splines). + prebinning_method : str, optional (default="cart") The pre-binning method. Supported methods are "cart" for a CART decision tree, "quantile" to generate prebins with approximately same @@ -175,7 +180,8 @@ class OptimalPWBinning(BasePWBinning): Enable verbose output. """ def __init__(self, name="", estimator=None, objective="l2", degree=1, - continuous=True, prebinning_method="cart", max_n_prebins=20, + continuous=True, continuous_deriv=True, + prebinning_method="cart", max_n_prebins=20, min_prebin_size=0.05, min_n_bins=None, max_n_bins=None, min_bin_size=None, max_bin_size=None, monotonic_trend="auto", n_subsamples=None, max_pvalue=None, @@ -186,13 +192,14 @@ def __init__(self, name="", estimator=None, objective="l2", degree=1, reg_l2=1.0, random_state=None, verbose=False): super().__init__(name, estimator, objective, degree, continuous, - prebinning_method, max_n_prebins, min_prebin_size, - min_n_bins, max_n_bins, min_bin_size, max_bin_size, - monotonic_trend, n_subsamples, max_pvalue, - max_pvalue_policy, outlier_detector, outlier_params, - user_splits, user_splits_fixed, special_codes, - split_digits, solver, h_epsilon, quantile, - regularization, reg_l1, reg_l2, random_state, verbose) + continuous_deriv, prebinning_method, max_n_prebins, + min_prebin_size, min_n_bins, max_n_bins, min_bin_size, + max_bin_size, monotonic_trend, n_subsamples, + max_pvalue, max_pvalue_policy, outlier_detector, + outlier_params, 
user_splits, user_splits_fixed, + special_codes, split_digits, solver, h_epsilon, + quantile, regularization, reg_l1, reg_l2, + random_state, verbose) self._problem_type = "classification" diff --git a/optbinning/binning/piecewise/continuous_binning.py b/optbinning/binning/piecewise/continuous_binning.py index 718a6be..b80500a 100644 --- a/optbinning/binning/piecewise/continuous_binning.py +++ b/optbinning/binning/piecewise/continuous_binning.py @@ -45,6 +45,11 @@ class ContinuousOptimalPWBinning(BasePWBinning): continuous : bool (default=True) Whether to fit a continuous or discontinuous piecewise regression. + continuous_deriv : bool (default=True) + Whether to fit a polynomial with continuous derivatives. This option + fits a smooth degree d-polynomial with d-1 continuity in derivatives + (splines). + prebinning_method : str, optional (default="cart") The pre-binning method. Supported methods are "cart" for a CART decision tree, "quantile" to generate prebins with approximately same @@ -164,11 +169,11 @@ class ContinuousOptimalPWBinning(BasePWBinning): verbose : bool (default=False) Enable verbose output. 
""" - def __init__(self, name="", objective="l2", degree=1, - continuous=True, prebinning_method="cart", max_n_prebins=20, - min_prebin_size=0.05, min_n_bins=None, max_n_bins=None, - min_bin_size=None, max_bin_size=None, monotonic_trend="auto", - n_subsamples=None, max_pvalue=None, + def __init__(self, name="", objective="l2", degree=1, continuous=True, + continuous_deriv=True, prebinning_method="cart", + max_n_prebins=20, min_prebin_size=0.05, min_n_bins=None, + max_n_bins=None, min_bin_size=None, max_bin_size=None, + monotonic_trend="auto", n_subsamples=None, max_pvalue=None, max_pvalue_policy="consecutive", outlier_detector=None, outlier_params=None, user_splits=None, user_splits_fixed=None, special_codes=None, split_digits=None, solver="auto", @@ -176,13 +181,14 @@ def __init__(self, name="", objective="l2", degree=1, reg_l2=1.0, random_state=None, verbose=False): super().__init__(name, None, objective, degree, continuous, - prebinning_method, max_n_prebins, min_prebin_size, - min_n_bins, max_n_bins, min_bin_size, max_bin_size, - monotonic_trend, n_subsamples, max_pvalue, - max_pvalue_policy, outlier_detector, outlier_params, - user_splits, user_splits_fixed, special_codes, - split_digits, solver, h_epsilon, quantile, - regularization, reg_l1, reg_l2, random_state, verbose) + continuous_deriv, prebinning_method, max_n_prebins, + min_prebin_size, min_n_bins, max_n_bins, min_bin_size, + max_bin_size, monotonic_trend, n_subsamples, + max_pvalue, max_pvalue_policy, outlier_detector, + outlier_params, user_splits, user_splits_fixed, + special_codes, split_digits, solver, h_epsilon, + quantile, regularization, reg_l1, reg_l2, + random_state, verbose) self._problem_type = "regression" diff --git a/requirements.txt b/requirements.txt index b07be89..454e979 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ matplotlib numpy>=1.16.1 ortools>=9.4 pandas -ropwr>=0.4.0 +ropwr>=1.0.0 scikit-learn>=1.0.2 scipy>=1.6.0 diff --git a/setup.py b/setup.py index 
ef80131..c863f00 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,7 @@ def run_tests(self): 'numpy>=1.16.1', 'ortools>=9.4', 'pandas', - 'ropwr>=0.4.0', + 'ropwr>=1.0.0', 'scikit-learn>=1.0.2', 'scipy>=1.6.0', ] From ffee6ade64bfdf489f4e3d6e178de79a9fd8e0a9 Mon Sep 17 00:00:00 2001 From: GUI Date: Sun, 18 Dec 2022 20:04:39 +0100 Subject: [PATCH 2/5] Update to version 0.17.3 --- optbinning/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optbinning/_version.py b/optbinning/_version.py index f4ac5a7..b556f60 100644 --- a/optbinning/_version.py +++ b/optbinning/_version.py @@ -1,3 +1,3 @@ """Version information.""" -__version__ = "0.17.2" +__version__ = "0.17.3" From 9b145f66b7e0417a6863fffa2abe1b18be477a70 Mon Sep 17 00:00:00 2001 From: GUI Date: Mon, 9 Jan 2023 23:43:57 +0100 Subject: [PATCH 3/5] Implement sample_weight check in scorecard class #228 --- optbinning/scorecard/scorecard.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/optbinning/scorecard/scorecard.py b/optbinning/scorecard/scorecard.py index fe6174a..d67d3b2 100644 --- a/optbinning/scorecard/scorecard.py +++ b/optbinning/scorecard/scorecard.py @@ -557,7 +557,11 @@ def _fit(self, X, y, sample_weight, metric_special, metric_missing, logger.info("Fitting estimator.") self.estimator_ = clone(self.estimator) - self.estimator_.fit(X_t, y, sample_weight) + + if sample_weight is not None: + self.estimator_.fit(X_t, y, sample_weight=sample_weight) + else: + self.estimator_.fit(X_t, y) self._time_estimator = time.perf_counter() - time_estimator From 289f524345baa93ba75641741a043ad7bb924225 Mon Sep 17 00:00:00 2001 From: Wenhuan Date: Sun, 18 Dec 2022 18:25:51 +0800 Subject: [PATCH 4/5] fix metric_missing=0 is ignored in Scorecard._fit #226 --- optbinning/scorecard/scorecard.py | 2 +- tests/test_scorecard.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/optbinning/scorecard/scorecard.py 
b/optbinning/scorecard/scorecard.py index d67d3b2..1a44993 100644 --- a/optbinning/scorecard/scorecard.py +++ b/optbinning/scorecard/scorecard.py @@ -605,7 +605,7 @@ def _fit(self, X, y, sample_weight, metric_special, metric_missing, binning_table.loc[ nt-1-n_specials:nt-2, "Points"] = metric_special * c - elif metric_missing != 'empirical': + if metric_missing != 'empirical': binning_table.loc[nt-1, "Points"] = metric_missing * c binning_table.index.names = ['Bin id'] diff --git a/tests/test_scorecard.py b/tests/test_scorecard.py index a4aed27..f937951 100644 --- a/tests/test_scorecard.py +++ b/tests/test_scorecard.py @@ -450,3 +450,26 @@ def test_verbose(): with open("tests/results/test_scorecard_verbose.txt", "w") as f: with redirect_stdout(f): scorecard.fit(X, y) + + +def test_missing_metrics(): + data = pd.DataFrame( + {'target': np.hstack( + (np.tile(np.array([0, 1]), 50), + np.array([0]*90 + [1]*10) + ) + ), + 'var': [np.nan] * 100 + ['A'] * 100} + ) + + binning_process = BinningProcess(['var']) + scaling_method_params = {'min': 0, 'max': 100} + + scorecard = Scorecard( + binning_process=binning_process, + estimator=LogisticRegression(), + scaling_method="min_max", + scaling_method_params=scaling_method_params + ).fit(data, data.target) + + assert scorecard.table()['Points'].iloc[-1] == approx(0, rel=1e-6) From da700e2b2d1ed6fd5310b0a79d8000cd66e3a38d Mon Sep 17 00:00:00 2001 From: GUI Date: Sun, 12 Feb 2023 12:57:59 +0100 Subject: [PATCH 5/5] Update to version 0.17.3 --- README.rst | 8 ++++---- doc/source/conf.py | 2 +- doc/source/release_notes.rst | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index a60d4bd..14a84a6 100644 --- a/README.rst +++ b/README.rst @@ -227,8 +227,8 @@ Print overview information about the options settings, problem statistics, and t .. 
code-block:: text - optbinning (Version 0.17.0) - Copyright (c) 2019-2022 Guillermo Navas-Palencia, Apache License 2.0 + optbinning (Version 0.17.3) + Copyright (c) 2019-2023 Guillermo Navas-Palencia, Apache License 2.0 Begin options name mean radius * U @@ -395,8 +395,8 @@ and the number of selected variables after the binning process. .. code-block:: text - optbinning (Version 0.17.0) - Copyright (c) 2019-2022 Guillermo Navas-Palencia, Apache License 2.0 + optbinning (Version 0.17.3) + Copyright (c) 2019-2023 Guillermo Navas-Palencia, Apache License 2.0 Begin options binning_process yes * U diff --git a/doc/source/conf.py b/doc/source/conf.py index 2b23337..ed62ba6 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -18,7 +18,7 @@ # -- Project information ----------------------------------------------------- project = 'optbinning' -copyright = '2019 - 2022, Guillermo Navas-Palencia' +copyright = '2019 - 2023, Guillermo Navas-Palencia' author = 'Guillermo Navas-Palencia' # The short X.Y version diff --git a/doc/source/release_notes.rst b/doc/source/release_notes.rst index ecdc231..3d14ecc 100644 --- a/doc/source/release_notes.rst +++ b/doc/source/release_notes.rst @@ -1,6 +1,22 @@ Release Notes ============= +Version 0.17.3 (2023-02-12) +--------------------------- + +Improvements: + + - Implement ``sample_weight`` check in Scorecard class (`Issue 228 <https://github.com/guillermo-navas-palencia/optbinning/issues/228>`_). + +Bugfixes: + + - Fix ``metric_missing`` ignored in Scorecard class (`Issue 226 <https://github.com/guillermo-navas-palencia/optbinning/issues/226>`_). + +Dependencies: + + - Update RoPWR required version. + + Version 0.17.2 (2022-12-15) ---------------------------