Skip to content

Commit

Permalink
Merge pull request #231 from guillermo-navas-palencia/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
guillermo-navas-palencia authored Feb 12, 2023
2 parents d20ed46 + da700e2 commit 9735aca
Show file tree
Hide file tree
Showing 11 changed files with 112 additions and 40 deletions.
10 changes: 5 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ OptBinning requires
* numpy (>=1.16.1)
* ortools (>=9.4)
* pandas
* ropwr (>=0.4.0)
* ropwr (>=1.0.0)
* scikit-learn (>=1.0.2)
* scipy (>=1.6.0)

Expand Down Expand Up @@ -227,8 +227,8 @@ Print overview information about the options settings, problem statistics, and t
.. code-block:: text
optbinning (Version 0.17.0)
Copyright (c) 2019-2022 Guillermo Navas-Palencia, Apache License 2.0
optbinning (Version 0.17.3)
Copyright (c) 2019-2023 Guillermo Navas-Palencia, Apache License 2.0
Begin options
name mean radius * U
Expand Down Expand Up @@ -395,8 +395,8 @@ and the number of selected variables after the binning process.
.. code-block:: text
optbinning (Version 0.17.0)
Copyright (c) 2019-2022 Guillermo Navas-Palencia, Apache License 2.0
optbinning (Version 0.17.3)
Copyright (c) 2019-2023 Guillermo Navas-Palencia, Apache License 2.0
Begin options
binning_process yes * U
Expand Down
6 changes: 3 additions & 3 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
# -- Project information -----------------------------------------------------

project = 'optbinning'
copyright = '2019 - 2022, Guillermo Navas-Palencia'
copyright = '2019 - 2023, Guillermo Navas-Palencia'
author = 'Guillermo Navas-Palencia'

# The short X.Y version
version = '0.17.2'
version = '0.17.3'
# The full version, including alpha/beta/rc tags
release = '0.17.2'
release = '0.17.3'


# -- General configuration ---------------------------------------------------
Expand Down
16 changes: 16 additions & 0 deletions doc/source/release_notes.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
Release Notes
=============

Version 0.17.3 (2023-02-12)
---------------------------

Improvements:

- Implement ``sample_weight`` check in Scorecard class (`Issue 228 <https://github.com/guillermo-navas-palencia/optbinning/issues/228>`_).

Bugfixes:

- Fix ``metric_missing`` ignored in Scorecard class (`Issue 226 <https://github.com/guillermo-navas-palencia/optbinning/issues/226>`_).

Dependencies:

- Update RoPWR required version to >=1.0.0.


Version 0.17.2 (2022-12-15)
---------------------------

Expand Down
2 changes: 1 addition & 1 deletion optbinning/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Version information."""

__version__ = "0.17.2"
__version__ = "0.17.3"
30 changes: 23 additions & 7 deletions optbinning/binning/piecewise/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@


def _check_parameters(name, estimator, objective, degree, continuous,
prebinning_method, max_n_prebins, min_prebin_size,
min_n_bins, max_n_bins, min_bin_size, max_bin_size,
monotonic_trend, n_subsamples, max_pvalue,
continuous_deriv, prebinning_method, max_n_prebins,
min_prebin_size, min_n_bins, max_n_bins, min_bin_size,
max_bin_size, monotonic_trend, n_subsamples, max_pvalue,
max_pvalue_policy, outlier_detector, outlier_params,
user_splits, user_splits_fixed, special_codes,
split_digits, solver, h_epsilon, quantile,
Expand Down Expand Up @@ -64,6 +64,10 @@ def _check_parameters(name, estimator, objective, degree, continuous,
raise TypeError("continuous must be a boolean; got {}."
.format(verbose))

if not isinstance(continuous_deriv, bool):
raise TypeError("continuous_deriv must be a boolean; got {}."
.format(verbose))

if prebinning_method not in ("cart", "quantile", "uniform"):
raise ValueError('Invalid value for prebinning_method. Allowed string '
'values are "cart", "quantile" and "uniform".')
Expand Down Expand Up @@ -209,7 +213,8 @@ def _check_parameters(name, estimator, objective, degree, continuous,

class BasePWBinning(Base, BaseEstimator):
def __init__(self, name="", estimator=None, objective="l2", degree=1,
continuous=True, prebinning_method="cart", max_n_prebins=20,
continuous=True, continuous_deriv=True,
prebinning_method="cart", max_n_prebins=20,
min_prebin_size=0.05, min_n_bins=None, max_n_bins=None,
min_bin_size=None, max_bin_size=None, monotonic_trend="auto",
n_subsamples=None, max_pvalue=None,
Expand All @@ -224,6 +229,7 @@ def __init__(self, name="", estimator=None, objective="l2", degree=1,
self.objective = objective
self.degree = degree
self.continuous = continuous
self.continuous_deriv = continuous_deriv
self.prebinning_method = prebinning_method

self.max_n_prebins = max_n_prebins
Expand Down Expand Up @@ -451,9 +457,19 @@ def _fit_binning(self, x, y, prediction, lb, ub):
time_solver = time.perf_counter()

optimizer = RobustPWRegression(
self.objective, self.degree, self.continuous, monotonic,
self.solver, self.h_epsilon, self.quantile, self.regularization,
self.reg_l1, self.reg_l1, self.verbose)
objective=self.objective,
degree=self.degree,
continuous=self.continuous,
continuous_deriv=self.continuous_deriv,
monotonic_trend=monotonic,
solver=self.solver,
h_epsilon=self.h_epsilon,
quantile=self.quantile,
regularization=self.regularization,
reg_l1=self.reg_l1,
reg_l2=self.reg_l2,
extrapolation="continue",
verbose=self.verbose)

optimizer.fit(x_subsamples, pred_subsamples, splits, lb=lb, ub=ub)

Expand Down
23 changes: 15 additions & 8 deletions optbinning/binning/piecewise/binning.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ class OptimalPWBinning(BasePWBinning):
continuous : bool (default=True)
Whether to fit a continuous or discontinuous piecewise regression.
continuous_deriv : bool (default=True)
Whether to fit a piecewise polynomial with continuous derivatives. If
True, a smooth polynomial of degree d with continuous derivatives up to
order d-1 (a spline) is fitted.
prebinning_method : str, optional (default="cart")
The pre-binning method. Supported methods are "cart" for a CART
decision tree, "quantile" to generate prebins with approximately same
Expand Down Expand Up @@ -175,7 +180,8 @@ class OptimalPWBinning(BasePWBinning):
Enable verbose output.
"""
def __init__(self, name="", estimator=None, objective="l2", degree=1,
continuous=True, prebinning_method="cart", max_n_prebins=20,
continuous=True, continuous_deriv=True,
prebinning_method="cart", max_n_prebins=20,
min_prebin_size=0.05, min_n_bins=None, max_n_bins=None,
min_bin_size=None, max_bin_size=None, monotonic_trend="auto",
n_subsamples=None, max_pvalue=None,
Expand All @@ -186,13 +192,14 @@ def __init__(self, name="", estimator=None, objective="l2", degree=1,
reg_l2=1.0, random_state=None, verbose=False):

super().__init__(name, estimator, objective, degree, continuous,
prebinning_method, max_n_prebins, min_prebin_size,
min_n_bins, max_n_bins, min_bin_size, max_bin_size,
monotonic_trend, n_subsamples, max_pvalue,
max_pvalue_policy, outlier_detector, outlier_params,
user_splits, user_splits_fixed, special_codes,
split_digits, solver, h_epsilon, quantile,
regularization, reg_l1, reg_l2, random_state, verbose)
continuous_deriv, prebinning_method, max_n_prebins,
min_prebin_size, min_n_bins, max_n_bins, min_bin_size,
max_bin_size, monotonic_trend, n_subsamples,
max_pvalue, max_pvalue_policy, outlier_detector,
outlier_params, user_splits, user_splits_fixed,
special_codes, split_digits, solver, h_epsilon,
quantile, regularization, reg_l1, reg_l2,
random_state, verbose)

self._problem_type = "classification"

Expand Down
30 changes: 18 additions & 12 deletions optbinning/binning/piecewise/continuous_binning.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ class ContinuousOptimalPWBinning(BasePWBinning):
continuous : bool (default=True)
Whether to fit a continuous or discontinuous piecewise regression.
continuous_deriv : bool (default=True)
Whether to fit a piecewise polynomial with continuous derivatives. If
True, a smooth polynomial of degree d with continuous derivatives up to
order d-1 (a spline) is fitted.
prebinning_method : str, optional (default="cart")
The pre-binning method. Supported methods are "cart" for a CART
decision tree, "quantile" to generate prebins with approximately same
Expand Down Expand Up @@ -164,25 +169,26 @@ class ContinuousOptimalPWBinning(BasePWBinning):
verbose : bool (default=False)
Enable verbose output.
"""
def __init__(self, name="", objective="l2", degree=1,
continuous=True, prebinning_method="cart", max_n_prebins=20,
min_prebin_size=0.05, min_n_bins=None, max_n_bins=None,
min_bin_size=None, max_bin_size=None, monotonic_trend="auto",
n_subsamples=None, max_pvalue=None,
def __init__(self, name="", objective="l2", degree=1, continuous=True,
continuous_deriv=True, prebinning_method="cart",
max_n_prebins=20, min_prebin_size=0.05, min_n_bins=None,
max_n_bins=None, min_bin_size=None, max_bin_size=None,
monotonic_trend="auto", n_subsamples=None, max_pvalue=None,
max_pvalue_policy="consecutive", outlier_detector=None,
outlier_params=None, user_splits=None, user_splits_fixed=None,
special_codes=None, split_digits=None, solver="auto",
h_epsilon=1.35, quantile=0.5, regularization=None, reg_l1=1.0,
reg_l2=1.0, random_state=None, verbose=False):

super().__init__(name, None, objective, degree, continuous,
prebinning_method, max_n_prebins, min_prebin_size,
min_n_bins, max_n_bins, min_bin_size, max_bin_size,
monotonic_trend, n_subsamples, max_pvalue,
max_pvalue_policy, outlier_detector, outlier_params,
user_splits, user_splits_fixed, special_codes,
split_digits, solver, h_epsilon, quantile,
regularization, reg_l1, reg_l2, random_state, verbose)
continuous_deriv, prebinning_method, max_n_prebins,
min_prebin_size, min_n_bins, max_n_bins, min_bin_size,
max_bin_size, monotonic_trend, n_subsamples,
max_pvalue, max_pvalue_policy, outlier_detector,
outlier_params, user_splits, user_splits_fixed,
special_codes, split_digits, solver, h_epsilon,
quantile, regularization, reg_l1, reg_l2,
random_state, verbose)

self._problem_type = "regression"

Expand Down
8 changes: 6 additions & 2 deletions optbinning/scorecard/scorecard.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,11 @@ def _fit(self, X, y, sample_weight, metric_special, metric_missing,
logger.info("Fitting estimator.")

self.estimator_ = clone(self.estimator)
self.estimator_.fit(X_t, y, sample_weight)

if sample_weight is not None:
self.estimator_.fit(X_t, y, sample_weight=sample_weight)
else:
self.estimator_.fit(X_t, y)

self._time_estimator = time.perf_counter() - time_estimator

Expand Down Expand Up @@ -601,7 +605,7 @@ def _fit(self, X, y, sample_weight, metric_special, metric_missing,

binning_table.loc[
nt-1-n_specials:nt-2, "Points"] = metric_special * c
elif metric_missing != 'empirical':
if metric_missing != 'empirical':
binning_table.loc[nt-1, "Points"] = metric_missing * c

binning_table.index.names = ['Bin id']
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ matplotlib
numpy>=1.16.1
ortools>=9.4
pandas
ropwr>=0.4.0
ropwr>=1.0.0
scikit-learn>=1.0.2
scipy>=1.6.0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def run_tests(self):
'numpy>=1.16.1',
'ortools>=9.4',
'pandas',
'ropwr>=0.4.0',
'ropwr>=1.0.0',
'scikit-learn>=1.0.2',
'scipy>=1.6.0',
]
Expand Down
23 changes: 23 additions & 0 deletions tests/test_scorecard.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,3 +450,26 @@ def test_verbose():
with open("tests/results/test_scorecard_verbose.txt", "w") as f:
with redirect_stdout(f):
scorecard.fit(X, y)


def test_missing_metrics():
    """Check the points in the last scorecard row for data with missing values.

    Builds a single-variable dataset of 200 rows: the first 100 rows have a
    missing ``var`` value and an alternating 0/1 target, the last 100 rows
    hold the category ``'A'`` with 90 zeros followed by 10 ones. A scorecard
    with min-max scaling to [0, 100] is fitted on it and the ``Points`` value
    of the last row of the scorecard table is compared against 0.

    NOTE(review): presumably the regression test for the ``metric_missing``
    fix, with the last table row being the missing bin -- confirm.
    """
    data = pd.DataFrame(
        {'target': np.hstack(
            (np.tile(np.array([0, 1]), 50),
             np.array([0]*90 + [1]*10)
             )
            ),
         'var': [np.nan] * 100 + ['A'] * 100}
    )

    binning_process = BinningProcess(['var'])
    scaling_method_params = {'min': 0, 'max': 100}

    scorecard = Scorecard(
        binning_process=binning_process,
        estimator=LogisticRegression(),
        scaling_method="min_max",
        scaling_method_params=scaling_method_params
    ).fit(data, data.target)

    # The expected value is 0, so a relative tolerance scales to 0 and has no
    # effect; an absolute tolerance is what actually bounds the comparison.
    assert scorecard.table()['Points'].iloc[-1] == approx(0, abs=1e-6)

0 comments on commit 9735aca

Please sign in to comment.