diff --git a/build_tools/cirrus/arm_tests.yml b/build_tools/cirrus/arm_tests.yml index f64adbcdd4748..8fe3c7b6153f2 100644 --- a/build_tools/cirrus/arm_tests.yml +++ b/build_tools/cirrus/arm_tests.yml @@ -11,6 +11,10 @@ linux_aarch64_test_task: LOCK_FILE: build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock CONDA_PKGS_DIRS: /root/.conda/pkgs HOME: / # $HOME is not defined in image and is required to install mambaforge + # Upload tokens have been encrypted via the CirrusCI interface: + # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables + # See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires. + BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f] ccache_cache: folder: /root/.cache/ccache conda_cache: diff --git a/doc/conf.py b/doc/conf.py index 1d4b061f9afb5..288aba5404ae6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -312,15 +312,18 @@ html_show_search_summary = False +# The "summary-anchor" IDs will be overwritten via JavaScript to be unique. +# See `doc/theme/scikit-learn-modern/static/js/details-permalink.js`. rst_prolog = """ .. |details-start| raw:: html -
+    <details id="summary-anchor">
.. |details-split| raw:: html

     <span class="tooltiptext">Click for more details</span>
+    <a class="headerlink" href="#summary-anchor">¶</a>
     </summary>
     <div>
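The anchor IDs referenced above are rewritten at page-load time by the new
`doc/themes/scikit-learn-modern/static/js/details-permalink.js` script added
below. As a rough Python sketch of the same slug-and-deduplicate logic (the
function name `summary_to_anchor_ids` is illustrative only and is not part of
this patch)::

    def summary_to_anchor_ids(summaries):
        """Slugify the first line of each summary text and suffix duplicates
        with a counter, mirroring updateIdAndHrefBasedOnSummaryText()."""
        counters = {}
        anchor_ids = []
        for text in summaries:
            # first line only, whitespace runs collapsed to dashes, lowercased
            anchor = "-".join(text.strip().splitlines()[0].split()).lower()
            if anchor in counters:
                counters[anchor] += 1
                anchor = f"{anchor}-{counters[anchor]}"
            else:
                counters[anchor] = 1
            anchor_ids.append(anchor)
        return anchor_ids

    print(summary_to_anchor_ids(["Mathematical details", "Mathematical details"]))
    # ['mathematical-details', 'mathematical-details-2']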
diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 3f99e7841bb00..efdde897e841b 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -612,7 +612,7 @@ Here, ```` is the parameter name of the nested estimator, in this case ``estimator``. If the meta-estimator is constructed as a collection of estimators as in `pipeline.Pipeline`, then ```` refers to the name of the estimator, -see :ref:`pipeline_nested_parameters`. In practice, there can be several +see :ref:`pipeline_nested_parameters`. In practice, there can be several levels of nesting:: >>> from sklearn.pipeline import Pipeline diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 2ebb7dff7810b..a88a92604767e 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -181,9 +181,15 @@ take several parameters: of the python function is negated by the scorer object, conforming to the cross validation convention that scorers return higher values for better models. -* for classification metrics only: whether the python function you provided requires continuous decision - certainties (``needs_threshold=True``). The default value is - False. +* for classification metrics only: whether the python function you provided requires + continuous decision certainties. If the scoring function only accepts probability + estimates (e.g. :func:`metrics.log_loss`) then one needs to set the parameter + `response_method`, thus in this case `response_method="predict_proba"`. Some scoring + function do not necessarily require probability estimates but rather non-thresholded + decision values (e.g. :func:`metrics.roc_auc_score`). In this case, one provides a + list such as `response_method=["decision_function", "predict_proba"]`. In this case, + the scorer will use the first available method, in the order given in the list, + to compute the scores. * any additional parameters, such as ``beta`` or ``labels`` in :func:`f1_score`. diff --git a/doc/themes/scikit-learn-modern/layout.html b/doc/themes/scikit-learn-modern/layout.html index 191434c7ec2e2..a759ec2f8c8f0 100644 --- a/doc/themes/scikit-learn-modern/layout.html +++ b/doc/themes/scikit-learn-modern/layout.html @@ -36,6 +36,7 @@ + {%- block extrahead %} {% endblock %} diff --git a/doc/themes/scikit-learn-modern/static/css/theme.css b/doc/themes/scikit-learn-modern/static/css/theme.css index 21e1a2336a553..56f208540fd70 100644 --- a/doc/themes/scikit-learn-modern/static/css/theme.css +++ b/doc/themes/scikit-learn-modern/static/css/theme.css @@ -149,6 +149,15 @@ div.clearer { /* details / summary */ +/* Enables section links to be visible when anchor-linked */ +div.sk-page-content details::before { + display: block; + height: 52px; + margin-top: -52px; + visibility: hidden; + content: ""; +} + div.sk-page-content details { margin: 4ex 0pt; } @@ -202,6 +211,10 @@ div.sk-page-content summary:hover .tooltiptext { visibility: visible; } +div.sk-page-content summary:hover .headerlink { + visibility: visible; +} + /* Button */ .sk-btn-primary { diff --git a/doc/themes/scikit-learn-modern/static/js/details-permalink.js b/doc/themes/scikit-learn-modern/static/js/details-permalink.js new file mode 100644 index 0000000000000..62392e9836f64 --- /dev/null +++ b/doc/themes/scikit-learn-modern/static/js/details-permalink.js @@ -0,0 +1,47 @@ +// Function to create permalink into
<details> elements to be able to link them
+// The assumption is that such a block will be defined as follows:
+//
+// <details id="summary-anchor">
+//   <summary>
+//     Some title
+//     <span class="tooltiptext">Click for more details</span>
+//     <a class="headerlink" href="#summary-anchor">¶</a>
+//   </summary>
+//   <div>
+//     Some details
+//   </div>
+// </details>
+// We seek to replace `#summary-anchor` with a unique identifier based on the +// summary text. +// This syntax is defined in `doc/conf.py` in the `rst_prolog` variable. +function updateIdAndHrefBasedOnSummaryText() { + var allDetailsElements = document.querySelectorAll('details'); + // Counter to store the duplicated summary text to add it as a suffix in the + // anchor ID + var anchorIDCounters = {}; + + allDetailsElements.forEach(function (detailsElement) { + // Get the element within the current
+ var summaryElement = detailsElement.querySelector('summary'); + + // The ID uses the first line, lowercased, and spaces replaced with dashes + var anchorID = summaryElement.textContent.trim().split("\n")[0].replace(/\s+/g, '-').toLowerCase(); + + // Suffix the anchor ID with a counter if it already exists + if (anchorIDCounters[anchorID]) { + anchorIDCounters[anchorID] += 1; + anchorID = anchorID + '-' + anchorIDCounters[anchorID]; + } else { + anchorIDCounters[anchorID] = 1; + } + + detailsElement.setAttribute('id', anchorID); + + var anchorElement = summaryElement.querySelector('a.headerlink'); + anchorElement.setAttribute('href', '#' + anchorID); + }); +} + +// Add an event listener to execute the function when the page is loaded +document.addEventListener('DOMContentLoaded', function () { + updateIdAndHrefBasedOnSummaryText(); +}); diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index ddb6a2ebe0016..1a445b7436201 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -2,6 +2,23 @@ .. currentmodule:: sklearn +.. _changes_1_3_2: + +Version 1.3.2 +============= + +**October 2023** + +Changelog +--------- + +:mod:`sklearn.tree` +................... + +- |Fix| Do not leak data via non-initialized memory in decision tree pickle files and make + the generation of those files deterministic. :pr:`27580` by :user:`Loïc Estève `. + + .. _changes_1_3_1: Version 1.3.1 diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 7af12c870cf45..8cd4498f53cf0 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -253,6 +253,11 @@ Changelog :pr:`26315` and :pr:`27098` by :user:`Mateusz Sokół `, :user:`Olivier Grisel ` and :user:`Edoardo Abati `. +- |Fix| Fixes a bug in :class:`decomposition.KernelPCA` by forcing the output of + the internal :class:`preprocessing.KernelCenterer` to be a default array. When the + arpack solver was used, it would expect an array with a `dtype` attribute. + :pr:`27583` by :user:`Guillaume Lemaitre `. + :mod:`sklearn.ensemble` ....................... @@ -354,6 +359,14 @@ Changelog :func:`sklearn.metrics.zero_one_loss` now support Array API compatible inputs. :pr:`27137` by :user:`Edoardo Abati `. +- |API| Deprecated `needs_threshold` and `needs_proba` from :func:`metrics.make_scorer`. + These parameters will be removed in version 1.6. Instead, use `response_method` that + accepts `"predict"`, `"predict_proba"` or `"decision_function"` or a list of such + values. `needs_proba=True` is equivalent to `response_method="predict_proba"` and + `needs_threshold=True` is equivalent to + `response_method=("decision_function", "predict_proba")`. + :pr:`26840` by :user:`Guillaume Lemaitre `. + - |Fix| Fixes a bug for metrics using `zero_division=np.nan` (e.g. :func:`~metrics.precision_score`) within a paralell loop (e.g. :func:`~model_selection.cross_val_score`) where the singleton for `np.nan` @@ -366,6 +379,11 @@ Changelog :func:`metrics.root_mean_squared_log_error` instead. :pr:`26734` by :user:`Alejandro Martin Gil <101AlexMartin>`. +- |Fix| :func:`metrics.make_scorer` now raises an error when using a regressor on a + scorer requesting a non-thresholded decision function (from `decision_function` or + `predict_proba`). Such scorer are specific to classification. + :pr:`26840` by :user:`Guillaume Lemaitre `. + :mod:`sklearn.model_selection` .............................. 
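The scorer-related changelog entries above replace `needs_proba` and
`needs_threshold` with the new `response_method` parameter of
:func:`metrics.make_scorer`. As a minimal sketch of the mapping, using a small
synthetic classifier (the dataset and estimator here are illustrative, not
taken from the patch)::

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import log_loss, make_scorer, roc_auc_score

    X, y = make_classification(random_state=0)
    clf = LogisticRegression().fit(X, y)

    # formerly make_scorer(roc_auc_score, needs_threshold=True)
    roc_auc = make_scorer(
        roc_auc_score, response_method=("decision_function", "predict_proba")
    )
    # formerly make_scorer(log_loss, greater_is_better=False, needs_proba=True)
    neg_log_loss = make_scorer(
        log_loss, greater_is_better=False, response_method="predict_proba"
    )

    print(roc_auc(clf, X, y), neg_log_loss(clf, X, y))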
diff --git a/sklearn/cluster/tests/test_hdbscan.py b/sklearn/cluster/tests/test_hdbscan.py index 7a2bac12ef057..26f5b64cb2bfd 100644 --- a/sklearn/cluster/tests/test_hdbscan.py +++ b/sklearn/cluster/tests/test_hdbscan.py @@ -274,14 +274,15 @@ def test_hdbscan_callable_metric(): assert n_clusters == n_clusters_true -@pytest.mark.parametrize("tree", ["kd", "ball"]) +@pytest.mark.parametrize("tree", ["kd_tree", "ball_tree"]) def test_hdbscan_precomputed_non_brute(tree): """ Tests that HDBSCAN correctly raises an error when passing precomputed data while requesting a tree-based algorithm. """ - hdb = HDBSCAN(metric="precomputed", algorithm=f"prims_{tree}tree") - with pytest.raises(ValueError): + hdb = HDBSCAN(metric="precomputed", algorithm=tree) + msg = "precomputed is not a valid metric for" + with pytest.raises(ValueError, match=msg): hdb.fit(X) diff --git a/sklearn/decomposition/_kernel_pca.py b/sklearn/decomposition/_kernel_pca.py index ccf79e896f210..800b472a9b3a6 100644 --- a/sklearn/decomposition/_kernel_pca.py +++ b/sklearn/decomposition/_kernel_pca.py @@ -432,7 +432,7 @@ def fit(self, X, y=None): raise ValueError("Cannot fit_inverse_transform with a precomputed kernel.") X = self._validate_data(X, accept_sparse="csr", copy=self.copy_X) self.gamma_ = 1 / X.shape[1] if self.gamma is None else self.gamma - self._centerer = KernelCenterer() + self._centerer = KernelCenterer().set_output(transform="default") K = self._get_kernel(X) self._fit_transform(K) diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index 76f7c4f832086..b222cf4e158ff 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -3,7 +3,8 @@ import numpy as np import pytest -from sklearn.datasets import make_blobs, make_circles +import sklearn +from sklearn.datasets import load_iris, make_blobs, make_circles from sklearn.decomposition import PCA, KernelPCA from sklearn.exceptions import NotFittedError from sklearn.linear_model import Perceptron @@ -551,3 +552,15 @@ def test_kernel_pca_inverse_correct_gamma(): X2_recon = kpca2.inverse_transform(kpca1.transform(X)) assert_allclose(X1_recon, X2_recon) + + +def test_kernel_pca_pandas_output(): + """Check that KernelPCA works with pandas output when the solver is arpack. + + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/27579 + """ + pytest.importorskip("pandas") + X, _ = load_iris(as_frame=True, return_X_y=True) + with sklearn.config_context(transform_output="pandas"): + KernelPCA(n_components=2, eigen_solver="arpack").fit_transform(X) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index c3730195fdcbb..37f0fa044455c 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -25,11 +25,9 @@ from inspect import signature from traceback import format_exc -import numpy as np - from ..base import is_regressor from ..utils import Bunch -from ..utils._param_validation import HasMethods, StrOptions, validate_params +from ..utils._param_validation import HasMethods, Hidden, StrOptions, validate_params from ..utils._response import _get_response_values from ..utils.metadata_routing import ( MetadataRequest, @@ -40,7 +38,7 @@ get_routing_for_object, process_routing, ) -from ..utils.multiclass import type_of_target +from ..utils.validation import _check_response_method from . 
import ( accuracy_score, average_precision_score, @@ -150,34 +148,24 @@ def __call__(self, estimator, *args, **kwargs): return scores def _use_cache(self, estimator): - """Return True if using a cache is beneficial. - - Caching may be beneficial when one of these conditions holds: - - `_ProbaScorer` will be called twice. - - `_PredictScorer` will be called twice. - - `_ThresholdScorer` will be called twice. - - `_ThresholdScorer` and `_PredictScorer` are called and - estimator is a regressor. - - `_ThresholdScorer` and `_ProbaScorer` are called and - estimator does not have a `decision_function` attribute. - + """Return True if using a cache is beneficial, thus when a response method will + be called several time. """ if len(self._scorers) == 1: # Only one scorer return False - counter = Counter([type(v) for v in self._scorers.values()]) - - if any( - counter[known_type] > 1 - for known_type in [_PredictScorer, _ProbaScorer, _ThresholdScorer] - ): + counter = Counter( + [ + _check_response_method(estimator, scorer._response_method).__name__ + for scorer in self._scorers.values() + if isinstance(scorer, _BaseScorer) + ] + ) + if any(val > 1 for val in counter.values()): + # The exact same response method or iterable of response methods + # will be called more than once. return True - if counter[_ThresholdScorer]: - if is_regressor(estimator) and counter[_PredictScorer]: - return True - elif counter[_ProbaScorer] and not hasattr(estimator, "decision_function"): - return True return False def get_metadata_routing(self): @@ -200,10 +188,11 @@ def get_metadata_routing(self): class _BaseScorer(_MetadataRequester): - def __init__(self, score_func, sign, kwargs): - self._kwargs = kwargs + def __init__(self, score_func, sign, kwargs, response_method="predict"): self._score_func = score_func self._sign = sign + self._kwargs = kwargs + self._response_method = response_method def _get_pos_label(self): if "pos_label" in self._kwargs: @@ -214,14 +203,13 @@ def _get_pos_label(self): return None def __repr__(self): - kwargs_string = "".join( - [", %s=%s" % (str(k), str(v)) for k, v in self._kwargs.items()] - ) - return "make_scorer(%s%s%s%s)" % ( - self._score_func.__name__, - "" if self._sign > 0 else ", greater_is_better=False", - self._factory_args(), - kwargs_string, + sign_string = "" if self._sign > 0 else ", greater_is_better=False" + response_method_string = f", response_method={self._response_method!r}" + kwargs_string = "".join([f", {k}={v}" for k, v in self._kwargs.items()]) + + return ( + f"make_scorer({self._score_func.__name__}{sign_string}" + f"{response_method_string}{kwargs_string})" ) def __call__(self, estimator, X, y_true, sample_weight=None, **kwargs): @@ -264,10 +252,6 @@ def __call__(self, estimator, X, y_true, sample_weight=None, **kwargs): return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs) - def _factory_args(self): - """Return non-default make_scorer arguments for repr.""" - return "" - def _warn_overlap(self, message, kwargs): """Warn if there is any overlap between ``self._kwargs`` and ``kwargs``. @@ -317,9 +301,9 @@ def set_score_request(self, **kwargs): return self -class _PredictScorer(_BaseScorer): +class _Scorer(_BaseScorer): def _score(self, method_caller, estimator, X, y_true, **kwargs): - """Evaluate predicted target values for X relative to y_true. + """Evaluate the response method of `estimator` on `X` and `y_true`. 
Parameters ---------- @@ -328,108 +312,13 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): arguments, potentially caching results. estimator : object - Trained estimator to use for scoring. Must have a `predict` - method; the output of that is used to compute the score. - - X : {array-like, sparse matrix} - Test data that will be fed to estimator.predict. - - y_true : array-like - Gold standard target values for X. - - **kwargs : dict - Other parameters passed to the scorer. Refer to - :func:`set_score_request` for more details. - - .. versionadded:: 1.3 - - Returns - ------- - score : float - Score function applied to prediction of estimator on X. - """ - self._warn_overlap( - message=( - "There is an overlap between set kwargs of this scorer instance and" - " passed metadata. Please pass them either as kwargs to `make_scorer`" - " or metadata, but not both." - ), - kwargs=kwargs, - ) - y_pred = method_caller(estimator, "predict", X) - scoring_kwargs = {**self._kwargs, **kwargs} - return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs) - - -class _ProbaScorer(_BaseScorer): - def _score(self, method_caller, clf, X, y, **kwargs): - """Evaluate predicted probabilities for X relative to y_true. - - Parameters - ---------- - method_caller : callable - Returns predictions given an estimator, method name, and other - arguments, potentially caching results. - - clf : object - Trained classifier to use for scoring. Must have a `predict_proba` - method; the output of that is used to compute the score. - - X : {array-like, sparse matrix} - Test data that will be fed to clf.predict_proba. - - y : array-like - Gold standard target values for X. These must be class labels, - not probabilities. - - **kwargs : dict - Other parameters passed to the scorer. Refer to - :func:`set_score_request` for more details. - - .. versionadded:: 1.3 - - Returns - ------- - score : float - Score function applied to prediction of estimator on X. - """ - self._warn_overlap( - message=( - "There is an overlap between set kwargs of this scorer instance and" - " passed metadata. Please pass them either as kwargs to `make_scorer`" - " or metadata, but not both." - ), - kwargs=kwargs, - ) - - y_pred = method_caller(clf, "predict_proba", X, pos_label=self._get_pos_label()) - scoring_kwargs = {**self._kwargs, **kwargs} - return self._sign * self._score_func(y, y_pred, **scoring_kwargs) - - def _factory_args(self): - return ", needs_proba=True" - - -class _ThresholdScorer(_BaseScorer): - def _score(self, method_caller, clf, X, y, **kwargs): - """Evaluate decision function output for X relative to y_true. - - Parameters - ---------- - method_caller : callable - Returns predictions given an estimator, method name, and other - arguments, potentially caching results. - - clf : object - Trained classifier to use for scoring. Must have either a - decision_function method or a predict_proba method; the output of - that is used to compute the score. + Trained estimator to use for scoring. X : {array-like, sparse matrix} Test data that will be fed to clf.decision_function or clf.predict_proba. - y : array-like + y_true : array-like Gold standard target values for X. These must be class labels, not decision function values. @@ -437,8 +326,6 @@ def _score(self, method_caller, clf, X, y, **kwargs): Other parameters passed to the scorer. Refer to :func:`set_score_request` for more details. - .. 
versionadded:: 1.3 - Returns ------- score : float @@ -453,31 +340,14 @@ def _score(self, method_caller, clf, X, y, **kwargs): kwargs=kwargs, ) - y_type = type_of_target(y) - if y_type not in ("binary", "multilabel-indicator"): - raise ValueError("{0} format is not supported".format(y_type)) - - if is_regressor(clf): - y_pred = method_caller(clf, "predict", X) - else: - pos_label = self._get_pos_label() - try: - y_pred = method_caller(clf, "decision_function", X, pos_label=pos_label) - - if isinstance(y_pred, list): - # For multi-output multi-class estimator - y_pred = np.vstack([p for p in y_pred]).T - - except (NotImplementedError, AttributeError): - y_pred = method_caller(clf, "predict_proba", X, pos_label=pos_label) - if isinstance(y_pred, list): - y_pred = np.vstack([p[:, -1] for p in y_pred]).T + pos_label = None if is_regressor(estimator) else self._get_pos_label() + response_method = _check_response_method(estimator, self._response_method) + y_pred = method_caller( + estimator, response_method.__name__, X, pos_label=pos_label + ) scoring_kwargs = {**self._kwargs, **kwargs} - return self._sign * self._score_func(y, y_pred, **scoring_kwargs) - - def _factory_args(self): - return ", needs_threshold=True" + return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs) @validate_params( @@ -558,6 +428,10 @@ def get_metadata_routing(self): def _check_multimetric_scoring(estimator, scoring): """Check the scoring parameter in cases when multiple metrics are allowed. + In addition, multimetric scoring leverages a caching mechanism to not call the same + estimator response method multiple times. Hence, the scorer is modified to only use + a single response method given a list of response methods and the estimator. + Parameters ---------- estimator : sklearn estimator instance @@ -636,39 +510,93 @@ def _check_multimetric_scoring(estimator, scoring): } else: raise ValueError(err_msg_generic) + return scorers +def _get_response_method(response_method, needs_threshold, needs_proba): + """Handles deprecation of `needs_threshold` and `needs_proba` parameters in + favor of `response_method`. + """ + needs_threshold_provided = needs_threshold != "deprecated" + needs_proba_provided = needs_proba != "deprecated" + response_method_provided = response_method is not None + + needs_threshold = False if needs_threshold == "deprecated" else needs_threshold + needs_proba = False if needs_proba == "deprecated" else needs_proba + + if response_method_provided and (needs_proba_provided or needs_threshold_provided): + raise ValueError( + "You cannot set both `response_method` and `needs_proba` or " + "`needs_threshold` at the same time. Only use `response_method` since " + "the other two are deprecated in version 1.4 and will be removed in 1.6." + ) + + if needs_proba_provided or needs_threshold_provided: + warnings.warn( + ( + "The `needs_threshold` and `needs_proba` parameter are deprecated in " + "version 1.4 and will be removed in 1.6. You can either let " + "`response_method` be `None` or set it to `predict` to preserve the " + "same behaviour." + ), + FutureWarning, + ) + + if response_method_provided: + return response_method + + if needs_proba is True and needs_threshold is True: + raise ValueError( + "You cannot set both `needs_proba` and `needs_threshold` at the same " + "time. Use `response_method` instead since the other two are deprecated " + "in version 1.4 and will be removed in 1.6." 
+ ) + + if needs_proba is True: + response_method = "predict_proba" + elif needs_threshold is True: + response_method = ("decision_function", "predict_proba") + else: + response_method = "predict" + + return response_method + + @validate_params( { "score_func": [callable], + "response_method": [ + None, + list, + tuple, + StrOptions({"predict", "predict_proba", "decision_function"}), + ], "greater_is_better": ["boolean"], - "needs_proba": ["boolean"], - "needs_threshold": ["boolean"], + "needs_proba": ["boolean", Hidden(StrOptions({"deprecated"}))], + "needs_threshold": ["boolean", Hidden(StrOptions({"deprecated"}))], }, prefer_skip_nested_validation=True, ) def make_scorer( score_func, *, + response_method=None, greater_is_better=True, - needs_proba=False, - needs_threshold=False, + needs_proba="deprecated", + needs_threshold="deprecated", **kwargs, ): """Make a scorer from a performance metric or loss function. - This factory function wraps scoring functions for use in - :class:`~sklearn.model_selection.GridSearchCV` and - :func:`~sklearn.model_selection.cross_val_score`. - It takes a score function, such as :func:`~sklearn.metrics.accuracy_score`, - :func:`~sklearn.metrics.mean_squared_error`, - :func:`~sklearn.metrics.adjusted_rand_score` or - :func:`~sklearn.metrics.average_precision_score` - and returns a callable that scores an estimator's output. - The signature of the call is `(estimator, X, y)` where `estimator` - is the model to be evaluated, `X` is the data and `y` is the - ground truth labeling (or `None` in the case of unsupervised models). + A scorer is a wrapper around an arbitrary metric or loss function that is called + with the signature `scorer(estimator, X, y_true, **kwargs)`. + + It is accepted in all scikit-learn estimators or functions allowing a `scoring` + parameter. + + The parameter `response_method` allows to specify which method of the estimator + should be used to feed the scoring/loss function. Read more in the :ref:`User Guide `. @@ -678,6 +606,21 @@ def make_scorer( Score function (or loss function) with signature ``score_func(y, y_pred, **kwargs)``. + response_method : {"predict_proba", "decision_function", "predict"} or \ + list/tuple of such str, default=None + + Specifies the response method to use get prediction from an estimator + (i.e. :term:`predict_proba`, :term:`decision_function` or + :term:`predict`). Possible choices are: + + - if `str`, it corresponds to the name to the method to return; + - if a list or tuple of `str`, it provides the method names in order of + preference. The method returned corresponds to the first method in + the list and which is implemented by `estimator`. + - if `None`, it is equivalent to `"predict"`. + + .. versionadded:: 1.4 + greater_is_better : bool, default=True Whether `score_func` is a score function (default), meaning high is good, or a loss function, meaning low is good. In the latter case, the @@ -691,6 +634,10 @@ def make_scorer( a 1D `y_pred` (i.e., probability of the positive class, shape `(n_samples,)`). + .. deprecated:: 1.4 + `needs_proba` is deprecated in version 1.4 and will be removed in + 1.6. Use `response_method="predict_proba"` instead. + needs_threshold : bool, default=False Whether `score_func` takes a continuous decision certainty. This only works for binary classification using estimators that @@ -703,6 +650,11 @@ def make_scorer( For example `average_precision` or the area under the roc curve can not be computed using discrete predictions alone. + .. 
deprecated:: 1.4 + `needs_threshold` is deprecated in version 1.4 and will be removed + in 1.6. Use `response_method=("decision_function", "predict_proba")` + instead to preserve the same behaviour. + **kwargs : additional arguments Additional parameters to be passed to `score_func`. @@ -711,40 +663,22 @@ def make_scorer( scorer : callable Callable object that returns a scalar score; greater is better. - Notes - ----- - If `needs_proba=False` and `needs_threshold=False`, the score - function is supposed to accept the output of :term:`predict`. If - `needs_proba=True`, the score function is supposed to accept the - output of :term:`predict_proba` (For binary `y_true`, the score function is - supposed to accept probability of the positive class). If - `needs_threshold=True`, the score function is supposed to accept the - output of :term:`decision_function` or :term:`predict_proba` when - :term:`decision_function` is not present. - Examples -------- >>> from sklearn.metrics import fbeta_score, make_scorer >>> ftwo_scorer = make_scorer(fbeta_score, beta=2) >>> ftwo_scorer - make_scorer(fbeta_score, beta=2) + make_scorer(fbeta_score, response_method='predict', beta=2) >>> from sklearn.model_selection import GridSearchCV >>> from sklearn.svm import LinearSVC >>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, ... scoring=ftwo_scorer) """ + response_method = _get_response_method( + response_method, needs_threshold, needs_proba + ) sign = 1 if greater_is_better else -1 - if needs_proba and needs_threshold: - raise ValueError( - "Set either needs_proba or needs_threshold to True, but not both." - ) - if needs_proba: - cls = _ProbaScorer - elif needs_threshold: - cls = _ThresholdScorer - else: - cls = _PredictScorer - return cls(score_func, sign, kwargs) + return _Scorer(score_func, sign, kwargs, response_method) # Standard regression scores @@ -799,28 +733,47 @@ def negative_likelihood_ratio(y_true, y_pred): # Score functions that need decision values top_k_accuracy_scorer = make_scorer( - top_k_accuracy_score, greater_is_better=True, needs_threshold=True + top_k_accuracy_score, + greater_is_better=True, + response_method=("decision_function", "predict_proba"), ) roc_auc_scorer = make_scorer( - roc_auc_score, greater_is_better=True, needs_threshold=True + roc_auc_score, + greater_is_better=True, + response_method=("decision_function", "predict_proba"), +) +average_precision_scorer = make_scorer( + average_precision_score, + response_method=("decision_function", "predict_proba"), +) +roc_auc_ovo_scorer = make_scorer( + roc_auc_score, response_method="predict_proba", multi_class="ovo" ) -average_precision_scorer = make_scorer(average_precision_score, needs_threshold=True) -roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_proba=True, multi_class="ovo") roc_auc_ovo_weighted_scorer = make_scorer( - roc_auc_score, needs_proba=True, multi_class="ovo", average="weighted" + roc_auc_score, + response_method="predict_proba", + multi_class="ovo", + average="weighted", +) +roc_auc_ovr_scorer = make_scorer( + roc_auc_score, response_method="predict_proba", multi_class="ovr" ) -roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_proba=True, multi_class="ovr") roc_auc_ovr_weighted_scorer = make_scorer( - roc_auc_score, needs_proba=True, multi_class="ovr", average="weighted" + roc_auc_score, + response_method="predict_proba", + multi_class="ovr", + average="weighted", ) # Score function for probabilistic classification -neg_log_loss_scorer = make_scorer(log_loss, greater_is_better=False, 
needs_proba=True) +neg_log_loss_scorer = make_scorer( + log_loss, greater_is_better=False, response_method="predict_proba" +) neg_brier_score_scorer = make_scorer( - brier_score_loss, greater_is_better=False, needs_proba=True + brier_score_loss, greater_is_better=False, response_method="predict_proba" ) brier_score_loss_scorer = make_scorer( - brier_score_loss, greater_is_better=False, needs_proba=True + brier_score_loss, greater_is_better=False, response_method="predict_proba" ) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index a7d1220013eee..6db20bff58fc3 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -48,7 +48,7 @@ _check_multimetric_scoring, _MultimetricScorer, _PassthroughScorer, - _PredictScorer, + _Scorer, ) from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.multiclass import OneVsRestClassifier @@ -253,7 +253,8 @@ def check_scoring_validator_for_single_metric_usecases(scoring_validator): estimator = EstimatorWithFit() scorer = scoring_validator(estimator, scoring="accuracy") - assert isinstance(scorer, _PredictScorer) + assert isinstance(scorer, _Scorer) + assert scorer._response_method == "predict" # Test the allow_none parameter for check_scoring alone if scoring_validator is check_scoring: @@ -295,9 +296,8 @@ def test_check_scoring_and_check_multimetric_scoring(scoring): scorers = _check_multimetric_scoring(estimator, scoring) assert isinstance(scorers, dict) assert sorted(scorers.keys()) == sorted(list(scoring)) - assert all( - [isinstance(scorer, _PredictScorer) for scorer in list(scorers.values())] - ) + assert all([isinstance(scorer, _Scorer) for scorer in list(scorers.values())]) + assert all(scorer._response_method == "predict" for scorer in scorers.values()) if "acc" in scoring: assert_almost_equal( @@ -353,11 +353,13 @@ def test_check_scoring_gridsearchcv(): grid = GridSearchCV(LinearSVC(dual="auto"), param_grid={"C": [0.1, 1]}, cv=3) scorer = check_scoring(grid, scoring="f1") - assert isinstance(scorer, _PredictScorer) + assert isinstance(scorer, _Scorer) + assert scorer._response_method == "predict" pipe = make_pipeline(LinearSVC(dual="auto")) scorer = check_scoring(pipe, scoring="f1") - assert isinstance(scorer, _PredictScorer) + assert isinstance(scorer, _Scorer) + assert scorer._response_method == "predict" # check that cross_val_score definitely calls the scorer # and doesn't make any assumptions about the estimator apart from having a @@ -368,13 +370,6 @@ def test_check_scoring_gridsearchcv(): assert_array_equal(scores, 1) -def test_make_scorer(): - # Sanity check on the make_scorer factory function. 
- f = lambda *args: 0 - with pytest.raises(ValueError): - make_scorer(f, needs_threshold=True, needs_proba=True) - - @pytest.mark.parametrize( "scorer_name, metric", [ @@ -504,15 +499,15 @@ def test_thresholded_scorers(): # test with a regressor (no decision_function) reg = DecisionTreeRegressor() reg.fit(X_train, y_train) - score1 = get_scorer("roc_auc")(reg, X_test, y_test) - score2 = roc_auc_score(y_test, reg.predict(X_test)) - assert_almost_equal(score1, score2) + err_msg = "DecisionTreeRegressor has none of the following attributes" + with pytest.raises(AttributeError, match=err_msg): + get_scorer("roc_auc")(reg, X_test, y_test) # Test that an exception is raised on more than two classes X, y = make_blobs(random_state=0, centers=3) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf.fit(X_train, y_train) - with pytest.raises(ValueError, match="multiclass format is not supported"): + with pytest.raises(ValueError, match="multi_class must be in \\('ovo', 'ovr'\\)"): get_scorer("roc_auc")(clf, X_test, y_test) # test error is raised with a single class present in model @@ -543,22 +538,6 @@ def test_thresholded_scorers_multilabel_indicator_data(): score2 = roc_auc_score(y_test, np.vstack([p[:, -1] for p in y_proba]).T) assert_almost_equal(score1, score2) - # Multi-output multi-class decision_function - # TODO Is there any yet? - class TreeWithDecisionFunction(DecisionTreeClassifier): - # disable predict_proba - predict_proba = None - - def decision_function(self, X): - return [p[:, 1] for p in DecisionTreeClassifier.predict_proba(self, X)] - - clf = TreeWithDecisionFunction() - clf.fit(X_train, y_train) - y_proba = clf.decision_function(X_test) - score1 = get_scorer("roc_auc")(clf, X_test, y_test) - score2 = roc_auc_score(y_test, np.vstack([p for p in y_proba]).T) - assert_almost_equal(score1, score2) - # Multilabel predict_proba clf = OneVsRestClassifier(DecisionTreeClassifier()) clf.fit(X_train, y_train) @@ -812,8 +791,11 @@ def test_multimetric_scorer_calls_method_once( (["roc_auc", "neg_log_loss"]), ( { - "roc_auc": make_scorer(roc_auc_score, needs_threshold=True), - "neg_log_loss": make_scorer(log_loss, needs_proba=True), + "roc_auc": make_scorer( + roc_auc_score, + response_method=["predict_proba", "decision_function"], + ), + "neg_log_loss": make_scorer(log_loss, response_method="predict_proba"), } ), ], @@ -966,7 +948,10 @@ def test_multiclass_roc_proba_scorer(scorer_name, metric): def test_multiclass_roc_proba_scorer_label(): scorer = make_scorer( - roc_auc_score, multi_class="ovo", labels=[0, 1, 2], needs_proba=True + roc_auc_score, + multi_class="ovo", + labels=[0, 1, 2], + response_method="predict_proba", ) X, y = make_classification( n_classes=3, n_informative=3, n_samples=20, random_state=0 @@ -1055,7 +1040,7 @@ def string_labeled_classification_problem(): def test_average_precision_pos_label(string_labeled_classification_problem): - # check that _ThresholdScorer will lead to the right score when passing + # check that _Scorer will lead to the right score when passing # `pos_label`. Currently, only `average_precision_score` is defined to # be such a scorer. ( @@ -1085,7 +1070,7 @@ def test_average_precision_pos_label(string_labeled_classification_problem): # check that it fails if `pos_label` is not provided average_precision_scorer = make_scorer( average_precision_score, - needs_threshold=True, + response_method=("decision_function", "predict_proba"), ) err_msg = "pos_label=1 is not a valid label. 
It should be one of " with pytest.raises(ValueError, match=err_msg): @@ -1094,7 +1079,9 @@ def test_average_precision_pos_label(string_labeled_classification_problem): # otherwise, the scorer should give the same results than calling the # scoring function average_precision_scorer = make_scorer( - average_precision_score, needs_threshold=True, pos_label=pos_label + average_precision_score, + response_method=("decision_function", "predict_proba"), + pos_label=pos_label, ) ap_scorer = average_precision_scorer(clf, X_test, y_test) @@ -1119,7 +1106,7 @@ def _predict_proba(self, X): def test_brier_score_loss_pos_label(string_labeled_classification_problem): - # check that _ProbaScorer leads to the right score when `pos_label` is + # check that _Scorer leads to the right score when `pos_label` is # provided. Currently only the `brier_score_loss` is defined to be such # a scorer. clf, X_test, y_test, _, y_pred_proba, _ = string_labeled_classification_problem @@ -1136,7 +1123,7 @@ def test_brier_score_loss_pos_label(string_labeled_classification_problem): brier_scorer = make_scorer( brier_score_loss, - needs_proba=True, + response_method="predict_proba", pos_label=pos_label, ) assert brier_scorer(clf, X_test, y_test) == pytest.approx(brier_pos_cancer) @@ -1148,7 +1135,7 @@ def test_brier_score_loss_pos_label(string_labeled_classification_problem): def test_non_symmetric_metric_pos_label( score_func, string_labeled_classification_problem ): - # check that _PredictScorer leads to the right score when `pos_label` is + # check that _Scorer leads to the right score when `pos_label` is # provided. We check for all possible metric supported. # Note: At some point we may end up having "scorer tags". clf, X_test, y_test, y_pred, _, _ = string_labeled_classification_problem @@ -1168,11 +1155,15 @@ def test_non_symmetric_metric_pos_label( @pytest.mark.parametrize( "scorer", [ - make_scorer(average_precision_score, needs_threshold=True, pos_label="xxx"), - make_scorer(brier_score_loss, needs_proba=True, pos_label="xxx"), + make_scorer( + average_precision_score, + response_method=("decision_function", "predict_proba"), + pos_label="xxx", + ), + make_scorer(brier_score_loss, response_method="predict_proba", pos_label="xxx"), make_scorer(f1_score, pos_label="xxx"), ], - ids=["ThresholdScorer", "ProbaScorer", "PredictScorer"], + ids=["non-thresholded scorer", "probability scorer", "thresholded scorer"], ) def test_scorer_select_proba_error(scorer): # check that we raise the proper error when passing an unknown @@ -1194,7 +1185,7 @@ def test_get_scorer_return_copy(): def test_scorer_no_op_multiclass_select_proba(): - # check that calling a ProbaScorer on a multiclass problem do not raise + # check that calling a _Scorer on a multiclass problem do not raise # even if `y_true` would be binary during the scoring. # `_select_proba_binary` should not be called in this case. 
X, y = make_classification( @@ -1208,7 +1199,7 @@ def test_scorer_no_op_multiclass_select_proba(): scorer = make_scorer( roc_auc_score, - needs_proba=True, + response_method="predict_proba", multi_class="ovo", labels=lr.classes_, ) @@ -1285,7 +1276,7 @@ def test_metadata_kwarg_conflict(): scorer = make_scorer( roc_auc_score, - needs_proba=True, + response_method="predict_proba", multi_class="ovo", labels=lr.classes_, ) @@ -1389,3 +1380,113 @@ def test_get_scorer_multilabel_indicator(): score = get_scorer("average_precision")(estimator, X_test, Y_test) assert score > 0.8 + + +@pytest.mark.parametrize( + "scorer, expected_repr", + [ + ( + get_scorer("accuracy"), + "make_scorer(accuracy_score, response_method='predict')", + ), + ( + get_scorer("neg_log_loss"), + ( + "make_scorer(log_loss, greater_is_better=False," + " response_method='predict_proba')" + ), + ), + ( + get_scorer("roc_auc"), + ( + "make_scorer(roc_auc_score, response_method=" + "('decision_function', 'predict_proba'))" + ), + ), + ( + make_scorer(fbeta_score, beta=2), + "make_scorer(fbeta_score, response_method='predict', beta=2)", + ), + ], +) +def test_make_scorer_repr(scorer, expected_repr): + """Check the representation of the scorer.""" + assert repr(scorer) == expected_repr + + +# TODO(1.6): rework this test after the deprecation of `needs_proba` and +# `needs_threshold` +@pytest.mark.filterwarnings("ignore:.*needs_proba.*:FutureWarning") +@pytest.mark.parametrize( + "params, err_type, err_msg", + [ + # response_method should not be set if needs_* are set + ( + {"response_method": "predict_proba", "needs_proba": True}, + ValueError, + "You cannot set both `response_method`", + ), + ( + {"response_method": "predict_proba", "needs_threshold": True}, + ValueError, + "You cannot set both `response_method`", + ), + # cannot set both needs_proba and needs_threshold + ( + {"needs_proba": True, "needs_threshold": True}, + ValueError, + "You cannot set both `needs_proba` and `needs_threshold`", + ), + ], +) +def test_make_scorer_error(params, err_type, err_msg): + """Check that `make_scorer` raises errors if the parameter used.""" + with pytest.raises(err_type, match=err_msg): + make_scorer(lambda y_true, y_pred: 1, **params) + + +# TODO(1.6): remove the following test +@pytest.mark.parametrize( + "deprecated_params, new_params, warn_msg", + [ + ( + {"needs_proba": True}, + {"response_method": "predict_proba"}, + "The `needs_threshold` and `needs_proba` parameter are deprecated", + ), + ( + {"needs_proba": True, "needs_threshold": False}, + {"response_method": "predict_proba"}, + "The `needs_threshold` and `needs_proba` parameter are deprecated", + ), + ( + {"needs_threshold": True}, + {"response_method": ("decision_function", "predict_proba")}, + "The `needs_threshold` and `needs_proba` parameter are deprecated", + ), + ( + {"needs_threshold": True, "needs_proba": False}, + {"response_method": ("decision_function", "predict_proba")}, + "The `needs_threshold` and `needs_proba` parameter are deprecated", + ), + ( + {"needs_threshold": False, "needs_proba": False}, + {"response_method": "predict"}, + "The `needs_threshold` and `needs_proba` parameter are deprecated", + ), + ], +) +def test_make_scorer_deprecation(deprecated_params, new_params, warn_msg): + """Check that we raise a deprecation warning when using `needs_proba` or + `needs_threshold`.""" + X, y = make_classification(n_samples=150, n_features=10, random_state=0) + classifier = LogisticRegression().fit(X, y) + + # check deprecation of needs_proba + with 
pytest.warns(FutureWarning, match=warn_msg): + deprecated_roc_auc_scorer = make_scorer(roc_auc_score, **deprecated_params) + roc_auc_scorer = make_scorer(roc_auc_score, **new_params) + + assert deprecated_roc_auc_scorer(classifier, X, y) == pytest.approx( + roc_auc_scorer(classifier, X, y) + ) diff --git a/sklearn/tests/metadata_routing_common.py b/sklearn/tests/metadata_routing_common.py index 14bf037ae9054..3d7d0ab24f1cc 100644 --- a/sklearn/tests/metadata_routing_common.py +++ b/sklearn/tests/metadata_routing_common.py @@ -10,7 +10,7 @@ TransformerMixin, clone, ) -from sklearn.metrics._scorer import _PredictScorer, mean_squared_error +from sklearn.metrics._scorer import _Scorer, mean_squared_error from sklearn.model_selection import BaseCrossValidator from sklearn.model_selection._split import GroupsConsumerMixin from sklearn.utils._metadata_requests import ( @@ -304,9 +304,11 @@ def inverse_transform(self, X, sample_weight=None, metadata=None): return X -class ConsumingScorer(_PredictScorer): +class ConsumingScorer(_Scorer): def __init__(self, registry=None): - super().__init__(score_func=mean_squared_error, sign=1, kwargs={}) + super().__init__( + score_func=mean_squared_error, sign=1, kwargs={}, response_method="predict" + ) self.registry = registry def _score(self, method_caller, clf, X, y, **kwargs): diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 5a8a200ed9680..c52aca5c6fefc 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -1020,11 +1020,13 @@ cdef class BaseTree: safe_realloc(&self.nodes, capacity) safe_realloc(&self.value, capacity * self.value_stride) - # value memory is initialised to 0 to enable classifier argmax if capacity > self.capacity: + # value memory is initialised to 0 to enable classifier argmax memset((self.value + self.capacity * self.value_stride), 0, (capacity - self.capacity) * self.value_stride * sizeof(float64_t)) + # node memory is initialised to 0 to ensure deterministic pickle (padding in Node struct) + memset((self.nodes + self.capacity), 0, (capacity - self.capacity) * sizeof(Node)) # if capacity smaller than node_count, adjust the counter if capacity < self.node_count: diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 8faef50879da2..71ab1d8c5c6b6 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -2760,3 +2760,16 @@ def test_multioutput_quantiles(): assert_array_equal(y_hat[:, 1], y_true) assert_array_equal(y_hat[:, 2], y_true) assert y_hat.shape == (4, 3, 2) + + +def test_deterministic_pickle(): + # Non-regression test for: + # https://github.com/scikit-learn/scikit-learn/issues/27268 + # Uninitialised memory would lead to the two pickle strings being different. + tree1 = DecisionTreeClassifier(random_state=0).fit(iris.data, iris.target) + tree2 = DecisionTreeClassifier(random_state=0).fit(iris.data, iris.target) + + pickle1 = pickle.dumps(tree1) + pickle2 = pickle.dumps(tree2) + + assert pickle1 == pickle2
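
To make the effect of the added `memset` concrete, here is a small
stand-alone illustration (plain NumPy, not scikit-learn internals): bytes that
are allocated but never initialised make byte-wise comparisons of otherwise
identical pickles unstable, and zero-filling the unused region restores
determinism::

    import pickle

    import numpy as np

    rng = np.random.default_rng(0)
    used = np.arange(5, dtype=np.float64)

    # Stand-ins for an over-allocated capacity buffer: only the first five
    # slots carry meaningful values, the rest mimics uninitialised memory.
    buf1 = np.empty(8, dtype=np.float64)
    buf2 = np.empty(8, dtype=np.float64)
    buf1[:5] = used
    buf2[:5] = used
    buf1[5:] = rng.random(3)  # "garbage" padding
    buf2[5:] = rng.random(3)

    # False: the padding leaks into the serialised bytes.
    print(pickle.dumps(buf1) == pickle.dumps(buf2))

    # Zero-filling the unused region (the analogue of the new memset) makes
    # the serialised bytes deterministic.
    buf1[5:] = 0.0
    buf2[5:] = 0.0
    print(pickle.dumps(buf1) == pickle.dumps(buf2))  # True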