From bbc73cfd7f5a85f6ac63432d3294abecdcb81d2a Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Sun, 10 Sep 2023 19:13:26 +0200 Subject: [PATCH 01/21] FIX _safe_divide should handle zero-division with numpy scalar (#27312) Co-authored-by: Christian Lorentzen --- sklearn/ensemble/_gb.py | 20 +++++++++++++------ .../ensemble/tests/test_gradient_boosting.py | 8 +++++--- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index 73bb9e08ae619..e198babdb28d7 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -20,6 +20,7 @@ # Arnaud Joly, Jacob Schreiber # License: BSD 3 clause +import math import warnings from abc import ABCMeta, abstractmethod from numbers import Integral, Real @@ -64,11 +65,16 @@ def _safe_divide(numerator, denominator): """Prevents overflow and division by zero.""" - with np.errstate(divide="raise"): - try: - return numerator / denominator - except FloatingPointError: - return 0.0 + try: + # Cast to Python float to trigger a ZeroDivisionError without relying + # on `np.errstate` that is not supported by Pyodide. + result = float(numerator) / float(denominator) + if math.isinf(result): + warnings.warn("overflow encountered in _safe_divide", RuntimeWarning) + return result + except ZeroDivisionError: + warnings.warn("divide by zero encountered in _safe_divide", RuntimeWarning) + return 0.0 def _init_raw_predictions(X, estimator, loss, use_predict_proba): @@ -235,7 +241,9 @@ def compute_update(y_, indices, neg_gradient, raw_prediction, k): # update each leaf (= perform line search) for leaf in np.nonzero(tree.children_left == TREE_LEAF)[0]: - indices = np.nonzero(terminal_regions == leaf)[0] # of terminal regions + indices = np.nonzero(masked_terminal_regions == leaf)[ + 0 + ] # of terminal regions y_ = y.take(indices, axis=0) sw = None if sample_weight is None else sample_weight[indices] update = compute_update(y_, indices, neg_gradient, raw_prediction, k) diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 6671936d2eaae..9d61464d063f0 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -1445,11 +1445,13 @@ def test_huber_vs_mean_and_median(): def test_safe_divide(): """Test that _safe_divide handles division by zero.""" - assert _safe_divide(np.array([1e300]), 0) == 0 - + with pytest.warns(RuntimeWarning, match="divide"): + assert _safe_divide(np.float64(1e300), 0) == 0 + with pytest.warns(RuntimeWarning, match="divide"): + assert _safe_divide(np.float64(0.0), np.float64(0.0)) == 0 with pytest.warns(RuntimeWarning, match="overflow"): # np.finfo(float).max = 1.7976931348623157e+308 - _safe_divide(np.array([1e300]), 1e-10) + _safe_divide(np.float64(1e300), 1e-10) def test_squared_error_exact_backward_compat(): From 7ba963f3e538eb7fe62767af2672c76f8174a763 Mon Sep 17 00:00:00 2001 From: Yuusuke Hiramatsu <140389350+hiramatsuyuusuke@users.noreply.github.com> Date: Mon, 11 Sep 2023 16:38:05 +0900 Subject: [PATCH 02/21] DOC take `Examples` out of a dropdown (#27034) --- doc/modules/svm.rst | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index 0ac34cdcb6a10..8f97b8dee8806 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -521,9 +521,8 @@ is advised to use :class:`~sklearn.model_selection.GridSearchCV` with * :ref:`sphx_glr_auto_examples_svm_plot_rbf_parameters.py` * 
:ref:`sphx_glr_auto_examples_svm_plot_svm_nonlinear.py` -|details-start| -**Custom Kernels** -|details-split| +Custom Kernels +-------------- You can define your own kernels by either giving the kernel as a python function or by precomputing the Gram matrix. @@ -539,8 +538,9 @@ classifiers, except that: use of ``fit()`` and ``predict()`` you will have unexpected results. -Using Python functions as kernels -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Using Python functions as kernels** +|details-split| You can use your own defined kernels by passing a function to the ``kernel`` parameter. @@ -558,13 +558,13 @@ instance that will use that kernel:: ... return np.dot(X, Y.T) ... >>> clf = svm.SVC(kernel=my_kernel) + +|details-end| -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_svm_plot_custom_kernel.py`. -Using the Gram matrix -~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Using the Gram matrix** +|details-split| You can pass pre-computed kernels by using the ``kernel='precomputed'`` option. You should then pass Gram matrix instead of X to the `fit` and @@ -589,6 +589,10 @@ test vectors must be provided: |details-end| +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_svm_plot_custom_kernel.py`. + .. _svm_mathematical_formulation: Mathematical formulation From a6a6a47fb3172c7fcbf0c52870c1279b24114ad8 Mon Sep 17 00:00:00 2001 From: Puneeth K <32433964+punndcoder28@users.noreply.github.com> Date: Mon, 11 Sep 2023 13:14:54 +0530 Subject: [PATCH 03/21] DOC Adding dropdown for module 2.1 Gaussian Mixtures (#26694) --- doc/modules/mixture.rst | 125 +++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 58 deletions(-) diff --git a/doc/modules/mixture.rst b/doc/modules/mixture.rst index fbf0551da93a4..e9cc94b1d493d 100644 --- a/doc/modules/mixture.rst +++ b/doc/modules/mixture.rst @@ -68,33 +68,36 @@ full covariance. * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_pdf.py` for an example on plotting the density estimation. -Pros and cons of class :class:`GaussianMixture` ------------------------------------------------ +|details-start| +**Pros and cons of class GaussianMixture** +|details-split| -Pros -.... +.. topic:: Pros: -:Speed: It is the fastest algorithm for learning mixture models + :Speed: It is the fastest algorithm for learning mixture models -:Agnostic: As this algorithm maximizes only the likelihood, it - will not bias the means towards zero, or bias the cluster sizes to - have specific structures that might or might not apply. + :Agnostic: As this algorithm maximizes only the likelihood, it + will not bias the means towards zero, or bias the cluster sizes to + have specific structures that might or might not apply. -Cons -.... +.. topic:: Cons: -:Singularities: When one has insufficiently many points per - mixture, estimating the covariance matrices becomes difficult, - and the algorithm is known to diverge and find solutions with - infinite likelihood unless one regularizes the covariances artificially. + :Singularities: When one has insufficiently many points per + mixture, estimating the covariance matrices becomes difficult, + and the algorithm is known to diverge and find solutions with + infinite likelihood unless one regularizes the covariances artificially. -:Number of components: This algorithm will always use all the - components it has access to, needing held-out data - or information theoretical criteria to decide how many components to use - in the absence of external cues. 
+ :Number of components: This algorithm will always use all the + components it has access to, needing held-out data + or information theoretical criteria to decide how many components to use + in the absence of external cues. -Selecting the number of components in a classical Gaussian Mixture Model ------------------------------------------------------------------------- +|details-end| + + +|details-start| +**Selecting the number of components in a classical Gaussian Mixture model** +|details-split| The BIC criterion can be used to select the number of components in a Gaussian Mixture in an efficient way. In theory, it recovers the true number of @@ -114,10 +117,13 @@ model. * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_selection.py` for an example of model selection performed with classical Gaussian mixture. +|details-end| + .. _expectation_maximization: -Estimation algorithm Expectation-maximization ------------------------------------------------ +|details-start| +**Estimation algorithm expectation-maximization** +|details-split| The main difficulty in learning Gaussian mixture models from unlabeled data is that one usually doesn't know which points came from @@ -135,8 +141,11 @@ parameters to maximize the likelihood of the data given those assignments. Repeating this process is guaranteed to always converge to a local optimum. -Choice of the Initialization Method ------------------------------------ +|details-end| + +|details-start| +**Choice of the Initialization method** +|details-split| There is a choice of four initialization methods (as well as inputting user defined initial means) to generate the initial centers for the model components: @@ -172,6 +181,8 @@ random * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_init.py` for an example of using different initializations in Gaussian Mixture. +|details-end| + .. _bgmm: Variational Bayesian Gaussian Mixture @@ -183,8 +194,7 @@ similar to the one defined by :class:`GaussianMixture`. .. _variational_inference: -Estimation algorithm: variational inference ---------------------------------------------- +**Estimation algorithm: variational inference** Variational inference is an extension of expectation-maximization that maximizes a lower bound on model evidence (including @@ -282,48 +292,47 @@ from the two resulting mixtures. ``weight_concentration_prior_type`` for different values of the parameter ``weight_concentration_prior``. +|details-start| +**Pros and cons of variational inference with BayesianGaussianMixture** +|details-split| -Pros and cons of variational inference with :class:`BayesianGaussianMixture` ----------------------------------------------------------------------------- - -Pros -..... +.. topic:: Pros: -:Automatic selection: when ``weight_concentration_prior`` is small enough and - ``n_components`` is larger than what is found necessary by the model, the - Variational Bayesian mixture model has a natural tendency to set some mixture - weights values close to zero. This makes it possible to let the model choose - a suitable number of effective components automatically. Only an upper bound - of this number needs to be provided. Note however that the "ideal" number of - active components is very application specific and is typically ill-defined - in a data exploration setting. 
+ :Automatic selection: when ``weight_concentration_prior`` is small enough and + ``n_components`` is larger than what is found necessary by the model, the + Variational Bayesian mixture model has a natural tendency to set some mixture + weights values close to zero. This makes it possible to let the model choose + a suitable number of effective components automatically. Only an upper bound + of this number needs to be provided. Note however that the "ideal" number of + active components is very application specific and is typically ill-defined + in a data exploration setting. -:Less sensitivity to the number of parameters: unlike finite models, which will - almost always use all components as much as they can, and hence will produce - wildly different solutions for different numbers of components, the - variational inference with a Dirichlet process prior - (``weight_concentration_prior_type='dirichlet_process'``) won't change much - with changes to the parameters, leading to more stability and less tuning. + :Less sensitivity to the number of parameters: unlike finite models, which will + almost always use all components as much as they can, and hence will produce + wildly different solutions for different numbers of components, the + variational inference with a Dirichlet process prior + (``weight_concentration_prior_type='dirichlet_process'``) won't change much + with changes to the parameters, leading to more stability and less tuning. -:Regularization: due to the incorporation of prior information, - variational solutions have less pathological special cases than - expectation-maximization solutions. + :Regularization: due to the incorporation of prior information, + variational solutions have less pathological special cases than + expectation-maximization solutions. -Cons -..... +.. topic:: Cons: -:Speed: the extra parametrization necessary for variational inference makes - inference slower, although not by much. + :Speed: the extra parametrization necessary for variational inference makes + inference slower, although not by much. -:Hyperparameters: this algorithm needs an extra hyperparameter - that might need experimental tuning via cross-validation. + :Hyperparameters: this algorithm needs an extra hyperparameter + that might need experimental tuning via cross-validation. -:Bias: there are many implicit biases in the inference algorithms (and also in - the Dirichlet process if used), and whenever there is a mismatch between - these biases and the data it might be possible to fit better models using a - finite mixture. + :Bias: there are many implicit biases in the inference algorithms (and also in + the Dirichlet process if used), and whenever there is a mismatch between + these biases and the data it might be possible to fit better models using a + finite mixture. +|details-end| .. _dirichlet_process: From d3aaa75e4a8a890f806e8af710fda615490ee0a9 Mon Sep 17 00:00:00 2001 From: Puneeth K <32433964+punndcoder28@users.noreply.github.com> Date: Mon, 11 Sep 2023 13:20:14 +0530 Subject: [PATCH 04/21] DOC Adding Dropdown to module 7.2 Realworld Datasets (#26693) --- sklearn/datasets/descr/lfw.rst | 12 +++++++----- sklearn/datasets/descr/twenty_newsgroups.rst | 19 +++++++++++++------ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/sklearn/datasets/descr/lfw.rst b/sklearn/datasets/descr/lfw.rst index e7fc35c3caabc..8105d7d6d633a 100644 --- a/sklearn/datasets/descr/lfw.rst +++ b/sklearn/datasets/descr/lfw.rst @@ -32,8 +32,9 @@ face detector from various online websites. 
Features real, between 0 and 255 ================= ======================= -Usage -~~~~~ +|details-start| +**Usage** +|details-split| ``scikit-learn`` provides two loaders that will automatically download, cache, parse the metadata files, decode the jpeg and convert the @@ -111,6 +112,8 @@ The :func:`sklearn.datasets.fetch_lfw_pairs` datasets is subdivided into an evaluation ``10_folds`` set meant to compute performance metrics using a 10-folds cross validation scheme. +|details-end| + .. topic:: References: * `Labeled Faces in the Wild: A Database for Studying Face Recognition @@ -120,7 +123,6 @@ an evaluation ``10_folds`` set meant to compute performance metrics using a University of Massachusetts, Amherst, Technical Report 07-49, October, 2007. -Examples -~~~~~~~~ +.. topic:: Examples: -:ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py` + * :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py` diff --git a/sklearn/datasets/descr/twenty_newsgroups.rst b/sklearn/datasets/descr/twenty_newsgroups.rst index 8e373c6ec3e74..669e158244134 100644 --- a/sklearn/datasets/descr/twenty_newsgroups.rst +++ b/sklearn/datasets/descr/twenty_newsgroups.rst @@ -27,8 +27,9 @@ extractor. Features text ================= ========== -Usage -~~~~~ +|details-start| +**Usage** +|details-split| The :func:`sklearn.datasets.fetch_20newsgroups` function is a data fetching / caching functions that downloads the data archive from @@ -89,8 +90,11 @@ list of the categories to load to the >>> newsgroups_train.target[:10] array([0, 1, 1, 1, 0, 1, 1, 0, 0, 0]) -Converting text to vectors -~~~~~~~~~~~~~~~~~~~~~~~~~~ +|details-end| + +|details-start| +**Converting text to vectors** +|details-split| In order to feed predictive or clustering models with the text data, one first need to turn the text into vectors of numerical values suitable @@ -122,9 +126,11 @@ returns ready-to-use token counts features instead of file names. .. _`20 newsgroups website`: http://people.csail.mit.edu/jrennie/20Newsgroups/ .. _`TF-IDF`: https://en.wikipedia.org/wiki/Tf-idf +|details-end| -Filtering text for more realistic training -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Filtering text for more realistic training** +|details-split| It is easy for a classifier to overfit on particular things that appear in the 20 Newsgroups data, such as newsgroup headers. Many classifiers achieve very @@ -218,6 +224,7 @@ It loses even more if we also strip this metadata from the training data: Some other classifiers cope better with this harder version of the task. Try the :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` example with and without the `remove` option to compare the results. +|details-end| .. 
topic:: Data Considerations From 358a1c91e491d72542359cf10c63e77d3678424d Mon Sep 17 00:00:00 2001 From: Mohit Joshi <46566524+work-mohit@users.noreply.github.com> Date: Mon, 11 Sep 2023 15:00:09 +0530 Subject: [PATCH 05/21] TST Extend tests for `scipy.sparse.*array`in `sklearn/ensemble/tests/test_bagging.py` (#27170) --- sklearn/ensemble/tests/test_bagging.py | 56 +++++++++++++------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 2c1067ccfc248..0ee0dd8be4956 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -9,7 +9,6 @@ import joblib import numpy as np import pytest -from scipy.sparse import csc_matrix, csr_matrix from sklearn.base import BaseEstimator from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2 @@ -31,6 +30,7 @@ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils import check_random_state from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal +from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS rng = check_random_state(0) @@ -83,9 +83,9 @@ def test_classification(): @pytest.mark.parametrize( - "sparse_format, params, method", + "sparse_container, params, method", product( - [csc_matrix, csr_matrix], + CSR_CONTAINERS + CSC_CONTAINERS, [ { "max_samples": 0.5, @@ -105,7 +105,7 @@ def test_classification(): ["predict", "predict_proba", "predict_log_proba", "decision_function"], ), ) -def test_sparse_classification(sparse_format, params, method): +def test_sparse_classification(sparse_container, params, method): # Check classification for various parameter settings on sparse input. class CustomSVC(SVC): @@ -121,8 +121,8 @@ def fit(self, X, y): scale(iris.data), iris.target, random_state=rng ) - X_train_sparse = sparse_format(X_train) - X_test_sparse = sparse_format(X_test) + X_train_sparse = sparse_container(X_train) + X_test_sparse = sparse_container(X_test) # Trained on sparse format sparse_classifier = BaggingClassifier( estimator=CustomSVC(kernel="linear", decision_function_shape="ovr"), @@ -174,7 +174,8 @@ def test_regression(): ).predict(X_test) -def test_sparse_regression(): +@pytest.mark.parametrize("sparse_container", CSR_CONTAINERS + CSC_CONTAINERS) +def test_sparse_regression(sparse_container): # Check regression for various parameter settings on sparse input. 
rng = check_random_state(0) X_train, X_test, y_train, y_test = train_test_split( @@ -206,29 +207,28 @@ def fit(self, X, y): {"max_samples": 0.5, "bootstrap": True, "bootstrap_features": False}, ] - for sparse_format in [csc_matrix, csr_matrix]: - X_train_sparse = sparse_format(X_train) - X_test_sparse = sparse_format(X_test) - for params in parameter_sets: - # Trained on sparse format - sparse_classifier = BaggingRegressor( - estimator=CustomSVR(), random_state=1, **params - ).fit(X_train_sparse, y_train) - sparse_results = sparse_classifier.predict(X_test_sparse) - - # Trained on dense format - dense_results = ( - BaggingRegressor(estimator=CustomSVR(), random_state=1, **params) - .fit(X_train, y_train) - .predict(X_test) - ) + X_train_sparse = sparse_container(X_train) + X_test_sparse = sparse_container(X_test) + for params in parameter_sets: + # Trained on sparse format + sparse_classifier = BaggingRegressor( + estimator=CustomSVR(), random_state=1, **params + ).fit(X_train_sparse, y_train) + sparse_results = sparse_classifier.predict(X_test_sparse) + + # Trained on dense format + dense_results = ( + BaggingRegressor(estimator=CustomSVR(), random_state=1, **params) + .fit(X_train, y_train) + .predict(X_test) + ) - sparse_type = type(X_train_sparse) - types = [i.data_type_ for i in sparse_classifier.estimators_] + sparse_type = type(X_train_sparse) + types = [i.data_type_ for i in sparse_classifier.estimators_] - assert_array_almost_equal(sparse_results, dense_results) - assert all([t == sparse_type for t in types]) - assert_array_almost_equal(sparse_results, dense_results) + assert_array_almost_equal(sparse_results, dense_results) + assert all([t == sparse_type for t in types]) + assert_array_almost_equal(sparse_results, dense_results) class DummySizeEstimator(BaseEstimator): From 8d48624ec9a3f2785b35a6510964c7847d39175f Mon Sep 17 00:00:00 2001 From: Brendan Lu <132311610+brendanlu@users.noreply.github.com> Date: Mon, 11 Sep 2023 19:31:21 +1000 Subject: [PATCH 06/21] TST Extend tests for `scipy.sparse.*array` in `sklearn/feature_selection/tests/test_mutual_info.py` (#27173) Co-authored-by: Guillaume Lemaitre --- sklearn/feature_selection/tests/test_mutual_info.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index 349147f66e36c..26367544baa53 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -1,6 +1,5 @@ import numpy as np import pytest -from scipy.sparse import csr_matrix from sklearn.feature_selection import mutual_info_classif, mutual_info_regression from sklearn.feature_selection._mutual_info import _compute_mi @@ -9,6 +8,7 @@ assert_allclose, assert_array_equal, ) +from sklearn.utils.fixes import CSR_CONTAINERS def test_compute_mi_dd(): @@ -176,12 +176,13 @@ def test_mutual_info_classif_mixed(global_dtype): assert mi_nn[2] == mi[2] -def test_mutual_info_options(global_dtype): +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_mutual_info_options(global_dtype, csr_container): X = np.array( [[0, 0, 0], [1, 1, 0], [2, 0, 1], [2, 0, 1], [2, 0, 1]], dtype=global_dtype ) y = np.array([0, 1, 2, 2, 1], dtype=global_dtype) - X_csr = csr_matrix(X) + X_csr = csr_container(X) for mutual_info in (mutual_info_regression, mutual_info_classif): with pytest.raises(ValueError): From c366f93394bba397468cb1e36e013f7a1bb44708 Mon Sep 17 00:00:00 2001 From: Mohit Joshi 
<46566524+work-mohit@users.noreply.github.com> Date: Mon, 11 Sep 2023 15:02:44 +0530 Subject: [PATCH 07/21] TST Extend tests for `scipy.sparse.*array` in `sklearn/feature_selection/tests/test_base.py` (#27175) --- sklearn/feature_selection/tests/test_base.py | 27 +++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py index bf883797ddabd..5e2bb27bafd17 100644 --- a/sklearn/feature_selection/tests/test_base.py +++ b/sklearn/feature_selection/tests/test_base.py @@ -1,10 +1,10 @@ import numpy as np import pytest from numpy.testing import assert_array_equal -from scipy import sparse as sp from sklearn.base import BaseEstimator from sklearn.feature_selection._base import SelectorMixin +from sklearn.utils.fixes import CSC_CONTAINERS class StepSelector(SelectorMixin, BaseEstimator): @@ -60,17 +60,18 @@ def test_transform_dense(): sel.transform(np.array([[1], [2]])) -def test_transform_sparse(): - sparse = sp.csc_matrix +@pytest.mark.parametrize("csc_container", CSC_CONTAINERS) +def test_transform_sparse(csc_container): + X_sp = csc_container(X) sel = StepSelector() - Xt_actual = sel.fit(sparse(X)).transform(sparse(X)) - Xt_actual2 = sel.fit_transform(sparse(X)) + Xt_actual = sel.fit(X_sp).transform(X_sp) + Xt_actual2 = sel.fit_transform(X_sp) assert_array_equal(Xt, Xt_actual.toarray()) assert_array_equal(Xt, Xt_actual2.toarray()) # Check dtype matches - assert np.int32 == sel.transform(sparse(X).astype(np.int32)).dtype - assert np.float32 == sel.transform(sparse(X).astype(np.float32)).dtype + assert np.int32 == sel.transform(X_sp.astype(np.int32)).dtype + assert np.float32 == sel.transform(X_sp.astype(np.float32)).dtype # Check wrong shape raises error with pytest.raises(ValueError): @@ -95,15 +96,17 @@ def test_inverse_transform_dense(): sel.inverse_transform(np.array([[1], [2]])) -def test_inverse_transform_sparse(): - sparse = sp.csc_matrix +@pytest.mark.parametrize("csc_container", CSC_CONTAINERS) +def test_inverse_transform_sparse(csc_container): + X_sp = csc_container(X) + Xt_sp = csc_container(Xt) sel = StepSelector() - Xinv_actual = sel.fit(sparse(X)).inverse_transform(sparse(Xt)) + Xinv_actual = sel.fit(X_sp).inverse_transform(Xt_sp) assert_array_equal(Xinv, Xinv_actual.toarray()) # Check dtype matches - assert np.int32 == sel.inverse_transform(sparse(Xt).astype(np.int32)).dtype - assert np.float32 == sel.inverse_transform(sparse(Xt).astype(np.float32)).dtype + assert np.int32 == sel.inverse_transform(Xt_sp.astype(np.int32)).dtype + assert np.float32 == sel.inverse_transform(Xt_sp.astype(np.float32)).dtype # Check wrong shape raises error with pytest.raises(ValueError): From 11cf0eec3eae4b684b962e11ef3eadf0357eaa9a Mon Sep 17 00:00:00 2001 From: Mohit Joshi <46566524+work-mohit@users.noreply.github.com> Date: Mon, 11 Sep 2023 15:03:32 +0530 Subject: [PATCH 08/21] TST Extend tests for `scipy.sparse.*array` in `sklearn/feature_selection/tests/test_chi2.py` (#27176) Co-authored-by: Omar Salman --- sklearn/feature_selection/tests/test_chi2.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/sklearn/feature_selection/tests/test_chi2.py b/sklearn/feature_selection/tests/test_chi2.py index 4fdc652a998a9..c50def36f1b6c 100644 --- a/sklearn/feature_selection/tests/test_chi2.py +++ b/sklearn/feature_selection/tests/test_chi2.py @@ -8,11 +8,11 @@ import numpy as np import pytest import scipy.stats -from scipy.sparse import coo_matrix, csr_matrix 
from sklearn.feature_selection import SelectKBest, chi2 from sklearn.feature_selection._univariate_selection import _chisquare from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal +from sklearn.utils.fixes import COO_CONTAINERS, CSR_CONTAINERS # Feature 0 is highly informative for class 1; # feature 1 is the same everywhere; @@ -26,7 +26,8 @@ def mkchi2(k): return SelectKBest(chi2, k=k) -def test_chi2(): +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_chi2(csr_container): # Test Chi2 feature extraction chi2 = mkchi2(k=1).fit(X, y) @@ -37,7 +38,7 @@ def test_chi2(): chi2 = mkchi2(k=2).fit(X, y) assert_array_equal(sorted(chi2.get_support(indices=True)), [0, 2]) - Xsp = csr_matrix(X, dtype=np.float64) + Xsp = csr_container(X, dtype=np.float64) chi2 = mkchi2(k=2).fit(Xsp, y) assert_array_equal(sorted(chi2.get_support(indices=True)), [0, 2]) Xtrans = chi2.transform(Xsp) @@ -49,18 +50,20 @@ def test_chi2(): assert_array_almost_equal(Xtrans, Xtrans2) -def test_chi2_coo(): +@pytest.mark.parametrize("coo_container", COO_CONTAINERS) +def test_chi2_coo(coo_container): # Check that chi2 works with a COO matrix # (as returned by CountVectorizer, DictVectorizer) - Xcoo = coo_matrix(X) + Xcoo = coo_container(X) mkchi2(k=2).fit_transform(Xcoo, y) # if we got here without an exception, we're safe -def test_chi2_negative(): +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_chi2_negative(csr_container): # Check for proper error on negative numbers in the input X. X, y = [[0, 1], [-1e-20, 1]], [0, 1] - for X in (X, np.array(X), csr_matrix(X)): + for X in (X, np.array(X), csr_container(X)): with pytest.raises(ValueError): chi2(X, y) From 8eef53fa1d194009ee2a02948c9d14a45407fc7d Mon Sep 17 00:00:00 2001 From: Mohit Joshi <46566524+work-mohit@users.noreply.github.com> Date: Mon, 11 Sep 2023 15:04:42 +0530 Subject: [PATCH 09/21] TST Extend tests for `scipy.sparse.*array` in `sklearn/feature_selection/tests/test_rfe.py` (#27177) --- sklearn/feature_selection/tests/test_rfe.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 0f141f3461d7f..a2b5b1c8d4cf0 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -7,7 +7,6 @@ import numpy as np import pytest from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal -from scipy import sparse from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.compose import TransformedTargetRegressor @@ -23,6 +22,7 @@ from sklearn.svm import SVC, SVR, LinearSVR from sklearn.utils import check_random_state from sklearn.utils._testing import ignore_warnings +from sklearn.utils.fixes import CSR_CONTAINERS class MockClassifier: @@ -79,13 +79,14 @@ def test_rfe_features_importance(): assert_array_equal(rfe.get_support(), rfe_svc.get_support()) -def test_rfe(): +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_rfe(csr_container): generator = check_random_state(0) iris = load_iris() # Add some irrelevant features. Random seed is set to make sure that # irrelevant features are always irrelevant. 
X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))] - X_sparse = sparse.csr_matrix(X) + X_sparse = csr_container(X) y = iris.target # dense model @@ -173,7 +174,8 @@ def test_rfe_mockclassifier(): assert X_r.shape == iris.data.shape -def test_rfecv(): +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_rfecv(csr_container): generator = check_random_state(0) iris = load_iris() # Add some irrelevant features. Random seed is set to make sure that @@ -197,7 +199,7 @@ def test_rfecv(): # same in sparse rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=1) - X_sparse = sparse.csr_matrix(X) + X_sparse = csr_container(X) rfecv_sparse.fit(X_sparse, y) X_r_sparse = rfecv_sparse.transform(X_sparse) assert_array_equal(X_r_sparse.toarray(), iris.data) @@ -241,14 +243,14 @@ def test_scorer(estimator, X, y): assert_array_equal(X_r, iris.data) rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=2) - X_sparse = sparse.csr_matrix(X) + X_sparse = csr_container(X) rfecv_sparse.fit(X_sparse, y) X_r_sparse = rfecv_sparse.transform(X_sparse) assert_array_equal(X_r_sparse.toarray(), iris.data) # Verifying that steps < 1 don't blow up. rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=0.2) - X_sparse = sparse.csr_matrix(X) + X_sparse = csr_container(X) rfecv_sparse.fit(X_sparse, y) X_r_sparse = rfecv_sparse.transform(X_sparse) assert_array_equal(X_r_sparse.toarray(), iris.data) From af6c35d26e76028952a551e8756bec333209d616 Mon Sep 17 00:00:00 2001 From: Mohit Joshi <46566524+work-mohit@users.noreply.github.com> Date: Mon, 11 Sep 2023 15:17:58 +0530 Subject: [PATCH 10/21] TST Extend tests for `scipy.sparse.*array` in `sklearn/feature_selection/tests/test_sequential.py` (#27178) --- sklearn/feature_selection/tests/test_sequential.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/feature_selection/tests/test_sequential.py b/sklearn/feature_selection/tests/test_sequential.py index a515bf22cdda3..82d65c55a0195 100644 --- a/sklearn/feature_selection/tests/test_sequential.py +++ b/sklearn/feature_selection/tests/test_sequential.py @@ -1,6 +1,5 @@ import numpy as np import pytest -import scipy from numpy.testing import assert_array_equal from sklearn.cluster import KMeans @@ -12,6 +11,7 @@ from sklearn.neighbors import KNeighborsClassifier from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler +from sklearn.utils.fixes import CSR_CONTAINERS def test_bad_n_features_to_select(): @@ -184,11 +184,12 @@ def test_sanity(seed, direction, n_features_to_select, expected_selected_feature assert_array_equal(sfs.get_support(indices=True), expected_selected_features) -def test_sparse_support(): +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_sparse_support(csr_container): # Make sure sparse data is supported X, y = make_regression(n_features=10) - X = scipy.sparse.csr_matrix(X) + X = csr_container(X) sfs = SequentialFeatureSelector( LinearRegression(), n_features_to_select="auto", cv=2 ) From cfbab7769c30f49a01e398e906ed596d3581730d Mon Sep 17 00:00:00 2001 From: Tialo <65392801+Tialo@users.noreply.github.com> Date: Mon, 11 Sep 2023 13:16:28 +0300 Subject: [PATCH 11/21] TST Extend tests for `scipy.sparse.*array` in `sklearn/utils/tests/test_utils.py` (#27201) --- sklearn/utils/tests/test_utils.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 0dd23cb02d399..0f0c9c898b17a 100644 --- 
a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -6,7 +6,6 @@ import numpy as np import pytest -import scipy.sparse as sp from sklearn import config_context from sklearn.utils import ( @@ -35,6 +34,7 @@ assert_array_equal, assert_no_warnings, ) +from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS # toy array X_toy = np.arange(9).reshape((3, 3)) @@ -160,21 +160,23 @@ def test_resample_stratify_2dy(): assert y.ndim == 2 -def test_resample_stratify_sparse_error(): +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_resample_stratify_sparse_error(csr_container): # resample must be ndarray rng = np.random.RandomState(0) n_samples = 100 X = rng.normal(size=(n_samples, 2)) y = rng.randint(0, 2, size=n_samples) - stratify = sp.csr_matrix(y) + stratify = csr_container(y) with pytest.raises(TypeError, match="A sparse matrix was passed"): X, y = resample(X, y, n_samples=50, random_state=rng, stratify=stratify) -def test_safe_mask(): +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test_safe_mask(csr_container): random_state = check_random_state(0) X = random_state.rand(5, 4) - X_csr = sp.csr_matrix(X) + X_csr = csr_container(X) mask = [False, False, True, True, True] mask = safe_mask(X, mask) @@ -514,14 +516,15 @@ def to_tuple(A): # to make the inner arrays hashable assert set(to_tuple(A)) == S -def test_shuffle_dont_convert_to_array(): +@pytest.mark.parametrize("csc_container", CSC_CONTAINERS) +def test_shuffle_dont_convert_to_array(csc_container): # Check that shuffle does not try to convert to numpy arrays with float # dtypes can let any indexable datastructure pass-through. a = ["a", "b", "c"] b = np.array(["a", "b", "c"], dtype=object) c = [1, 2, 3] d = MockDataFrame(np.array([["a", 0], ["b", 1], ["c", 2]], dtype=object)) - e = sp.csc_matrix(np.arange(6).reshape(3, 2)) + e = csc_container(np.arange(6).reshape(3, 2)) a_s, b_s, c_s, d_s, e_s = shuffle(a, b, c, d, e, random_state=0) assert a_s == ["c", "b", "a"] From dfe01c22273b905365d9454cea394367786e4b6a Mon Sep 17 00:00:00 2001 From: Tialo <65392801+Tialo@users.noreply.github.com> Date: Mon, 11 Sep 2023 13:17:17 +0300 Subject: [PATCH 12/21] TST Extend tests for `scipy.sparse.*array` in `sklearn/utils/tests/test_set_output.py` (#27202) --- sklearn/utils/tests/test_set_output.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/tests/test_set_output.py b/sklearn/utils/tests/test_set_output.py index d1722a1553f9c..afc059a899ccd 100644 --- a/sklearn/utils/tests/test_set_output.py +++ b/sklearn/utils/tests/test_set_output.py @@ -3,7 +3,6 @@ import numpy as np import pytest from numpy.testing import assert_array_equal -from scipy.sparse import csr_matrix from sklearn._config import config_context, get_config from sklearn.utils._set_output import ( @@ -12,6 +11,7 @@ _SetOutputMixin, _wrap_in_pandas_container, ) +from sklearn.utils.fixes import CSR_CONTAINERS def test__wrap_in_pandas_container_dense(): @@ -41,10 +41,11 @@ def test__wrap_in_pandas_container_dense_update_columns_and_index(): assert_array_equal(new_df.index, X_df.index) -def test__wrap_in_pandas_container_error_validation(): +@pytest.mark.parametrize("csr_container", CSR_CONTAINERS) +def test__wrap_in_pandas_container_error_validation(csr_container): """Check errors in _wrap_in_pandas_container.""" X = np.asarray([[1, 0, 3], [0, 0, 1]]) - X_csr = csr_matrix(X) + X_csr = csr_container(X) match = "Pandas output does not support sparse data" with pytest.raises(ValueError, 
match=match): _wrap_in_pandas_container(X_csr, columns=["a", "b", "c"]) From b0da1b7706054f0b78f0a0582a9362a188e1fa38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcus=20Fraa=C3=9F?= Date: Mon, 11 Sep 2023 12:42:58 +0200 Subject: [PATCH 13/21] DOC Add missing links to examples/impute (#27053) --- doc/modules/impute.rst | 4 ++++ sklearn/impute/_base.py | 3 +++ sklearn/impute/_iterative.py | 4 ++++ sklearn/impute/_knn.py | 3 +++ 4 files changed, 14 insertions(+) diff --git a/doc/modules/impute.rst b/doc/modules/impute.rst index a1985bc460b77..0bb62fa03bc63 100644 --- a/doc/modules/impute.rst +++ b/doc/modules/impute.rst @@ -87,6 +87,8 @@ string values or pandas categoricals when using the ``'most_frequent'`` or ['a' 'y'] ['b' 'y']] +For another example on usage, see :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. + .. _iterative_imputer: @@ -220,6 +222,8 @@ neighbors of samples with missing values:: [5.5, 6. , 5. ], [8. , 8. , 7. ]]) +For another example on usage, see :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. + .. topic:: References .. [OL2001] Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index a65210bce6960..e182a6ca73e61 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -256,6 +256,9 @@ class SimpleImputer(_BaseImputer): [[ 7. 2. 3. ] [ 4. 3.5 6. ] [10. 3.5 9. ]] + + For a more detailed example see + :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. """ _parameter_constraints: dict = { diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py index a0087a5a10d55..e08f1bd773a34 100644 --- a/sklearn/impute/_iterative.py +++ b/sklearn/impute/_iterative.py @@ -273,6 +273,10 @@ class IterativeImputer(_BaseImputer): array([[ 6.9584..., 2. , 3. ], [ 4. , 2.6000..., 6. ], [10. , 4.9999..., 9. ]]) + + For a more detailed example see + :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py` or + :ref:`sphx_glr_auto_examples_impute_plot_iterative_imputer_variants_comparison.py`. """ _parameter_constraints: dict = { diff --git a/sklearn/impute/_knn.py b/sklearn/impute/_knn.py index e36b49f262b2d..7da7785369c0d 100644 --- a/sklearn/impute/_knn.py +++ b/sklearn/impute/_knn.py @@ -121,6 +121,9 @@ class KNNImputer(_BaseImputer): [3. , 4. , 3. ], [5.5, 6. , 5. ], [8. , 8. , 7. ]]) + + For a more detailed example see + :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. """ _parameter_constraints: dict = { From e7ae63f784c5f85af41cf8f346d194775f01f333 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 11 Sep 2023 15:58:14 +0200 Subject: [PATCH 14/21] FIX avoid unecessary `if copy` branch for sparse array/matrix (#27336) Co-authored-by: Olivier Grisel --- doc/whats_new/v1.4.rst | 6 ++++++ sklearn/utils/tests/test_validation.py | 27 +++++++++++++++++++------- sklearn/utils/validation.py | 26 ++++++++++++------------- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index d54c276b3292b..612bbfebb6502 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -300,6 +300,12 @@ Changelog which can be used to check whether a given set of parameters would be consumed. :pr:`26831` by `Adrin Jalali`_. +- |Fix| :func:`sklearn.utils.check_array` should accept both matrix and array from + the sparse SciPy module. 
+  The previous implementation would fail when `copy=True` because it called
+  `np.may_share_memory`, which does not work with SciPy sparse arrays and
+  returns an incorrect result for SciPy sparse matrices.
+  :pr:`27336` by :user:`Guillaume Lemaitre `.
+
 Code and Documentation Contributors
 -----------------------------------
 
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 96b429846f331..0e6d46e4c4945 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -49,7 +49,13 @@
     skip_if_array_api_compat_not_configured,
 )
 from sklearn.utils.estimator_checks import _NotAnArray
-from sklearn.utils.fixes import parse_version
+from sklearn.utils.fixes import (
+    COO_CONTAINERS,
+    CSC_CONTAINERS,
+    CSR_CONTAINERS,
+    DOK_CONTAINERS,
+    parse_version,
+)
 from sklearn.utils.validation import (
     FLOAT_DTYPES,
     _allclose_dense_sparse,
@@ -356,13 +362,20 @@ def test_check_array():
     assert X is X_checked
 
     # allowed sparse != None
-    X_csc = sp.csc_matrix(X_C)
-    X_coo = X_csc.tocoo()
-    X_dok = X_csc.todok()
-    X_int = X_csc.astype(int)
-    X_float = X_csc.astype(float)
 
-    Xs = [X_csc, X_coo, X_dok, X_int, X_float]
+    # try different types of sparse format
+    Xs = []
+    Xs.extend(
+        [
+            sparse_container(X_C)
+            for sparse_container in CSR_CONTAINERS
+            + CSC_CONTAINERS
+            + COO_CONTAINERS
+            + DOK_CONTAINERS
+        ]
+    )
+    Xs.extend([Xs[0].astype(np.int64), Xs[0].astype(np.float64)])
+
     accept_sparses = [["csr", "coo"], ["coo", "dok"]]
     # scipy sparse matrices do not support the object dtype so
     # this dtype is skipped in this loop
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 22c8394957cc1..ac75c8afe5e5b 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -962,6 +962,19 @@ def is_sparse(dtype):
                 allow_nan=force_all_finite == "allow-nan",
             )
 
+    if copy:
+        if _is_numpy_namespace(xp):
+            # only make a copy if `array` and `array_orig` may share memory
+            if np.may_share_memory(array, array_orig):
+                array = _asarray_with_order(
+                    array, dtype=dtype, order=order, copy=True, xp=xp
+                )
+        else:
+            # always make a copy for non-numpy arrays
+            array = _asarray_with_order(
+                array, dtype=dtype, order=order, copy=True, xp=xp
+            )
+
     if ensure_min_samples > 0:
         n_samples = _num_samples(array)
         if n_samples < ensure_min_samples:
@@ -980,19 +993,6 @@ def is_sparse(dtype):
             % (n_features, array.shape, ensure_min_features, context)
         )
 
-    if copy:
-        if _is_numpy_namespace(xp):
-            # only make a copy if `array` and `array_orig` may share memory`
-            if np.may_share_memory(array, array_orig):
-                array = _asarray_with_order(
-                    array, dtype=dtype, order=order, copy=True, xp=xp
-                )
-        else:
-            # always make a copy for non-numpy arrays
-            array = _asarray_with_order(
-                array, dtype=dtype, order=order, copy=True, xp=xp
-            )
-
     return array
 
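To illustrate the fix above, a minimal sketch of the behavior being repaired
(assuming SciPy >= 1.8, which provides ``csr_array``)::

    import numpy as np
    from scipy.sparse import csr_array
    from sklearn.utils import check_array

    X = csr_array(np.eye(3))
    # Before this fix, requesting a copy sent sparse input through the
    # NumPy-specific ``np.may_share_memory`` check, which fails for SciPy
    # sparse arrays; a sparse input with ``copy=True`` now simply works and
    # returns a new object.
    X_checked = check_array(X, accept_sparse="csr", copy=True)
    assert X_checked is not X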
From 83a8015702213b39510a0f4898bc6879bcdf3ac2 Mon Sep 17 00:00:00 2001
From: Adrin Jalali 
Date: Mon, 11 Sep 2023 16:19:24 +0200
Subject: [PATCH 15/21] FEAT add metadata routing to *SearchCV (#27058)

---
 doc/whats_new/v1.4.rst                        |   8 +
 sklearn/model_selection/_search.py            | 165 +++++++--
 .../_search_successive_halving.py             |  28 +-
 sklearn/model_selection/tests/test_search.py  |  81 ++++-
 sklearn/tests/metadata_routing_common.py      |  54 +--
 .../test_metaestimators_metadata_routing.py   | 341 +++++++++++++-----
 sklearn/utils/_metadata_requests.py           |   6 +-
 7 files changed, 509 insertions(+), 174 deletions(-)

diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index 612bbfebb6502..6531102bba9fd 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -212,6 +212,14 @@ Changelog
   is enabled and should be passed via the `params` parameter. :pr:`26896` by
   `Adrin Jalali`_.
 
+- |Feature| :class:`~model_selection.GridSearchCV`,
+  :class:`~model_selection.RandomizedSearchCV`,
+  :class:`~model_selection.HalvingGridSearchCV`, and
+  :class:`~model_selection.HalvingRandomSearchCV` now support metadata routing
+  in their ``fit`` and ``score``, and route metadata to the underlying
+  estimator's ``fit``, the CV splitter, and the scorer. :pr:`27058` by `Adrin
+  Jalali`_.
+
 - |Enhancement| :func:`sklearn.model_selection.train_test_split` now supports
   Array API compatible inputs. :pr:`26855` by `Tim Head`_.
 
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index dcf0d2e41c905..9de03c2c663ec 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -27,10 +27,21 @@
 from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone, is_classifier
 from ..exceptions import NotFittedError
 from ..metrics import check_scoring
-from ..metrics._scorer import _check_multimetric_scoring, get_scorer_names
-from ..utils import check_random_state
+from ..metrics._scorer import (
+    _check_multimetric_scoring,
+    _MultimetricScorer,
+    get_scorer_names,
+)
+from ..utils import Bunch, check_random_state
 from ..utils._param_validation import HasMethods, Interval, StrOptions
 from ..utils._tags import _safe_tags
+from ..utils.metadata_routing import (
+    MetadataRouter,
+    MethodMapping,
+    _raise_for_params,
+    _routing_enabled,
+    process_routing,
+)
 from ..utils.metaestimators import available_if
 from ..utils.parallel import Parallel, delayed
 from ..utils.random import sample_without_replacement
@@ -429,7 +440,7 @@ def _more_tags(self):
             },
         }
 
-    def score(self, X, y=None):
+    def score(self, X, y=None, **params):
         """Return the score on the given data, if the estimator has been refit.
 
         This uses the score defined by ``scoring`` where provided, and the
@@ -446,6 +457,14 @@ def score(self, X, y=None):
             Target relative to X for classification or regression;
             None for unsupervised learning.
 
+        **params : dict
+            Parameters to be passed to the underlying scorer(s).
+
+            .. versionadded:: 1.4
+                Only available if `enable_metadata_routing=True`. See
+                :ref:`Metadata Routing User Guide ` for more
+                details.
+
         Returns
        -------
         score : float
@@ -454,6 +473,14 @@ def score(self, X, y=None):
         """
         _check_refit(self, "score")
         check_is_fitted(self)
+
+        _raise_for_params(params, self, "score")
+
+        if _routing_enabled():
+            score_params = process_routing(self, "score", **params).scorer["score"]
+        else:
+            score_params = dict()
+
         if self.scorer_ is None:
             raise ValueError(
                 "No score function explicitly defined, "
@@ -465,10 +492,10 @@ def score(self, X, y=None):
                 scorer = self.scorer_[self.refit]
             else:
                 scorer = self.scorer_
-            return scorer(self.best_estimator_, X, y)
+            return scorer(self.best_estimator_, X, y, **score_params)
 
         # callable
-        score = self.scorer_(self.best_estimator_, X, y)
+        score = self.scorer_(self.best_estimator_, X, y, **score_params)
         if self.multimetric_:
             score = score[self.refit]
         return score
@@ -754,11 +781,62 @@ def _select_best_index(refit, refit_metric, results):
             best_index = results[f"rank_test_{refit_metric}"].argmin()
         return best_index
 
+    def _get_scorers(self, convert_multimetric):
+        """Get the scorer(s) to be used.
+
+        This is used in ``fit`` and ``get_metadata_routing``.
+ + Parameters + ---------- + convert_multimetric : bool + Whether to convert a dict of scorers to a _MultimetricScorer. This + is used in ``get_metadata_routing`` to include the routing info for + multiple scorers. + + Returns + ------- + scorers, refit_metric + """ + refit_metric = "score" + + if callable(self.scoring): + scorers = self.scoring + elif self.scoring is None or isinstance(self.scoring, str): + scorers = check_scoring(self.estimator, self.scoring) + else: + scorers = _check_multimetric_scoring(self.estimator, self.scoring) + self._check_refit_for_multimetric(scorers) + refit_metric = self.refit + if convert_multimetric and isinstance(scorers, dict): + scorers = _MultimetricScorer( + scorers=scorers, raise_exc=(self.error_score == "raise") + ) + + return scorers, refit_metric + + def _get_routed_params_for_fit(self, params): + """Get the parameters to be used for routing. + + This is a method instead of a snippet in ``fit`` since it's used twice, + here in ``fit``, and in ``HalvingRandomSearchCV.fit``. + """ + if _routing_enabled(): + routed_params = process_routing(self, "fit", **params) + else: + params = params.copy() + groups = params.pop("groups", None) + routed_params = Bunch( + estimator=Bunch(fit=params), + splitter=Bunch(split={"groups": groups}), + scorer=Bunch(score={}), + ) + return routed_params + @_fit_context( # *SearchCV.estimator is not validated yet prefer_skip_nested_validation=False ) - def fit(self, X, y=None, *, groups=None, **fit_params): + def fit(self, X, y=None, **params): """Run fit with all sets of parameters. Parameters @@ -773,13 +851,9 @@ def fit(self, X, y=None, *, groups=None, **fit_params): Target relative to X for classification or regression; None for unsupervised learning. - groups : array-like of shape (n_samples,), default=None - Group labels for the samples used while splitting the dataset into - train/test set. Only used in conjunction with a "Group" :term:`cv` - instance (e.g., :class:`~sklearn.model_selection.GroupKFold`). - - **fit_params : dict of str -> object - Parameters passed to the `fit` method of the estimator. + **params : dict of str -> object + Parameters passed to the ``fit`` method of the estimator, the scorer, + and the CV splitter. If a fit parameter is an array-like whose length is equal to `num_samples` then it will be split across CV groups along with `X` @@ -792,22 +866,18 @@ def fit(self, X, y=None, *, groups=None, **fit_params): Instance of fitted estimator. """ estimator = self.estimator - refit_metric = "score" + # Here we keep a dict of scorers as is, and only convert to a + # _MultimetricScorer at a later stage. 
Issue: + # https://github.com/scikit-learn/scikit-learn/issues/27001 + scorers, refit_metric = self._get_scorers(convert_multimetric=False) - if callable(self.scoring): - scorers = self.scoring - elif self.scoring is None or isinstance(self.scoring, str): - scorers = check_scoring(self.estimator, self.scoring) - else: - scorers = _check_multimetric_scoring(self.estimator, self.scoring) - self._check_refit_for_multimetric(scorers) - refit_metric = self.refit + X, y = indexable(X, y) + params = _check_method_params(X, params=params) - X, y, groups = indexable(X, y, groups) - fit_params = _check_method_params(X, params=fit_params) + routed_params = self._get_routed_params_for_fit(params) cv_orig = check_cv(self.cv, y, classifier=is_classifier(estimator)) - n_splits = cv_orig.get_n_splits(X, y, groups) + n_splits = cv_orig.get_n_splits(X, y, **routed_params.splitter.split) base_estimator = clone(self.estimator) @@ -815,9 +885,8 @@ def fit(self, X, y=None, *, groups=None, **fit_params): fit_and_score_kwargs = dict( scorer=scorers, - fit_params=fit_params, - # TODO(SLEP6): pass score params along - score_params=None, + fit_params=routed_params.estimator.fit, + score_params=routed_params.scorer.score, return_train_score=self.return_train_score, return_n_test_samples=True, return_times=True, @@ -857,7 +926,8 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None): **fit_and_score_kwargs, ) for (cand_idx, parameters), (split_idx, (train, test)) in product( - enumerate(candidate_params), enumerate(cv.split(X, y, groups)) + enumerate(candidate_params), + enumerate(cv.split(X, y, **routed_params.splitter.split)), ) ) @@ -935,9 +1005,9 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None): refit_start_time = time.time() if y is not None: - self.best_estimator_.fit(X, y, **fit_params) + self.best_estimator_.fit(X, y, **routed_params.estimator.fit) else: - self.best_estimator_.fit(X, **fit_params) + self.best_estimator_.fit(X, **routed_params.estimator.fit) refit_end_time = time.time() self.refit_time_ = refit_end_time - refit_start_time @@ -1057,6 +1127,39 @@ def _store(key_name, array, weights=None, splits=False, rank=False): return results + def get_metadata_routing(self): + """Get metadata routing of this object. + + Please check :ref:`User Guide ` on how the routing + mechanism works. + + .. versionadded:: 1.4 + + Returns + ------- + routing : MetadataRouter + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating + routing information. + """ + router = MetadataRouter(owner=self.__class__.__name__) + router.add( + estimator=self.estimator, + method_mapping=MethodMapping().add(caller="fit", callee="fit"), + ) + + scorer, _ = self._get_scorers(convert_multimetric=True) + router.add( + scorer=scorer, + method_mapping=MethodMapping() + .add(caller="score", callee="score") + .add(caller="fit", callee="score"), + ) + router.add( + splitter=self.cv, + method_mapping=MethodMapping().add(caller="fit", callee="split"), + ) + return router + class GridSearchCV(BaseSearchCV): """Exhaustive search over specified parameter values for an estimator. 
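To illustrate the feature this patch adds, a minimal usage sketch (assuming
the existing ``set_config(enable_metadata_routing=True)`` opt-in and the
``set_fit_request`` API)::

    import numpy as np
    from sklearn import set_config
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import GridSearchCV

    # Metadata routing is opt-in; the legacy **fit_params behavior applies
    # otherwise.
    set_config(enable_metadata_routing=True)

    X, y = make_classification(random_state=0)
    sample_weight = np.ones_like(y, dtype=np.float64)

    # The sub-estimator must explicitly request ``sample_weight`` so that
    # the search knows to route it to the estimator's ``fit``.
    clf = LogisticRegression().set_fit_request(sample_weight=True)
    search = GridSearchCV(clf, param_grid={"C": [0.1, 1.0]}, cv=2)

    # ``sample_weight`` is routed to every internal ``fit`` call; the CV
    # splitter and the scorer receive only the metadata they themselves
    # request.
    search.fit(X, y, sample_weight=sample_weight)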
diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index 708092d09a2a5..b1cf5ee50965c 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -27,20 +27,20 @@ def __init__(self, *, base_cv, fraction, subsample_test, random_state): self.subsample_test = subsample_test self.random_state = random_state - def split(self, X, y, groups=None): - for train_idx, test_idx in self.base_cv.split(X, y, groups): + def split(self, X, y, **kwargs): + for train_idx, test_idx in self.base_cv.split(X, y, **kwargs): train_idx = resample( train_idx, replace=False, random_state=self.random_state, - n_samples=int(self.fraction * train_idx.shape[0]), + n_samples=int(self.fraction * len(train_idx)), ) if self.subsample_test: test_idx = resample( test_idx, replace=False, random_state=self.random_state, - n_samples=int(self.fraction * test_idx.shape[0]), + n_samples=int(self.fraction * len(test_idx)), ) yield train_idx, test_idx @@ -123,7 +123,7 @@ def __init__( self.min_resources = min_resources self.aggressive_elimination = aggressive_elimination - def _check_input_parameters(self, X, y, groups): + def _check_input_parameters(self, X, y, split_params): # We need to enforce that successive calls to cv.split() yield the same # splits: see https://github.com/scikit-learn/scikit-learn/issues/15149 if not _yields_constant_splits(self._checked_cv_orig): @@ -154,7 +154,7 @@ def _check_input_parameters(self, X, y, groups): self.min_resources_ = self.min_resources if self.min_resources_ in ("smallest", "exhaust"): if self.resource == "n_samples": - n_splits = self._checked_cv_orig.get_n_splits(X, y, groups) + n_splits = self._checked_cv_orig.get_n_splits(X, y, **split_params) # please see https://gph.is/1KjihQe for a justification magic_factor = 2 self.min_resources_ = n_splits * magic_factor @@ -215,7 +215,7 @@ def _select_best_index(refit, refit_metric, results): # Halving*SearchCV.estimator is not validated yet prefer_skip_nested_validation=False ) - def fit(self, X, y=None, groups=None, **fit_params): + def fit(self, X, y=None, **params): """Run fit with all sets of parameters. Parameters @@ -229,12 +229,7 @@ def fit(self, X, y=None, groups=None, **fit_params): Target relative to X for classification or regression; None for unsupervised learning. - groups : array-like of shape (n_samples,), default=None - Group labels for the samples used while splitting the dataset into - train/test set. Only used in conjunction with a "Group" :term:`cv` - instance (e.g., :class:`~sklearn.model_selection.GroupKFold`). - - **fit_params : dict of string -> object + **params : dict of string -> object Parameters passed to the ``fit`` method of the estimator. 
Returns @@ -246,15 +241,14 @@ def fit(self, X, y=None, groups=None, **fit_params): self.cv, y, classifier=is_classifier(self.estimator) ) + routed_params = self._get_routed_params_for_fit(params) self._check_input_parameters( - X=X, - y=y, - groups=groups, + X=X, y=y, split_params=routed_params.splitter.split ) self._n_samples_orig = _num_samples(X) - super().fit(X, y=y, groups=groups, **fit_params) + super().fit(X, y=y, **params) # Set best_score_: BaseSearchCV does not set it, as refit is a callable self.best_score_ = self.cv_results_["mean_test_score"][self.best_index_] diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 50b519118a2b3..1db38671ef481 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -22,9 +22,14 @@ make_multilabel_classification, ) from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.exceptions import FitFailedWarning from sklearn.experimental import enable_halving_search_cv # noqa from sklearn.impute import SimpleImputer -from sklearn.linear_model import LinearRegression, Ridge, SGDClassifier +from sklearn.linear_model import ( + LinearRegression, + Ridge, + SGDClassifier, +) from sklearn.metrics import ( accuracy_score, confusion_matrix, @@ -51,11 +56,15 @@ train_test_split, ) from sklearn.model_selection._search import BaseSearchCV -from sklearn.model_selection._validation import FitFailedWarning from sklearn.model_selection.tests.common import OneTimeSplitter from sklearn.neighbors import KernelDensity, KNeighborsClassifier, LocalOutlierFactor from sklearn.pipeline import Pipeline from sklearn.svm import SVC, LinearSVC +from sklearn.tests.metadata_routing_common import ( + ConsumingScorer, + _Registry, + check_recorded_metadata, +) from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils._mocking import CheckingClassifier, MockDataFrame from sklearn.utils._testing import ( @@ -68,6 +77,7 @@ assert_array_equal, ignore_warnings, ) +from sklearn.utils.validation import _num_samples # Neither of the following two estimators inherit from BaseEstimator, @@ -2449,3 +2459,70 @@ def test_search_estimator_param(SearchCV, param_search): assert params["clf"][0].C == orig_C # testing that the GS is setting the parameter of the step correctly assert gs.best_estimator_.named_steps["clf"].C == 0.01 + + +# Metadata Routing Tests +# ====================== + + +@pytest.mark.usefixtures("enable_slep006") +@pytest.mark.parametrize( + "SearchCV, param_search", + [ + (GridSearchCV, "param_grid"), + (RandomizedSearchCV, "param_distributions"), + ], +) +def test_multi_metric_search_forwards_metadata(SearchCV, param_search): + """Test that *SearchCV forwards metadata correctly when passed multiple metrics.""" + X, y = make_classification(random_state=42) + n_samples = _num_samples(X) + rng = np.random.RandomState(0) + score_weights = rng.rand(n_samples) + score_metadata = rng.rand(n_samples) + + est = LinearSVC(dual="auto") + param_grid_search = {param_search: {"C": [1]}} + + scorer_registry = _Registry() + scorer = ConsumingScorer(registry=scorer_registry).set_score_request( + sample_weight="score_weights", metadata="score_metadata" + ) + scoring = dict(my_scorer=scorer, accuracy="accuracy") + SearchCV(est, refit="accuracy", cv=2, scoring=scoring, **param_grid_search).fit( + X, y, score_weights=score_weights, score_metadata=score_metadata + ) + assert len(scorer_registry) + for _scorer in scorer_registry: + 
check_recorded_metadata( + obj=_scorer, + method="score", + split_params=("sample_weight", "metadata"), + sample_weight=score_weights, + metadata=score_metadata, + ) + + +@pytest.mark.parametrize( + "SearchCV, param_search", + [ + (GridSearchCV, "param_grid"), + (RandomizedSearchCV, "param_distributions"), + (HalvingGridSearchCV, "param_grid"), + ], +) +def test_score_rejects_params_with_no_routing_enabled(SearchCV, param_search): + """*SearchCV should reject **params when metadata routing is not enabled + since this is added only when routing is enabled.""" + X, y = make_classification(random_state=42) + est = LinearSVC(dual="auto") + param_grid_search = {param_search: {"C": [1]}} + + gs = SearchCV(est, cv=2, **param_grid_search).fit(X, y) + + with pytest.raises(ValueError, match="is only supported if"): + gs.score(X, y, metadata=1) + + +# End of Metadata Routing Tests +# ============================= diff --git a/sklearn/tests/metadata_routing_common.py b/sklearn/tests/metadata_routing_common.py index b5f45d3f34c72..2bc7a00aae585 100644 --- a/sklearn/tests/metadata_routing_common.py +++ b/sklearn/tests/metadata_routing_common.py @@ -49,7 +49,6 @@ def check_recorded_metadata(obj, method, split_params=tuple(), **kwargs): split_params : tuple, default=empty specifies any parameters which are to be checked as being a subset of the original values. - """ records = getattr(obj, "_records", dict()).get(method, dict()) assert set(kwargs.keys()) == set(records.keys()) @@ -70,11 +69,16 @@ def assert_request_is_empty(metadata_request, exclude=None): """Check if a metadata request dict is empty. One can exclude a method or a list of methods from the check using the - ``exclude`` parameter. + ``exclude`` parameter. If metadata_request is a MetadataRouter, then + ``exclude`` can be of the form ``{"object" : [method, ...]}``. """ if isinstance(metadata_request, MetadataRouter): - for _, route_mapping in metadata_request: - assert_request_is_empty(route_mapping.router) + for name, route_mapping in metadata_request: + if exclude is not None and name in exclude: + _exclude = exclude[name] + else: + _exclude = None + assert_request_is_empty(route_mapping.router, exclude=_exclude) return exclude = [] if exclude is None else exclude @@ -87,7 +91,7 @@ def assert_request_is_empty(metadata_request, exclude=None): for prop, alias in mmr.requests.items() if isinstance(alias, str) or alias is not None ] - assert not len(props) + assert not props def assert_request_equal(request, dictionary): @@ -123,7 +127,6 @@ class ConsumingRegressor(RegressorMixin, BaseEstimator): a reference to the estimator later on. Since that reference is not required in all tests, registration can be skipped by leaving this value as None. - """ def __init__(self, registry=None): @@ -151,9 +154,6 @@ def predict(self, X, sample_weight="default", metadata="default"): pass # pragma: no cover # when needed, uncomment the implementation - # if self.registry is not None: - # self.registry.append(self) - # record_metadata_not_default( # self, "predict", sample_weight=sample_weight, metadata=metadata # ) @@ -188,9 +188,13 @@ class ConsumingClassifier(ClassifierMixin, BaseEstimator): required in all tests, registration can be skipped by leaving this value as None. + alpha : float, default=0 + This parameter is only used to test the ``*SearchCV`` objects, and + doesn't do anything. 
""" - def __init__(self, registry=None): + def __init__(self, registry=None, alpha=0.0): + self.alpha = alpha self.registry = registry def partial_fit(self, X, y, sample_weight="default", metadata="default"): @@ -214,35 +218,35 @@ def fit(self, X, y, sample_weight="default", metadata="default"): return self def predict(self, X, sample_weight="default", metadata="default"): - if self.registry is not None: - self.registry.append(self) - record_metadata_not_default( self, "predict", sample_weight=sample_weight, metadata=metadata ) return np.zeros(shape=(len(X),)) def predict_proba(self, X, sample_weight="default", metadata="default"): - if self.registry is not None: - self.registry.append(self) + pass # pragma: no cover - record_metadata_not_default( - self, "predict_proba", sample_weight=sample_weight, metadata=metadata - ) - return np.asarray([[0.0, 1.0]] * len(X)) + # uncomment when needed + # record_metadata_not_default( + # self, "predict_proba", sample_weight=sample_weight, metadata=metadata + # ) + # return np.asarray([[0.0, 1.0]] * len(X)) def predict_log_proba(self, X, sample_weight="default", metadata="default"): pass # pragma: no cover - # when needed, uncomment the implementation - # if self.registry is not None: - # self.registry.append(self) - + # uncomment when needed # record_metadata_not_default( # self, "predict_log_proba", sample_weight=sample_weight, metadata=metadata # ) # return np.zeros(shape=(len(X), 2)) + def decision_function(self, X, sample_weight="default", metadata="default"): + record_metadata_not_default( + self, "predict_proba", sample_weight=sample_weight, metadata=metadata + ) + return np.zeros(shape=(len(X),)) + class ConsumingTransformer(TransformerMixin, BaseEstimator): """A transformer which accepts metadata on fit and transform. 
@@ -324,8 +328,8 @@ def split(self, X, y=None, groups="default", metadata="default"): yield test_indices, train_indices yield train_indices, test_indices - def get_n_splits(self, X=None, y=None, groups=None): - pass # pragma: no cover + def get_n_splits(self, X=None, y=None, groups=None, metadata=None): + return 2 def _iter_test_indices(self, X=None, y=None, groups=None): split_index = len(X) // 2 diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 4a548fe9f067f..d9bbe399728b8 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -7,7 +7,14 @@ from sklearn import config_context from sklearn.calibration import CalibratedClassifierCV from sklearn.exceptions import UnsetMetadataPassedError +from sklearn.experimental import enable_halving_search_cv # noqa from sklearn.linear_model import LogisticRegressionCV +from sklearn.model_selection import ( + GridSearchCV, + HalvingGridSearchCV, + HalvingRandomSearchCV, + RandomizedSearchCV, +) from sklearn.multioutput import ( ClassifierChain, MultiOutputClassifier, @@ -49,7 +56,7 @@ def enable_slep006(): "estimator": ConsumingRegressor, "X": X, "y": y_multi, - "routing_methods": ["fit", "partial_fit"], + "estimator_routing_methods": ["fit", "partial_fit"], }, { "metaestimator": MultiOutputClassifier, @@ -57,7 +64,7 @@ def enable_slep006(): "estimator": ConsumingClassifier, "X": X, "y": y_multi, - "routing_methods": ["fit", "partial_fit"], + "estimator_routing_methods": ["fit", "partial_fit"], }, { "metaestimator": CalibratedClassifierCV, @@ -65,7 +72,7 @@ def enable_slep006(): "estimator": ConsumingClassifier, "X": X, "y": y, - "routing_methods": ["fit"], + "estimator_routing_methods": ["fit"], "preserves_metadata": False, }, { @@ -74,7 +81,7 @@ def enable_slep006(): "estimator": ConsumingClassifier, "X": X, "y": y_multi, - "routing_methods": ["fit"], + "estimator_routing_methods": ["fit"], }, { "metaestimator": RegressorChain, @@ -82,7 +89,72 @@ def enable_slep006(): "estimator": ConsumingRegressor, "X": X, "y": y_multi, - "routing_methods": ["fit"], + "estimator_routing_methods": ["fit"], + }, + { + "metaestimator": LogisticRegressionCV, + "X": X, + "y": y_multi, + "scorer_name": "scoring", + "scorer_routing_methods": ["fit", "score"], + "cv_name": "cv", + "cv_routing_methods": ["fit"], + }, + { + "metaestimator": GridSearchCV, + "estimator_name": "estimator", + "estimator": ConsumingClassifier, + "init_args": {"param_grid": {"alpha": [0.1, 0.2]}}, + "X": X, + "y": y, + "estimator_routing_methods": ["fit"], + "preserves_metadata": "subset", + "scorer_name": "scoring", + "scorer_routing_methods": ["fit", "score"], + "cv_name": "cv", + "cv_routing_methods": ["fit"], + }, + { + "metaestimator": RandomizedSearchCV, + "estimator_name": "estimator", + "estimator": ConsumingClassifier, + "init_args": {"param_distributions": {"alpha": [0.1, 0.2]}}, + "X": X, + "y": y, + "estimator_routing_methods": ["fit"], + "preserves_metadata": "subset", + "scorer_name": "scoring", + "scorer_routing_methods": ["fit", "score"], + "cv_name": "cv", + "cv_routing_methods": ["fit"], + }, + { + "metaestimator": HalvingGridSearchCV, + "estimator_name": "estimator", + "estimator": ConsumingClassifier, + "init_args": {"param_grid": {"alpha": [0.1, 0.2]}}, + "X": X, + "y": y, + "estimator_routing_methods": ["fit"], + "preserves_metadata": "subset", + "scorer_name": "scoring", + "scorer_routing_methods": ["fit", "score"], + 
"cv_name": "cv", + "cv_routing_methods": ["fit"], + }, + { + "metaestimator": HalvingRandomSearchCV, + "estimator_name": "estimator", + "estimator": ConsumingClassifier, + "init_args": {"param_distributions": {"alpha": [0.1, 0.2]}}, + "X": X, + "y": y, + "estimator_routing_methods": ["fit"], + "preserves_metadata": "subset", + "scorer_name": "scoring", + "scorer_routing_methods": ["fit", "score"], + "cv_name": "cv", + "cv_routing_methods": ["fit"], }, ] """List containing all metaestimators to be tested and their settings @@ -92,38 +164,79 @@ def enable_slep006(): - metaestimator: The metaestmator to be tested - estimator_name: The name of the argument for the sub-estimator - estimator: The sub-estimator +- init_args: The arguments to be passed to the metaestimator's constructor - X: X-data to fit and predict - y: y-data to fit -- routing_methods: list of all methods to check for routing -- preserves_metadata: Whether the metaestimator passes the metadata to the - sub-estimator without modification or not. If it does, we check that the - values are identical. If it doesn't, no check is performed. TODO Maybe - something smarter could be done if the data is modified. - +- estimator_routing_methods: list of all methods to check for routing metadata + to the sub-estimator +- preserves_metadata: + - True (default): the metaestimator passes the metadata to the + sub-estimator without modification. We check that the values recorded by + the sub-estimator are identical to what we've passed to the + metaestimator. + - False: no check is performed regarding values, we only check that a + metadata with the expected names/keys are passed. + - "subset": we check that the recorded metadata by the sub-estimator is a + subset of what is passed to the metaestimator. +- scorer_name: The name of the argument for the scorer +- scorer_routing_methods: list of all methods to check for routing metadata + to the scorer +- cv_name: The name of the argument for the CV splitter +- cv_routing_methods: list of all methods to check for routing metadata + to the splitter """ -# ids used for pytest fixture +# IDs used by pytest to get meaningful verbose messages when running the tests METAESTIMATOR_IDS = [str(row["metaestimator"].__name__) for row in METAESTIMATORS] -CV_SCORERS: list = [ - { - "cv_estimator": LogisticRegressionCV, - "scorer_name": "scoring", - "routing_methods": ["fit", "score"], - }, -] -CV_SPLITTERS: list = [ - { - "cv_estimator": LogisticRegressionCV, - "splitter_name": "cv", - "routing_methods": ["fit"], - } -] +def get_init_args(metaestimator_info): + """Get the init args for a metaestimator -# IDs used by pytest to get meaningful verbose messages when running the tests -CV_SCORER_IDS = [x["cv_estimator"].__name__ for x in CV_SCORERS] -CV_SPLITTER_IDS = [x["cv_estimator"].__name__ for x in CV_SPLITTERS] + This is a helper function to get the init args for a metaestimator from + the METAESTIMATORS list. It returns an empty dict if no init args are + required. + + Returns + ------- + kwargs : dict + The init args for the metaestimator. + + (estimator, estimator_registry) : (estimator, registry) + The sub-estimator and the corresponding registry. + + (scorer, scorer_registry) : (scorer, registry) + The scorer and the corresponding registry. + + (cv, cv_registry) : (CV splitter, registry) + The CV splitter and the corresponding registry. 
+ """ + kwargs = metaestimator_info.get("init_args", {}) + estimator, estimator_registry = None, None + scorer, scorer_registry = None, None + cv, cv_registry = None, None + if "estimator" in metaestimator_info: + estimator_name = metaestimator_info["estimator_name"] + estimator_registry = _Registry() + estimator = metaestimator_info["estimator"](estimator_registry) + kwargs[estimator_name] = estimator + if "scorer_name" in metaestimator_info: + scorer_name = metaestimator_info["scorer_name"] + scorer_registry = _Registry() + scorer = ConsumingScorer(registry=scorer_registry) + kwargs[scorer_name] = scorer + if "cv_name" in metaestimator_info: + cv_name = metaestimator_info["cv_name"] + cv_registry = _Registry() + cv = ConsumingSplitter(registry=cv_registry) + kwargs[cv_name] = cv + + return ( + kwargs, + (estimator, estimator_registry), + (scorer, scorer_registry), + (cv, cv_registry), + ) def test_registry_copy(): @@ -135,134 +248,170 @@ def test_registry_copy(): assert a is copy.deepcopy(a) -@pytest.mark.parametrize( - "metaestimator", - METAESTIMATORS, - ids=METAESTIMATOR_IDS, -) +@pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS) def test_default_request(metaestimator): # Check that by default request is empty and the right type cls = metaestimator["metaestimator"] - estimator = metaestimator["estimator"]() - estimator_name = metaestimator["estimator_name"] - instance = cls(**{estimator_name: estimator}) - assert_request_is_empty(instance.get_metadata_routing()) + kwargs, *_ = get_init_args(metaestimator) + instance = cls(**kwargs) + if "cv_name" in metaestimator: + # Our GroupCV splitters request groups by default, which we should + # ignore in this test. + exclude = {"splitter": ["split"]} + else: + exclude = None + assert_request_is_empty(instance.get_metadata_routing(), exclude=exclude) assert isinstance(instance.get_metadata_routing(), MetadataRouter) -@pytest.mark.parametrize( - "metaestimator", - METAESTIMATORS, - ids=METAESTIMATOR_IDS, -) -def test_error_on_missing_requests(metaestimator): - # Test that a UnsetMetadataPassedError is raised when it should. 
+@pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS) +def test_error_on_missing_requests_for_sub_estimator(metaestimator): + # Test that a UnsetMetadataPassedError is raised when the sub-estimator's + # requests are not set + if "estimator" not in metaestimator: + # This test only makes sense for metaestimators which have a + # sub-estimator + return + cls = metaestimator["metaestimator"] - estimator = metaestimator["estimator"]() - estimator_name = metaestimator["estimator_name"] X = metaestimator["X"] y = metaestimator["y"] - routing_methods = metaestimator["routing_methods"] + routing_methods = metaestimator["estimator_routing_methods"] for method_name in routing_methods: for key in ["sample_weight", "metadata"]: + kwargs, (estimator, _), (scorer, _), *_ = get_init_args(metaestimator) + if scorer: + scorer.set_score_request(**{key: True}) val = {"sample_weight": sample_weight, "metadata": metadata}[key] - kwargs = {key: val} + method_kwargs = {key: val} msg = ( f"[{key}] are passed but are not explicitly set as requested or not" f" for {estimator.__class__.__name__}.{method_name}" ) - instance = cls(**{estimator_name: estimator}) - if "fit" not in method_name: # instance needs to be fitted first - instance.fit(X, y) # pragma: no cover + instance = cls(**kwargs) with pytest.raises(UnsetMetadataPassedError, match=re.escape(msg)): method = getattr(instance, method_name) - method(X, y, **kwargs) + method(X, y, **method_kwargs) -@pytest.mark.parametrize( - "metaestimator", - METAESTIMATORS, - ids=METAESTIMATOR_IDS, -) -def test_setting_request_removes_error(metaestimator): - # When the metadata is explicitly requested, there should be no errors. +@pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS) +def test_setting_request_on_sub_estimator_removes_error(metaestimator): + # When the metadata is explicitly requested on the sub-estimator, there + # should be no errors. + if "estimator" not in metaestimator: + # This test only makes sense for metaestimators which have a + # sub-estimator + return + def set_request(estimator, method_name): # e.g. 
call set_fit_request on estimator set_request_for_method = getattr(estimator, f"set_{method_name}_request") set_request_for_method(sample_weight=True, metadata=True) cls = metaestimator["metaestimator"] - estimator_name = metaestimator["estimator_name"] X = metaestimator["X"] y = metaestimator["y"] - routing_methods = metaestimator["routing_methods"] + routing_methods = metaestimator["estimator_routing_methods"] preserves_metadata = metaestimator.get("preserves_metadata", True) for method_name in routing_methods: for key in ["sample_weight", "metadata"]: val = {"sample_weight": sample_weight, "metadata": metadata}[key] - kwargs = {key: val} + method_kwargs = {key: val} - registry = _Registry() - estimator = metaestimator["estimator"](registry=registry) + kwargs, (estimator, registry), (scorer, _), (cv, _) = get_init_args( + metaestimator + ) + if scorer: + set_request(scorer, "score") + if cv: + cv.set_split_request(groups=True, metadata=True) set_request(estimator, method_name) - instance = cls(**{estimator_name: estimator}) + instance = cls(**kwargs) method = getattr(instance, method_name) - method(X, y, **kwargs) + method(X, y, **method_kwargs) - if preserves_metadata: - # sanity check that registry is not empty, or else the test - # passes trivially - assert registry + # sanity check that registry is not empty, or else the test passes + # trivially + assert registry + if preserves_metadata is True: + for estimator in registry: + check_recorded_metadata(estimator, method_name, **method_kwargs) + elif preserves_metadata == "subset": for estimator in registry: - check_recorded_metadata(estimator, method_name, **kwargs) + check_recorded_metadata( + estimator, + method_name, + split_params=method_kwargs.keys(), + **method_kwargs, + ) -@pytest.mark.parametrize("cv_scorer", CV_SCORERS, ids=CV_SCORER_IDS) -def test_metadata_is_routed_correctly_to_scorer(cv_scorer): +@pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS) +def test_metadata_is_routed_correctly_to_scorer(metaestimator): """Test that any requested metadata is correctly routed to the underlying scorers in CV estimators. 
""" - registry = _Registry() - cls = cv_scorer["cv_estimator"] - scorer_name = cv_scorer["scorer_name"] - scorer = ConsumingScorer(registry=registry) - scorer.set_score_request(sample_weight=True) - routing_methods = cv_scorer["routing_methods"] + if "scorer_name" not in metaestimator: + # This test only makes sense for CV estimators + return + + cls = metaestimator["metaestimator"] + routing_methods = metaestimator["scorer_routing_methods"] for method_name in routing_methods: - instance = cls(**{scorer_name: scorer}) + kwargs, (estimator, _), (scorer, registry), (cv, _) = get_init_args( + metaestimator + ) + if estimator: + estimator.set_fit_request(sample_weight=True, metadata=True) + scorer.set_score_request(sample_weight=True) + if cv: + cv.set_split_request(groups=True, metadata=True) + instance = cls(**kwargs) method = getattr(instance, method_name) - kwargs = {"sample_weight": sample_weight} - if "fit" not in method_name: # instance needs to be fitted first + method_kwargs = {"sample_weight": sample_weight} + if "fit" not in method_name: instance.fit(X, y) - method(X, y, **kwargs) + method(X, y, **method_kwargs) + + assert registry for _scorer in registry: check_recorded_metadata( obj=_scorer, method="score", split_params=("sample_weight",), - **kwargs, + **method_kwargs, ) -@pytest.mark.parametrize("cv_splitter", CV_SPLITTERS, ids=CV_SPLITTER_IDS) -def test_metadata_is_routed_correctly_to_splitter(cv_splitter): +@pytest.mark.parametrize("metaestimator", METAESTIMATORS, ids=METAESTIMATOR_IDS) +def test_metadata_is_routed_correctly_to_splitter(metaestimator): """Test that any requested metadata is correctly routed to the underlying splitters in CV estimators. """ - registry = _Registry() - cls = cv_splitter["cv_estimator"] - splitter_name = cv_splitter["splitter_name"] - splitter = ConsumingSplitter(registry=registry) - routing_methods = cv_splitter["routing_methods"] + if "cv_routing_methods" not in metaestimator: + # This test is only for metaestimators accepting a CV splitter + return + + cls = metaestimator["metaestimator"] + routing_methods = metaestimator["cv_routing_methods"] for method_name in routing_methods: - instance = cls(**{splitter_name: splitter}) + kwargs, (estimator, _), (scorer, _), (cv, registry) = get_init_args( + metaestimator + ) + if estimator: + estimator.set_fit_request(sample_weight=False, metadata=False) + if scorer: + scorer.set_score_request(sample_weight=False, metadata=False) + cv.set_split_request(groups=True, metadata=True) + instance = cls(**kwargs) + method_kwargs = {"groups": groups, "metadata": metadata} method = getattr(instance, method_name) - kwargs = {"groups": groups} - method(X, y, **kwargs) + method(X, y, **method_kwargs) + assert registry for _splitter in registry: - check_recorded_metadata(obj=_splitter, method="split", **kwargs) + check_recorded_metadata(obj=_splitter, method="split", **method_kwargs) diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index cafece0406e82..9c53e21c7286c 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -985,7 +985,7 @@ def route_params(self, *, caller, params): def validate_metadata(self, *, method, params): """Validate given metadata for a method. - This raises a ``ValueError`` if some of the passed metadata are not + This raises a ``TypeError`` if some of the passed metadata are not understood by child objects. 
Parameters @@ -1010,8 +1010,8 @@ def validate_metadata(self, *, method, params): extra_keys = set(params.keys()) - param_names - self_params if extra_keys: raise TypeError( - f"{method} got unexpected argument(s) {extra_keys}, which are " - "not requested metadata in any object." + f"{self.owner}.{method} got unexpected argument(s) {extra_keys}, which" + " are not requested metadata in any object." ) def _serialize(self): From 8ed76c74705276b32c565c2dfeb5f1e952a59487 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Mon, 11 Sep 2023 17:37:40 +0200 Subject: [PATCH 16/21] MAINT cython typedefs in _cd_fast (#27334) --- sklearn/linear_model/_cd_fast.pyx | 28 +++++++++++++--------------- sklearn/utils/_typedefs.pxd | 1 + sklearn/utils/_typedefs.pyx | 1 + 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/sklearn/linear_model/_cd_fast.pyx b/sklearn/linear_model/_cd_fast.pyx index 38ba169ae8015..37b051a3ef6e3 100644 --- a/sklearn/linear_model/_cd_fast.pyx +++ b/sklearn/linear_model/_cd_fast.pyx @@ -14,15 +14,13 @@ from cython cimport floating import warnings from ..exceptions import ConvergenceWarning -from ..utils._cython_blas cimport (_axpy, _dot, _asum, _gemv, _nrm2, - _copy, _scal) +from ..utils._cython_blas cimport ( + _axpy, _dot, _asum, _gemv, _nrm2, _copy, _scal +) from ..utils._cython_blas cimport ColMajor, Trans, NoTrans - - +from ..utils._typedefs cimport uint32_t from ..utils._random cimport our_rand_r -ctypedef cnp.float64_t DOUBLE -ctypedef cnp.uint32_t UINT32_t cnp.import_array() @@ -37,7 +35,7 @@ cdef enum: RAND_R_MAX = 2147483647 -cdef inline UINT32_t rand_int(UINT32_t end, UINT32_t* random_state) noexcept nogil: +cdef inline uint32_t rand_int(uint32_t end, uint32_t* random_state) noexcept nogil: """Generate a random integer in [0; end).""" return our_rand_r(random_state) % end @@ -156,8 +154,8 @@ def enet_coordinate_descent( cdef unsigned int ii cdef unsigned int n_iter = 0 cdef unsigned int f_iter - cdef UINT32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX) - cdef UINT32_t* rand_r_state = &rand_r_state_seed + cdef uint32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX) + cdef uint32_t* rand_r_state = &rand_r_state_seed if alpha == 0 and beta == 0: warnings.warn("Coordinate descent with no regularization may lead to " @@ -370,8 +368,8 @@ def sparse_enet_coordinate_descent( cdef unsigned int jj cdef unsigned int n_iter = 0 cdef unsigned int f_iter - cdef UINT32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX) - cdef UINT32_t* rand_r_state = &rand_r_state_seed + cdef uint32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX) + cdef uint32_t* rand_r_state = &rand_r_state_seed cdef bint center = False cdef bint no_sample_weights = sample_weight is None cdef int kk @@ -628,8 +626,8 @@ def enet_coordinate_descent_gram( cdef unsigned int ii cdef unsigned int n_iter = 0 cdef unsigned int f_iter - cdef UINT32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX) - cdef UINT32_t* rand_r_state = &rand_r_state_seed + cdef uint32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX) + cdef uint32_t* rand_r_state = &rand_r_state_seed cdef floating y_norm2 = np.dot(y, y) cdef floating* w_ptr = &w[0] @@ -808,8 +806,8 @@ def enet_coordinate_descent_multi_task( cdef unsigned int jj cdef unsigned int n_iter = 0 cdef unsigned int f_iter - cdef UINT32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX) - cdef UINT32_t* rand_r_state = &rand_r_state_seed + cdef uint32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX) + cdef uint32_t* rand_r_state = &rand_r_state_seed cdef const floating* X_ptr = 
&X[0, 0]
     cdef const floating* Y_ptr = &Y[0, 0]
diff --git a/sklearn/utils/_typedefs.pxd b/sklearn/utils/_typedefs.pxd
index 0ef8f35b93a96..254aef54aeda1 100644
--- a/sklearn/utils/_typedefs.pxd
+++ b/sklearn/utils/_typedefs.pxd
@@ -15,6 +15,7 @@
 #   use these consistently throughout the codebase.
 # NOTE: Extend this list as needed when converting more cython extensions.
 ctypedef unsigned char uint8_t
+ctypedef unsigned int uint32_t
 ctypedef Py_ssize_t intp_t
 ctypedef float float32_t
 ctypedef double float64_t
diff --git a/sklearn/utils/_typedefs.pyx b/sklearn/utils/_typedefs.pyx
index 22e18cdae8d2e..90039ae7f7bcc 100644
--- a/sklearn/utils/_typedefs.pyx
+++ b/sklearn/utils/_typedefs.pyx
@@ -8,6 +8,7 @@ import numpy as np

 ctypedef fused testing_type_t:
     uint8_t
+    uint32_t
     intp_t
     float32_t
     float64_t

From c5f22295029575f050e5ea007cd60130d8538ec2 Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Mon, 11 Sep 2023 18:01:02 +0200
Subject: [PATCH 17/21] MAINT better fused type names in loss module (#27330)

---
 sklearn/_loss/_loss.pxd    |  12 ++-
 sklearn/_loss/_loss.pyx.tp | 168 +++++++++++++++++++------------------
 2 files changed, 93 insertions(+), 87 deletions(-)

diff --git a/sklearn/_loss/_loss.pxd b/sklearn/_loss/_loss.pxd
index b1ddbadcc5f2c..69bef42b9ed6e 100644
--- a/sklearn/_loss/_loss.pxd
+++ b/sklearn/_loss/_loss.pxd
@@ -1,13 +1,17 @@
 # cython: language_level=3

-# Fused types for y_true, y_pred, raw_prediction
-ctypedef fused Y_DTYPE_C:
+# Fused types for input like y_true, raw_prediction, sample_weights.
+ctypedef fused floating_in:
     double
     float

-# Fused types for gradient and hessian
-ctypedef fused G_DTYPE_C:
+# Fused types for output like gradient and hessian
+# We use different fused types for input (floating_in) and output (floating_out),
+# such that input and output can have different dtypes in the same function call. A
+# single fused type can only take on a single value (type) for all arguments in one
+# function call.
+ctypedef fused floating_out:
     double
     float

diff --git a/sklearn/_loss/_loss.pyx.tp b/sklearn/_loss/_loss.pyx.tp
index d01ff43bdc0b4..8efeeea77f0e6 100644
--- a/sklearn/_loss/_loss.pyx.tp
+++ b/sklearn/_loss/_loss.pyx.tp
@@ -268,8 +268,8 @@ cdef inline double log1pexp(double x) noexcept nogil:

 cdef inline void sum_exp_minus_max(
     const int i,
-    const Y_DTYPE_C[:, :] raw_prediction,  # IN
-    Y_DTYPE_C *p                           # OUT
+    const floating_in[:, :] raw_prediction,  # IN
+    floating_in *p                            # OUT
 ) noexcept nogil:
     # Thread local buffers are used to store results of this function via p.
     # The results are stored as follows:
@@ -744,7 +744,7 @@ cdef inline double_pair cgrad_hess_half_binomial(
     double raw_prediction
 ) noexcept nogil:
     # with y_pred = expit(raw)
-    # hessian = y_pred * (1 - y_pred) = exp(raw) / (1 + exp(raw))**2
+    # hessian = y_pred * (1 - y_pred) = exp( raw) / (1 + exp( raw))**2
     #                                 = exp(-raw) / (1 + exp(-raw))**2
     cdef double_pair gh
     gh.val2 = exp(-raw_prediction)  # used as temporary
@@ -835,7 +835,9 @@ cdef class CyLossFunction:
         """
         pass

-    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil:
+    cdef double_pair cy_grad_hess(
+        self, double y_true, double raw_prediction
+    ) noexcept nogil:
        """Compute gradient and hessian.

         Gradient and hessian of loss w.r.t. raw_prediction for a single sample.
@@ -862,10 +864,10 @@ cdef class CyLossFunction: def loss( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[::1] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[::1] loss_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[::1] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[::1] loss_out, # OUT int n_threads=1 ): """Compute the pointwise loss value for each input. @@ -892,10 +894,10 @@ cdef class CyLossFunction: def gradient( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[::1] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[::1] gradient_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[::1] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[::1] gradient_out, # OUT int n_threads=1 ): """Compute gradient of loss w.r.t raw_prediction for each input. @@ -922,11 +924,11 @@ cdef class CyLossFunction: def loss_gradient( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[::1] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[::1] loss_out, # OUT - G_DTYPE_C[::1] gradient_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[::1] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[::1] loss_out, # OUT + floating_out[::1] gradient_out, # OUT int n_threads=1 ): """Compute loss and gradient of loss w.r.t raw_prediction. @@ -960,11 +962,11 @@ cdef class CyLossFunction: def gradient_hessian( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[::1] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[::1] gradient_out, # OUT - G_DTYPE_C[::1] hessian_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[::1] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[::1] gradient_out, # OUT + floating_out[::1] hessian_out, # OUT int n_threads=1 ): """Compute gradient and hessian of loss w.r.t raw_prediction. 
@@ -1022,10 +1024,10 @@ cdef class {{name}}(CyLossFunction): def loss( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[::1] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[::1] loss_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[::1] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[::1] loss_out, # OUT int n_threads=1 ): cdef: @@ -1048,11 +1050,11 @@ cdef class {{name}}(CyLossFunction): {{if closs_grad is not None}} def loss_gradient( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[::1] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[::1] loss_out, # OUT - G_DTYPE_C[::1] gradient_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[::1] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[::1] loss_out, # OUT + floating_out[::1] gradient_out, # OUT int n_threads=1 ): cdef: @@ -1080,10 +1082,10 @@ cdef class {{name}}(CyLossFunction): def gradient( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[::1] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[::1] gradient_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[::1] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[::1] gradient_out, # OUT int n_threads=1 ): cdef: @@ -1105,11 +1107,11 @@ cdef class {{name}}(CyLossFunction): def gradient_hessian( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[::1] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[::1] gradient_out, # OUT - G_DTYPE_C[::1] hessian_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[::1] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[::1] gradient_out, # OUT + floating_out[::1] hessian_out, # OUT int n_threads=1 ): cdef: @@ -1158,18 +1160,18 @@ cdef class CyHalfMultinomialLoss(CyLossFunction): # opposite are welcome. def loss( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[:, :] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[::1] loss_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[:, :] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[::1] loss_out, # OUT int n_threads=1 ): cdef: int i, k int n_samples = y_true.shape[0] int n_classes = raw_prediction.shape[1] - Y_DTYPE_C max_value, sum_exps - Y_DTYPE_C* p # temporary buffer + floating_in max_value, sum_exps + floating_in* p # temporary buffer # We assume n_samples > n_classes. In this case having the inner loop # over n_classes is a good default. @@ -1181,7 +1183,7 @@ cdef class CyHalfMultinomialLoss(CyLossFunction): with nogil, parallel(num_threads=n_threads): # Define private buffer variables as each thread might use its # own. 
- p = malloc(sizeof(Y_DTYPE_C) * (n_classes + 2)) + p = malloc(sizeof(floating_in) * (n_classes + 2)) for i in prange(n_samples, schedule='static'): sum_exp_minus_max(i, raw_prediction, p) @@ -1197,7 +1199,7 @@ cdef class CyHalfMultinomialLoss(CyLossFunction): free(p) else: with nogil, parallel(num_threads=n_threads): - p = malloc(sizeof(Y_DTYPE_C) * (n_classes + 2)) + p = malloc(sizeof(floating_in) * (n_classes + 2)) for i in prange(n_samples, schedule='static'): sum_exp_minus_max(i, raw_prediction, p) @@ -1218,26 +1220,26 @@ cdef class CyHalfMultinomialLoss(CyLossFunction): def loss_gradient( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[:, :] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[::1] loss_out, # OUT - G_DTYPE_C[:, :] gradient_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[:, :] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[::1] loss_out, # OUT + floating_out[:, :] gradient_out, # OUT int n_threads=1 ): cdef: int i, k int n_samples = y_true.shape[0] int n_classes = raw_prediction.shape[1] - Y_DTYPE_C max_value, sum_exps - Y_DTYPE_C* p # temporary buffer + floating_in max_value, sum_exps + floating_in* p # temporary buffer if sample_weight is None: # inner loop over n_classes with nogil, parallel(num_threads=n_threads): # Define private buffer variables as each thread might use its # own. - p = malloc(sizeof(Y_DTYPE_C) * (n_classes + 2)) + p = malloc(sizeof(floating_in) * (n_classes + 2)) for i in prange(n_samples, schedule='static'): sum_exp_minus_max(i, raw_prediction, p) @@ -1256,7 +1258,7 @@ cdef class CyHalfMultinomialLoss(CyLossFunction): free(p) else: with nogil, parallel(num_threads=n_threads): - p = malloc(sizeof(Y_DTYPE_C) * (n_classes + 2)) + p = malloc(sizeof(floating_in) * (n_classes + 2)) for i in prange(n_samples, schedule='static'): sum_exp_minus_max(i, raw_prediction, p) @@ -1280,25 +1282,25 @@ cdef class CyHalfMultinomialLoss(CyLossFunction): def gradient( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[:, :] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[:, :] gradient_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[:, :] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[:, :] gradient_out, # OUT int n_threads=1 ): cdef: int i, k int n_samples = y_true.shape[0] int n_classes = raw_prediction.shape[1] - Y_DTYPE_C sum_exps - Y_DTYPE_C* p # temporary buffer + floating_in sum_exps + floating_in* p # temporary buffer if sample_weight is None: # inner loop over n_classes with nogil, parallel(num_threads=n_threads): # Define private buffer variables as each thread might use its # own. 
- p = malloc(sizeof(Y_DTYPE_C) * (n_classes + 2)) + p = malloc(sizeof(floating_in) * (n_classes + 2)) for i in prange(n_samples, schedule='static'): sum_exp_minus_max(i, raw_prediction, p) @@ -1312,7 +1314,7 @@ cdef class CyHalfMultinomialLoss(CyLossFunction): free(p) else: with nogil, parallel(num_threads=n_threads): - p = malloc(sizeof(Y_DTYPE_C) * (n_classes + 2)) + p = malloc(sizeof(floating_in) * (n_classes + 2)) for i in prange(n_samples, schedule='static'): sum_exp_minus_max(i, raw_prediction, p) @@ -1329,26 +1331,26 @@ cdef class CyHalfMultinomialLoss(CyLossFunction): def gradient_hessian( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[:, :] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[:, :] gradient_out, # OUT - G_DTYPE_C[:, :] hessian_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[:, :] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[:, :] gradient_out, # OUT + floating_out[:, :] hessian_out, # OUT int n_threads=1 ): cdef: int i, k int n_samples = y_true.shape[0] int n_classes = raw_prediction.shape[1] - Y_DTYPE_C sum_exps - Y_DTYPE_C* p # temporary buffer + floating_in sum_exps + floating_in* p # temporary buffer if sample_weight is None: # inner loop over n_classes with nogil, parallel(num_threads=n_threads): # Define private buffer variables as each thread might use its # own. - p = malloc(sizeof(Y_DTYPE_C) * (n_classes + 2)) + p = malloc(sizeof(floating_in) * (n_classes + 2)) for i in prange(n_samples, schedule='static'): sum_exp_minus_max(i, raw_prediction, p) @@ -1364,7 +1366,7 @@ cdef class CyHalfMultinomialLoss(CyLossFunction): free(p) else: with nogil, parallel(num_threads=n_threads): - p = malloc(sizeof(Y_DTYPE_C) * (n_classes + 2)) + p = malloc(sizeof(floating_in) * (n_classes + 2)) for i in prange(n_samples, schedule='static'): sum_exp_minus_max(i, raw_prediction, p) @@ -1387,26 +1389,26 @@ cdef class CyHalfMultinomialLoss(CyLossFunction): # diagonal (in the classes) approximation as implemented above. def gradient_proba( self, - const Y_DTYPE_C[::1] y_true, # IN - const Y_DTYPE_C[:, :] raw_prediction, # IN - const Y_DTYPE_C[::1] sample_weight, # IN - G_DTYPE_C[:, :] gradient_out, # OUT - G_DTYPE_C[:, :] proba_out, # OUT + const floating_in[::1] y_true, # IN + const floating_in[:, :] raw_prediction, # IN + const floating_in[::1] sample_weight, # IN + floating_out[:, :] gradient_out, # OUT + floating_out[:, :] proba_out, # OUT int n_threads=1 ): cdef: int i, k int n_samples = y_true.shape[0] int n_classes = raw_prediction.shape[1] - Y_DTYPE_C sum_exps - Y_DTYPE_C* p # temporary buffer + floating_in sum_exps + floating_in* p # temporary buffer if sample_weight is None: # inner loop over n_classes with nogil, parallel(num_threads=n_threads): # Define private buffer variables as each thread might use its # own. 
-                p = malloc(sizeof(Y_DTYPE_C) * (n_classes + 2))
+                p = malloc(sizeof(floating_in) * (n_classes + 2))

                 for i in prange(n_samples, schedule='static'):
                     sum_exp_minus_max(i, raw_prediction, p)
@@ -1420,7 +1422,7 @@
             free(p)
         else:
             with nogil, parallel(num_threads=n_threads):
-                p = malloc(sizeof(Y_DTYPE_C) * (n_classes + 2))
+                p = malloc(sizeof(floating_in) * (n_classes + 2))

                 for i in prange(n_samples, schedule='static'):
                     sum_exp_minus_max(i, raw_prediction, p)

From 3f110690d084bde4ef02b290f173a473ff930a0f Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Mon, 11 Sep 2023 18:26:17 +0200
Subject: [PATCH 18/21] DOC Rework outlier detection estimators example
 (#25878)

Co-authored-by: ArturoAmorQ
Co-authored-by: Tim Head
Co-authored-by: Julien Jerphanion
Co-authored-by: Guillaume Lemaitre
---
 .../plot_outlier_detection_bench.py           | 562 +++++++++++++-----
 .../preprocessing/plot_scaling_importance.py  |   2 +
 2 files changed, 410 insertions(+), 154 deletions(-)

diff --git a/examples/miscellaneous/plot_outlier_detection_bench.py b/examples/miscellaneous/plot_outlier_detection_bench.py
index a0901a31b8a2e..b4fae93131971 100644
--- a/examples/miscellaneous/plot_outlier_detection_bench.py
+++ b/examples/miscellaneous/plot_outlier_detection_bench.py
@@ -3,197 +3,451 @@
 Evaluation of outlier detection estimators
 ==========================================

-This example benchmarks outlier detection algorithms, :ref:`local_outlier_factor`
-(LOF) and :ref:`isolation_forest` (IForest), using ROC curves on
-classical anomaly detection datasets. The algorithm performance
-is assessed in an outlier detection context:
+This example compares two outlier detection algorithms, namely
+:ref:`local_outlier_factor` (LOF) and :ref:`isolation_forest` (IForest), on
+real-world datasets available in :class:`sklearn.datasets`. The goal is to show
+that different algorithms perform well on different datasets.

-1. The algorithms are trained on the whole dataset which is assumed to
-contain outliers.
+The algorithms are trained in an outlier detection context:

-2. The ROC curve from :class:`~sklearn.metrics.RocCurveDisplay` is computed
-on the same dataset using the knowledge of the labels.
+1. The ROC curves are computed using knowledge of the ground-truth labels
+and displayed using :class:`~sklearn.metrics.RocCurveDisplay`.
+
+2. The performance is assessed in terms of the ROC-AUC.
 """

 # Author: Pharuj Rajborirug
+#         Arturo Amor
 # License: BSD 3 clause

-print(__doc__)
+# %%
+# Dataset preprocessing and model training
+# ========================================
+#
+# Different outlier detection models require different preprocessing. In the
+# presence of categorical variables,
+# :class:`~sklearn.preprocessing.OrdinalEncoder` is often a good strategy for
+# tree-based models such as :class:`~sklearn.ensemble.IsolationForest`, whereas
+# neighbors-based models such as :class:`~sklearn.neighbors.LocalOutlierFactor`
+# would be impacted by the ordering induced by ordinal encoding. To avoid
+# inducing an ordering, one should rather use
+# :class:`~sklearn.preprocessing.OneHotEncoder`.
+#
+# Neighbors-based models may also require scaling of the numerical features (see
+# for instance :ref:`neighbors_scaling`). In the presence of outliers, a good
+# option is to use a :class:`~sklearn.preprocessing.RobustScaler`.
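+
+# %%
+# As a minimal sketch of why the choice of encoder matters for neighbors-based
+# models (the three category values below are made up for illustration),
+# ordinal encoding places "c" twice as far from "a" as "b" is, whereas one-hot
+# encoding keeps all categories equidistant:
+
+import numpy as np
+
+from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
+
+categories = np.array([["a"], ["b"], ["c"]])
+print(OrdinalEncoder().fit_transform(categories))  # [[0.], [1.], [2.]]
+print(OneHotEncoder(sparse_output=False).fit_transform(categories))  # identity matrix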
+
+from sklearn.compose import ColumnTransformer
+from sklearn.ensemble import IsolationForest
+from sklearn.neighbors import LocalOutlierFactor
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import (
+    OneHotEncoder,
+    OrdinalEncoder,
+    RobustScaler,
+)
+
+
+def make_estimator(name, categorical_columns=None, iforest_kw=None, lof_kw=None):
+    """Create an outlier detection estimator based on its name."""
+    if name == "LOF":
+        outlier_detector = LocalOutlierFactor(**(lof_kw or {}))
+        if categorical_columns is None:
+            preprocessor = RobustScaler()
+        else:
+            preprocessor = ColumnTransformer(
+                transformers=[("categorical", OneHotEncoder(), categorical_columns)],
+                remainder=RobustScaler(),
+            )
+    else:  # name == "IForest"
+        outlier_detector = IsolationForest(**(iforest_kw or {}))
+        if categorical_columns is None:
+            preprocessor = None
+        else:
+            ordinal_encoder = OrdinalEncoder(
+                handle_unknown="use_encoded_value", unknown_value=-1
+            )
+            preprocessor = ColumnTransformer(
+                transformers=[
+                    ("categorical", ordinal_encoder, categorical_columns),
+                ],
+                remainder="passthrough",
+            )
+
+    return make_pipeline(preprocessor, outlier_detector)
+

 # %%
+# The following `fit_predict` function returns the average outlier score of X.
+
+from time import perf_counter
+
+
+def fit_predict(estimator, X):
+    tic = perf_counter()
+    if estimator[-1].__class__.__name__ == "LocalOutlierFactor":
+        estimator.fit(X)
+        y_pred = estimator[-1].negative_outlier_factor_
+    else:  # "IsolationForest"
+        y_pred = estimator.fit(X).decision_function(X)
+    toc = perf_counter()
+    print(f"Duration for {model_name}: {toc - tic:.2f} s")
+    return y_pred
+
+
+# %%
+# In the rest of this example, we process one dataset per section. After loading
+# the data, the targets are modified to consist of two classes: 0 representing
+# inliers and 1 representing outliers. Due to computational constraints of the
+# scikit-learn documentation, the sample size of some datasets is reduced using
+# a stratified :class:`~sklearn.model_selection.train_test_split`.
+#
+# Furthermore, we set `n_neighbors` to match the expected number of anomalies
+# `expected_n_anomalies = n_samples * expected_anomaly_fraction`. This is a good
+# heuristic as long as the proportion of outliers is not very low, the reason
+# being that `n_neighbors` should be greater than the number of samples in the
+# less populated cluster (see
+# :ref:`sphx_glr_auto_examples_neighbors_plot_lof_outlier_detection.py`).
+#
+# KDDCup99 - SA dataset
+# ---------------------
+#
+# The :ref:`kddcup99_dataset` was generated using a closed network and
+# hand-injected attacks. The SA dataset is a subset of it obtained by simply
+# selecting all the normal data and an anomaly proportion of around 3%.
+# %% import numpy as np -import pandas as pd - -from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml -from sklearn.preprocessing import LabelBinarizer - -rng = np.random.RandomState(42) - - -def preprocess_dataset(dataset_name): - # loading and vectorization - print(f"Loading {dataset_name} data") - if dataset_name in ["http", "smtp", "SA", "SF"]: - dataset = fetch_kddcup99(subset=dataset_name, percent10=True, random_state=rng) - X = dataset.data - y = dataset.target - lb = LabelBinarizer() - - if dataset_name == "SF": - idx = rng.choice(X.shape[0], int(X.shape[0] * 0.1), replace=False) - X = X[idx] # reduce the sample size - y = y[idx] - x1 = lb.fit_transform(X[:, 1].astype(str)) - X = np.c_[X[:, :1], x1, X[:, 2:]] - elif dataset_name == "SA": - idx = rng.choice(X.shape[0], int(X.shape[0] * 0.1), replace=False) - X = X[idx] # reduce the sample size - y = y[idx] - x1 = lb.fit_transform(X[:, 1].astype(str)) - x2 = lb.fit_transform(X[:, 2].astype(str)) - x3 = lb.fit_transform(X[:, 3].astype(str)) - X = np.c_[X[:, :1], x1, x2, x3, X[:, 4:]] - y = (y != b"normal.").astype(int) - if dataset_name == "forestcover": - dataset = fetch_covtype() - X = dataset.data - y = dataset.target - idx = rng.choice(X.shape[0], int(X.shape[0] * 0.1), replace=False) - X = X[idx] # reduce the sample size - y = y[idx] - - # inliers are those with attribute 2 - # outliers are those with attribute 4 - s = (y == 2) + (y == 4) - X = X[s, :] - y = y[s] - y = (y != 2).astype(int) - if dataset_name in ["glass", "wdbc", "cardiotocography"]: - dataset = fetch_openml( - name=dataset_name, version=1, as_frame=False, parser="pandas" - ) - X = dataset.data - y = dataset.target - - if dataset_name == "glass": - s = y == "tableware" - y = s.astype(int) - if dataset_name == "wdbc": - s = y == "2" - y = s.astype(int) - X_mal, y_mal = X[s], y[s] - X_ben, y_ben = X[~s], y[~s] - - # downsampled to 39 points (9.8% outliers) - idx = rng.choice(y_mal.shape[0], 39, replace=False) - X_mal2 = X_mal[idx] - y_mal2 = y_mal[idx] - X = np.concatenate((X_ben, X_mal2), axis=0) - y = np.concatenate((y_ben, y_mal2), axis=0) - if dataset_name == "cardiotocography": - s = y == "3" - y = s.astype(int) - # 0 represents inliers, and 1 represents outliers - y = pd.Series(y, dtype="category") - return (X, y) - - -# %% -# Define an outlier prediction function -# ------------------------------------- -# There is no particular reason to choose algorithms -# :class:`~sklearn.neighbors.LocalOutlierFactor` and -# :class:`~sklearn.ensemble.IsolationForest`. The goal is to show that -# different algorithm performs well on different datasets. The following -# `compute_prediction` function returns average outlier score of X. 
+from sklearn.datasets import fetch_kddcup99
+from sklearn.model_selection import train_test_split
+
+X, y = fetch_kddcup99(
+    subset="SA", percent10=True, random_state=42, return_X_y=True, as_frame=True
+)
+y = (y != b"normal.").astype(np.int32)
+X, _, y, _ = train_test_split(X, y, train_size=0.1, stratify=y, random_state=42)
+n_samples, anomaly_frac = X.shape[0], y.mean()
+print(f"{n_samples} datapoints with {y.sum()} anomalies ({anomaly_frac:.02%})")
+
+# %%
+# The SA dataset contains 41 features out of which 3 are categorical:
+# "protocol_type", "service" and "flag".
+
+# %%
+y_true = {}
+y_pred = {"LOF": {}, "IForest": {}}
+model_names = ["LOF", "IForest"]
+cat_columns = ["protocol_type", "service", "flag"]
+
+y_true["KDDCup99 - SA"] = y
+for model_name in model_names:
+    model = make_estimator(
+        name=model_name,
+        categorical_columns=cat_columns,
+        lof_kw={"n_neighbors": int(n_samples * anomaly_frac)},
+        iforest_kw={"random_state": 42},
+    )
+    y_pred[model_name]["KDDCup99 - SA"] = fit_predict(model, X)
+
+# %%
+# Forest covertypes dataset
+# -------------------------
+#
+# The :ref:`covtype_dataset` is a multiclass dataset where the target is the
+# dominant species of tree in a given patch of forest. It contains 54 features,
+# some of which ("Wilderness_Area" and "Soil_Type") are already binary encoded.
+# Though originally meant as a classification task, one can regard inliers as
+# samples encoded with label 2 and outliers as those with label 4.
+
+# %%
+from sklearn.datasets import fetch_covtype
+
+X, y = fetch_covtype(return_X_y=True, as_frame=True)
+s = (y == 2) + (y == 4)
+X = X.loc[s]
+y = y.loc[s]
+y = (y != 2).astype(np.int32)
+
+X, _, y, _ = train_test_split(X, y, train_size=0.05, stratify=y, random_state=42)
+X_forestcover = X  # save X for later use
+n_samples, anomaly_frac = X.shape[0], y.mean()
+print(f"{n_samples} datapoints with {y.sum()} anomalies ({anomaly_frac:.02%})")
+
+# %%
+y_true["forestcover"] = y
+for model_name in model_names:
+    model = make_estimator(
+        name=model_name,
+        lof_kw={"n_neighbors": int(n_samples * anomaly_frac)},
+        iforest_kw={"random_state": 42},
+    )
+    y_pred[model_name]["forestcover"] = fit_predict(model, X)
+
+# %%
+# Ames Housing dataset
+# --------------------
+#
+# The `Ames housing dataset `_ is originally a
+# regression dataset where the target is the sales price of houses in Ames,
+# Iowa. Here we convert it into an outlier detection problem by regarding
+# houses with a price over 70 USD/sqft as outliers. To make the problem easier,
+# we drop intermediate prices between 40 and 70 USD/sqft.
+
+# %%
 import matplotlib.pyplot as plt

 from sklearn.datasets import fetch_openml

+X, y = fetch_openml(
+    name="ames_housing", version=1, return_X_y=True, as_frame=True, parser="pandas"
+)
+y = y.div(X["Lot_Area"])
+
+# None values in pandas 1.5.1 were mapped to np.nan in pandas 2.0.1
+X["Misc_Feature"] = X["Misc_Feature"].cat.add_categories("NoInfo").fillna("NoInfo")
+X["Mas_Vnr_Type"] = X["Mas_Vnr_Type"].cat.add_categories("NoInfo").fillna("NoInfo")
+
+X.drop(columns="Lot_Area", inplace=True)
+mask = (y < 40) | (y > 70)
+X = X.loc[mask]
+y = y.loc[mask]
+y.hist(bins=20, edgecolor="black")
+plt.xlabel("House price in USD/sqft")
+_ = plt.title("Distribution of house prices in Ames")

+# %%
+y = (y > 70).astype(np.int32)
+
+n_samples, anomaly_frac = X.shape[0], y.mean()
+print(f"{n_samples} datapoints with {y.sum()} anomalies ({anomaly_frac:.02%})")
+
+# %%
+# The dataset contains 46 categorical features. In this case it is easier to
+# use a :class:`~sklearn.compose.make_column_selector` to find them instead of
+# passing a list made by hand.
+
+# %%
+from sklearn.compose import make_column_selector as selector
+
+categorical_columns_selector = selector(dtype_include="category")
+cat_columns = categorical_columns_selector(X)
+
+y_true["ames_housing"] = y
+for model_name in model_names:
+    model = make_estimator(
+        name=model_name,
+        categorical_columns=cat_columns,
+        lof_kw={"n_neighbors": int(n_samples * anomaly_frac)},
+        iforest_kw={"random_state": 42},
+    )
+    y_pred[model_name]["ames_housing"] = fit_predict(model, X)
+
+# %%
+# Cardiotocography dataset
+# ------------------------
+#
+# The `Cardiotocography dataset `_ is a multiclass
+# dataset of fetal cardiotocograms, the classes being the fetal heart rate (FHR)
+# pattern encoded with labels from 1 to 10. Here we set class 3 (the minority
+# class) to represent the outliers. It contains 30 numerical features, some of
+# which are binary encoded and some are continuous.
+
+# %%
+X, y = fetch_openml(
+    name="cardiotocography", version=1, return_X_y=True, as_frame=False, parser="pandas"
+)
+X_cardiotocography = X  # save X for later use
+s = y == "3"
+y = s.astype(np.int32)
+
+n_samples, anomaly_frac = X.shape[0], y.mean()
+print(f"{n_samples} datapoints with {y.sum()} anomalies ({anomaly_frac:.02%})")
+
+# %%
+y_true["cardiotocography"] = y
+for model_name in model_names:
+    model = make_estimator(
+        name=model_name,
+        lof_kw={"n_neighbors": int(n_samples * anomaly_frac)},
+        iforest_kw={"random_state": 42},
+    )
+    y_pred[model_name]["cardiotocography"] = fit_predict(model, X)
+
+# %%
+# Plot and interpret results
+# ==========================
+#
+# The algorithm performance relates to how good the true positive rate (TPR) is
+# at low values of the false positive rate (FPR). The best algorithms have the
+# curve on the top-left of the plot and the area under curve (AUC) close to 1.
+# The diagonal dashed line represents a random classification of outliers and
+# inliers.
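+
+# %%
+# As a quick sanity check (an illustrative sketch, not part of the benchmark
+# itself), scoring the samples of the last dataset at random should give a ROC
+# AUC close to 0.5, i.e. the chance level marked by the dashed diagonal:
+
+from sklearn.metrics import roc_auc_score
+
+random_scores = np.random.RandomState(42).uniform(size=n_samples)
+print(f"ROC AUC of random scores: {roc_auc_score(y, random_scores):.2f}")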
+
+# %%
 import math

 from sklearn.metrics import RocCurveDisplay

-# plotting parameters
 cols = 2
-linewidth = 1
 pos_label = 0  # means 0 belongs to the positive class
-rows = math.ceil(len(datasets_name) / cols)
-
-fig, axs = plt.subplots(rows, cols, figsize=(10, 10 * 2), sharex=True, sharey=True)
+datasets_names = y_true.keys()
+rows = math.ceil(len(datasets_names) / cols)

-for ax, dataset_name in zip(axs.ravel(), datasets_name):
-    (X, y) = preprocess_dataset(dataset_name=dataset_name)
+fig, axs = plt.subplots(nrows=rows, ncols=cols, squeeze=False, figsize=(10, rows * 4))

-    for model_idx, model_name in enumerate(models_name):
-        y_pred = compute_prediction(X, model_name=model_name)
+for ax, dataset_name in zip(axs.ravel(), datasets_names):
+    for model_idx, model_name in enumerate(model_names):
         display = RocCurveDisplay.from_predictions(
-            y,
-            y_pred,
+            y_true[dataset_name],
+            y_pred[model_name][dataset_name],
             pos_label=pos_label,
             name=model_name,
-            linewidth=linewidth,
             ax=ax,
-            plot_chance_level=(model_idx == len(models_name) - 1),
-            chance_level_kw={
-                "linewidth": linewidth,
-                "linestyle": ":",
-            },
+            plot_chance_level=(model_idx == len(model_names) - 1),
+            chance_level_kw={"linestyle": ":"},
         )
         ax.set_title(dataset_name)
-    ax.legend(loc="lower right")
-plt.tight_layout(pad=2.0)  # spacing between subplots
+_ = plt.tight_layout(pad=2.0)  # spacing between subplots
+
+# %%
+# We observe that once the number of neighbors is tuned, LOF and IForest perform
+# similarly in terms of ROC AUC for the forestcover and cardiotocography
+# datasets. The score for IForest is slightly better for the SA dataset and LOF
+# performs considerably better on the Ames housing dataset than IForest.
+#
+# Ablation study
+# ==============
+#
+# In this section we explore the impact of the hyperparameter `n_neighbors` and
+# the choice of scaling the numerical variables on the LOF model. Here we use
+# the :ref:`covtype_dataset` dataset as the binary encoded categories introduce
+# a natural scale of Euclidean distances between 0 and 1. We then want a scaling
+# method that avoids granting a privilege to non-binary features and that is
+# robust enough to outliers so that the task of finding them does not become too
+# difficult.
+
+# %%
+X = X_forestcover
+y = y_true["forestcover"]
+
+n_samples = X.shape[0]
+n_neighbors_list = (n_samples * np.array([0.2, 0.02, 0.01, 0.001])).astype(np.int32)
+model = make_pipeline(RobustScaler(), LocalOutlierFactor())
+
+linestyles = ["solid", "dashed", "dashdot", ":", (5, (10, 3))]
+
+fig, ax = plt.subplots()
+for model_idx, (linestyle, n_neighbors) in enumerate(zip(linestyles, n_neighbors_list)):
+    model.set_params(localoutlierfactor__n_neighbors=n_neighbors)
+    model.fit(X)
+    y_pred = model[-1].negative_outlier_factor_
+    display = RocCurveDisplay.from_predictions(
+        y,
+        y_pred,
+        pos_label=pos_label,
+        name=f"n_neighbors = {n_neighbors}",
+        ax=ax,
+        plot_chance_level=(model_idx == len(n_neighbors_list) - 1),
+        chance_level_kw={"linestyle": (0, (1, 10))},
+        linestyle=linestyle,
+        linewidth=2,
+    )
+_ = ax.set_title("RobustScaler with varying n_neighbors\non forestcover dataset")
+
+# %%
+# We observe that the number of neighbors has a big impact on the performance of
+# the model. If one has access to (at least some) ground truth labels, it is
+# then important to tune `n_neighbors` accordingly. A convenient way to do so is
+# to explore values for `n_neighbors` of the order of magnitude of the expected
+# contamination.
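+
+# %%
+# Concretely (an illustrative sketch rather than part of the study), with the
+# roughly 2% of anomalies assumed for this dataset below, the heuristic amounts
+# to starting the search around `0.02 * n_samples` neighbors:
+
+print(f"heuristic starting point: n_neighbors = {int(n_samples * 0.02)}")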
+
+# %%
+from sklearn.preprocessing import MinMaxScaler, SplineTransformer, StandardScaler
+
+preprocessor_list = [
+    None,
+    RobustScaler(),
+    StandardScaler(),
+    MinMaxScaler(),
+    SplineTransformer(),
+]
+expected_anomaly_fraction = 0.02
+lof = LocalOutlierFactor(n_neighbors=int(n_samples * expected_anomaly_fraction))
+
+fig, ax = plt.subplots()
+for model_idx, (linestyle, preprocessor) in enumerate(
+    zip(linestyles, preprocessor_list)
+):
+    model = make_pipeline(preprocessor, lof)
+    model.fit(X)
+    y_pred = model[-1].negative_outlier_factor_
+    display = RocCurveDisplay.from_predictions(
+        y,
+        y_pred,
+        pos_label=pos_label,
+        name=str(preprocessor).split("(")[0],
+        ax=ax,
+        plot_chance_level=(model_idx == len(preprocessor_list) - 1),
+        chance_level_kw={"linestyle": (0, (1, 10))},
+        linestyle=linestyle,
+        linewidth=2,
+    )
+_ = ax.set_title("Fixed n_neighbors with varying preprocessing\non forestcover dataset")
+
+# %%
+# On the one hand, :class:`~sklearn.preprocessing.RobustScaler` scales each
+# feature independently by using the interquartile range (IQR) by default, which
+# is the range between the 25th and 75th percentiles of the data. It centers the
+# data by subtracting the median and then scales it by dividing by the IQR. The
+# IQR is robust to outliers: the median and the interquartile range are less
+# affected by extreme values than the range, the mean and the standard
+# deviation. Furthermore, :class:`~sklearn.preprocessing.RobustScaler` does not
+# squash marginal outlier values, contrary to
+# :class:`~sklearn.preprocessing.StandardScaler`.
+#
+# On the other hand, :class:`~sklearn.preprocessing.MinMaxScaler` scales each
+# feature individually such that its range maps into the range between zero and
+# one. If there are outliers in the data, they can skew it towards either the
+# minimum or maximum values, leading to a completely different distribution of
+# data with large marginal outliers: all non-outlier values can be collapsed
+# almost together as a result.
+#
+# We also evaluated no preprocessing at all (by passing `None` to the pipeline),
+# :class:`~sklearn.preprocessing.StandardScaler` and
+# :class:`~sklearn.preprocessing.SplineTransformer`. Please refer to their
+# respective documentation for more details.
+#
+# Note that the optimal preprocessing depends on the dataset, as shown below:
+
+# %%
+X = X_cardiotocography
+y = y_true["cardiotocography"]
+
+n_samples, expected_anomaly_fraction = X.shape[0], 0.025
+lof = LocalOutlierFactor(n_neighbors=int(n_samples * expected_anomaly_fraction))
+
+fig, ax = plt.subplots()
+for model_idx, (linestyle, preprocessor) in enumerate(
+    zip(linestyles, preprocessor_list)
+):
+    model = make_pipeline(preprocessor, lof)
+    model.fit(X)
+    y_pred = model[-1].negative_outlier_factor_
+    display = RocCurveDisplay.from_predictions(
+        y,
+        y_pred,
+        pos_label=pos_label,
+        name=str(preprocessor).split("(")[0],
+        ax=ax,
+        plot_chance_level=(model_idx == len(preprocessor_list) - 1),
+        chance_level_kw={"linestyle": (0, (1, 10))},
+        linestyle=linestyle,
+        linewidth=2,
+    )
+ax.set_title(
+    "Fixed n_neighbors with varying preprocessing\non cardiotocography dataset"
+)
 plt.show()
diff --git a/examples/preprocessing/plot_scaling_importance.py b/examples/preprocessing/plot_scaling_importance.py
index 6e0ae0ae1c109..5b78fe0636d8c 100644
--- a/examples/preprocessing/plot_scaling_importance.py
+++ b/examples/preprocessing/plot_scaling_importance.py
@@ -52,6 +52,8 @@
 scaled_X_train = scaler.fit_transform(X_train)
 
 # %%
+# .. _neighbors_scaling:
+#
 # Effect of rescaling on a k-neighbors model
 # ===========================================
 #

From 0b6f442efea6554a692089a8112bb5e89bad38b2 Mon Sep 17 00:00:00 2001
From: Rahil Parikh <75483881+rprkh@users.noreply.github.com>
Date: Mon, 11 Sep 2023 23:04:45 +0530
Subject: [PATCH 19/21] TST Extend tests for `scipy.sparse.*array` in `sklearn/model_selection/tests/test_search.py` (#27286)

Co-authored-by: Guillaume Lemaitre
---
 sklearn/model_selection/tests/test_search.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 1db38671ef481..d2cfdd7f7b2ed 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -11,7 +11,6 @@
 
 import numpy as np
 import pytest
-import scipy.sparse as sp
 from scipy.stats import bernoulli, expon, uniform
 
 from sklearn.base import BaseEstimator, ClassifierMixin, is_classifier
@@ -77,6 +76,7 @@
     assert_array_equal,
     ignore_warnings,
 )
+from sklearn.utils.fixes import CSR_CONTAINERS
 from sklearn.utils.validation import _num_samples
 
 
@@ -489,7 +489,8 @@ def test_grid_search_bad_param_grid():
         search.fit(X, y)
 
 
-def test_grid_search_sparse():
+@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
+def test_grid_search_sparse(csr_container):
     # Test that grid search works with both dense and sparse matrices
     X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
 
@@ -499,7 +500,7 @@ def test_grid_search_sparse():
     y_pred = cv.predict(X_[180:])
     C = cv.best_estimator_.C
 
-    X_ = sp.csr_matrix(X_)
+    X_ = csr_container(X_)
     clf = LinearSVC(dual="auto")
     cv = GridSearchCV(clf, {"C": [0.1, 1.0]})
     cv.fit(X_[:180].tocoo(), y_[:180])
@@ -510,7 +511,8 @@ def test_grid_search_sparse():
     assert C == C2
 
 
-def test_grid_search_sparse_scoring():
+@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
+def test_grid_search_sparse_scoring(csr_container):
     X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
 
     clf = LinearSVC(dual="auto")
@@ -519,7 +521,7 @@ def test_grid_search_sparse_scoring():
     y_pred = cv.predict(X_[180:])
    	C = cv.best_estimator_.C
 
-    X_ = sp.csr_matrix(X_)
+    X_ = csr_container(X_)
     clf = LinearSVC(dual="auto")
     cv = GridSearchCV(clf, {"C": [0.1, 1.0]}, scoring="f1")
     cv.fit(X_[:180], y_[:180])

From 3dd49068286b70bcab4295d60e595bc44a9641fc Mon Sep 17 00:00:00 2001
From: Alexander Al-Feghali
Date: Mon, 11 Sep 2023 13:35:34 -0400
Subject: [PATCH 20/21] TST Extend tests for `scipy.sparse.*array` in `sklearn\model_selection\tests\test_search.py` (#27326)

Co-authored-by: Guillaume Lemaitre

From 4b8799798f95b61e80c73c329b7ec2c7ec431dde Mon Sep 17 00:00:00 2001
From: Rahil Parikh <75483881+rprkh@users.noreply.github.com>
Date: Mon, 11 Sep 2023 23:10:14 +0530
Subject: [PATCH 21/21] TST Extend tests for `scipy.sparse.*array` in `sklearn/tests/test_pipeline.py` (#27278)

---
 sklearn/tests/test_pipeline.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py
index 76fa44a04a1d6..c57209d3c00d2 100644
--- a/sklearn/tests/test_pipeline.py
+++ b/sklearn/tests/test_pipeline.py
@@ -10,7 +10,6 @@
 
 import joblib
 import numpy as np
 import pytest
-from scipy import sparse
 
 from sklearn.base import BaseEstimator, TransformerMixin, clone, is_classifier
 from sklearn.cluster import KMeans
@@ -46,6 +45,7 @@
     assert_array_almost_equal,
     assert_array_equal,
 )
+from sklearn.utils.fixes import CSR_CONTAINERS
 from sklearn.utils.validation import check_is_fitted
 
 iris = load_iris()
@@ -485,7 +485,8 @@ def test_predict_methods_with_predict_params(method_name):
     assert pipe.named_steps["clf"].got_attribute
 
 
-def test_feature_union():
+@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
+def test_feature_union(csr_container):
     # basic sanity check for feature union
     X = iris.data
     X -= X.mean(axis=0)
@@ -504,7 +505,7 @@ def test_feature_union():
     # test if it also works for sparse input
     # We use a different svd object to control the random_state stream
     fs = FeatureUnion([("svd", svd), ("select", select)])
-    X_sp = sparse.csr_matrix(X)
+    X_sp = csr_container(X)
     X_sp_transformed = fs.fit_transform(X_sp, y)
     assert_array_almost_equal(X_transformed, X_sp_transformed.toarray())
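
A minimal sketch of the parametrization pattern shared by the three patches
above, assuming `CSR_CONTAINERS` (imported from `sklearn.utils.fixes`) lists
the CSR classes available in the installed SciPy (`scipy.sparse.csr_matrix`,
plus `scipy.sparse.csr_array` when available); the test name and assertion
here are hypothetical, and each test body runs once per container:

    import numpy as np
    import pytest

    from sklearn.utils.fixes import CSR_CONTAINERS


    @pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
    def test_csr_round_trip(csr_container):
        # Converting dense data to any supported CSR container and back
        # should preserve the values exactly.
        X = np.array([[0.0, 1.0], [2.0, 0.0]])
        assert np.array_equal(csr_container(X).toarray(), X)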