diff --git a/build_tools/cirrus/arm_tests.yml b/build_tools/cirrus/arm_tests.yml index f64adbcdd4748..8fe3c7b6153f2 100644 --- a/build_tools/cirrus/arm_tests.yml +++ b/build_tools/cirrus/arm_tests.yml @@ -11,6 +11,10 @@ linux_aarch64_test_task: LOCK_FILE: build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock CONDA_PKGS_DIRS: /root/.conda/pkgs HOME: / # $HOME is not defined in image and is required to install mambaforge + # Upload tokens have been encrypted via the CirrusCI interface: + # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables + # See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires. + BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f] ccache_cache: folder: /root/.cache/ccache conda_cache: diff --git a/doc/conf.py b/doc/conf.py index 1d4b061f9afb5..288aba5404ae6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -312,15 +312,18 @@ html_show_search_summary = False +# The "summary-anchor" IDs will be overwritten via JavaScript to be unique. +# See `doc/theme/scikit-learn-modern/static/js/details-permalink.js`. rst_prolog = """ .. |details-start| raw:: html -
+    <details id="summary-anchor">
.. |details-split| raw:: html

     <span class="tooltiptext">Click for more details</span>
+    <a class="headerlink" href="#summary-anchor">¶</a>
     </summary>
     <div>
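The anchor IDs referenced above are rewritten at page-load time by the new
`doc/themes/scikit-learn-modern/static/js/details-permalink.js` script added
below. As a rough Python sketch of the same slug-and-deduplicate logic (the
function name `summary_to_anchor_ids` is illustrative only and is not part of
this patch)::

    def summary_to_anchor_ids(summaries):
        """Slugify the first line of each summary text and suffix duplicates
        with a counter, mirroring updateIdAndHrefBasedOnSummaryText()."""
        counters = {}
        anchor_ids = []
        for text in summaries:
            # first line only, whitespace runs collapsed to dashes, lowercased
            anchor = "-".join(text.strip().splitlines()[0].split()).lower()
            if anchor in counters:
                counters[anchor] += 1
                anchor = f"{anchor}-{counters[anchor]}"
            else:
                counters[anchor] = 1
            anchor_ids.append(anchor)
        return anchor_ids

    print(summary_to_anchor_ids(["Mathematical details", "Mathematical details"]))
    # ['mathematical-details', 'mathematical-details-2']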
diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 3f99e7841bb00..efdde897e841b 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -612,7 +612,7 @@ Here, ```` is the parameter name of the nested estimator, in this case ``estimator``. If the meta-estimator is constructed as a collection of estimators as in `pipeline.Pipeline`, then ```` refers to the name of the estimator, -see :ref:`pipeline_nested_parameters`. In practice, there can be several +see :ref:`pipeline_nested_parameters`. In practice, there can be several levels of nesting:: >>> from sklearn.pipeline import Pipeline diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 2ebb7dff7810b..a88a92604767e 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -181,9 +181,15 @@ take several parameters: of the python function is negated by the scorer object, conforming to the cross validation convention that scorers return higher values for better models. -* for classification metrics only: whether the python function you provided requires continuous decision - certainties (``needs_threshold=True``). The default value is - False. +* for classification metrics only: whether the python function you provided requires + continuous decision certainties. If the scoring function only accepts probability + estimates (e.g. :func:`metrics.log_loss`) then one needs to set the parameter + `response_method`, thus in this case `response_method="predict_proba"`. Some scoring + function do not necessarily require probability estimates but rather non-thresholded + decision values (e.g. :func:`metrics.roc_auc_score`). In this case, one provides a + list such as `response_method=["decision_function", "predict_proba"]`. In this case, + the scorer will use the first available method, in the order given in the list, + to compute the scores. * any additional parameters, such as ``beta`` or ``labels`` in :func:`f1_score`. diff --git a/doc/themes/scikit-learn-modern/layout.html b/doc/themes/scikit-learn-modern/layout.html index 191434c7ec2e2..a759ec2f8c8f0 100644 --- a/doc/themes/scikit-learn-modern/layout.html +++ b/doc/themes/scikit-learn-modern/layout.html @@ -36,6 +36,7 @@ + {%- block extrahead %} {% endblock %} diff --git a/doc/themes/scikit-learn-modern/static/css/theme.css b/doc/themes/scikit-learn-modern/static/css/theme.css index 21e1a2336a553..56f208540fd70 100644 --- a/doc/themes/scikit-learn-modern/static/css/theme.css +++ b/doc/themes/scikit-learn-modern/static/css/theme.css @@ -149,6 +149,15 @@ div.clearer { /* details / summary */ +/* Enables section links to be visible when anchor-linked */ +div.sk-page-content details::before { + display: block; + height: 52px; + margin-top: -52px; + visibility: hidden; + content: ""; +} + div.sk-page-content details { margin: 4ex 0pt; } @@ -202,6 +211,10 @@ div.sk-page-content summary:hover .tooltiptext { visibility: visible; } +div.sk-page-content summary:hover .headerlink { + visibility: visible; +} + /* Button */ .sk-btn-primary { diff --git a/doc/themes/scikit-learn-modern/static/js/details-permalink.js b/doc/themes/scikit-learn-modern/static/js/details-permalink.js new file mode 100644 index 0000000000000..62392e9836f64 --- /dev/null +++ b/doc/themes/scikit-learn-modern/static/js/details-permalink.js @@ -0,0 +1,47 @@ +// Function to create permalink into
<details> elements to be able to link them
+// The assumption is that such a block will be defined as follows:
+//
+// <details id="summary-anchor">
+//   <summary>
+//     Some title
+//     <span class="tooltiptext">Click for more details</span>
+//     <a class="headerlink" href="#summary-anchor">¶</a>
+//   </summary>
+//   <div>
+//     Some details
+//   </div>
+// </details>
+// We seek to replace `#summary-anchor` with a unique identifier based on the +// summary text. +// This syntax is defined in `doc/conf.py` in the `rst_prolog` variable. +function updateIdAndHrefBasedOnSummaryText() { + var allDetailsElements = document.querySelectorAll('details'); + // Counter to store the duplicated summary text to add it as a suffix in the + // anchor ID + var anchorIDCounters = {}; + + allDetailsElements.forEach(function (detailsElement) { + // Get the element within the current
+ var summaryElement = detailsElement.querySelector('summary'); + + // The ID uses the first line, lowercased, and spaces replaced with dashes + var anchorID = summaryElement.textContent.trim().split("\n")[0].replace(/\s+/g, '-').toLowerCase(); + + // Suffix the anchor ID with a counter if it already exists + if (anchorIDCounters[anchorID]) { + anchorIDCounters[anchorID] += 1; + anchorID = anchorID + '-' + anchorIDCounters[anchorID]; + } else { + anchorIDCounters[anchorID] = 1; + } + + detailsElement.setAttribute('id', anchorID); + + var anchorElement = summaryElement.querySelector('a.headerlink'); + anchorElement.setAttribute('href', '#' + anchorID); + }); +} + +// Add an event listener to execute the function when the page is loaded +document.addEventListener('DOMContentLoaded', function () { + updateIdAndHrefBasedOnSummaryText(); +}); diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index ddb6a2ebe0016..1a445b7436201 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -2,6 +2,23 @@ .. currentmodule:: sklearn +.. _changes_1_3_2: + +Version 1.3.2 +============= + +**October 2023** + +Changelog +--------- + +:mod:`sklearn.tree` +................... + +- |Fix| Do not leak data via non-initialized memory in decision tree pickle files and make + the generation of those files deterministic. :pr:`27580` by :user:`Loïc Estève `. + + .. _changes_1_3_1: Version 1.3.1 diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 7af12c870cf45..8cd4498f53cf0 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -253,6 +253,11 @@ Changelog :pr:`26315` and :pr:`27098` by :user:`Mateusz Sokół `, :user:`Olivier Grisel ` and :user:`Edoardo Abati `. +- |Fix| Fixes a bug in :class:`decomposition.KernelPCA` by forcing the output of + the internal :class:`preprocessing.KernelCenterer` to be a default array. When the + arpack solver was used, it would expect an array with a `dtype` attribute. + :pr:`27583` by :user:`Guillaume Lemaitre `. + :mod:`sklearn.ensemble` ....................... @@ -354,6 +359,14 @@ Changelog :func:`sklearn.metrics.zero_one_loss` now support Array API compatible inputs. :pr:`27137` by :user:`Edoardo Abati `. +- |API| Deprecated `needs_threshold` and `needs_proba` from :func:`metrics.make_scorer`. + These parameters will be removed in version 1.6. Instead, use `response_method` that + accepts `"predict"`, `"predict_proba"` or `"decision_function"` or a list of such + values. `needs_proba=True` is equivalent to `response_method="predict_proba"` and + `needs_threshold=True` is equivalent to + `response_method=("decision_function", "predict_proba")`. + :pr:`26840` by :user:`Guillaume Lemaitre `. + - |Fix| Fixes a bug for metrics using `zero_division=np.nan` (e.g. :func:`~metrics.precision_score`) within a paralell loop (e.g. :func:`~model_selection.cross_val_score`) where the singleton for `np.nan` @@ -366,6 +379,11 @@ Changelog :func:`metrics.root_mean_squared_log_error` instead. :pr:`26734` by :user:`Alejandro Martin Gil <101AlexMartin>`. +- |Fix| :func:`metrics.make_scorer` now raises an error when using a regressor on a + scorer requesting a non-thresholded decision function (from `decision_function` or + `predict_proba`). Such scorer are specific to classification. + :pr:`26840` by :user:`Guillaume Lemaitre `. + :mod:`sklearn.model_selection` .............................. 
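The scorer-related changelog entries above replace `needs_proba` and
`needs_threshold` with the new `response_method` parameter of
:func:`metrics.make_scorer`. As a minimal sketch of the mapping, using a small
synthetic classifier (the dataset and estimator here are illustrative, not
taken from the patch)::

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import log_loss, make_scorer, roc_auc_score

    X, y = make_classification(random_state=0)
    clf = LogisticRegression().fit(X, y)

    # formerly make_scorer(roc_auc_score, needs_threshold=True)
    roc_auc = make_scorer(
        roc_auc_score, response_method=("decision_function", "predict_proba")
    )
    # formerly make_scorer(log_loss, greater_is_better=False, needs_proba=True)
    neg_log_loss = make_scorer(
        log_loss, greater_is_better=False, response_method="predict_proba"
    )

    print(roc_auc(clf, X, y), neg_log_loss(clf, X, y))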
diff --git a/sklearn/cluster/tests/test_hdbscan.py b/sklearn/cluster/tests/test_hdbscan.py index 7a2bac12ef057..26f5b64cb2bfd 100644 --- a/sklearn/cluster/tests/test_hdbscan.py +++ b/sklearn/cluster/tests/test_hdbscan.py @@ -274,14 +274,15 @@ def test_hdbscan_callable_metric(): assert n_clusters == n_clusters_true -@pytest.mark.parametrize("tree", ["kd", "ball"]) +@pytest.mark.parametrize("tree", ["kd_tree", "ball_tree"]) def test_hdbscan_precomputed_non_brute(tree): """ Tests that HDBSCAN correctly raises an error when passing precomputed data while requesting a tree-based algorithm. """ - hdb = HDBSCAN(metric="precomputed", algorithm=f"prims_{tree}tree") - with pytest.raises(ValueError): + hdb = HDBSCAN(metric="precomputed", algorithm=tree) + msg = "precomputed is not a valid metric for" + with pytest.raises(ValueError, match=msg): hdb.fit(X) diff --git a/sklearn/decomposition/_kernel_pca.py b/sklearn/decomposition/_kernel_pca.py index ccf79e896f210..800b472a9b3a6 100644 --- a/sklearn/decomposition/_kernel_pca.py +++ b/sklearn/decomposition/_kernel_pca.py @@ -432,7 +432,7 @@ def fit(self, X, y=None): raise ValueError("Cannot fit_inverse_transform with a precomputed kernel.") X = self._validate_data(X, accept_sparse="csr", copy=self.copy_X) self.gamma_ = 1 / X.shape[1] if self.gamma is None else self.gamma - self._centerer = KernelCenterer() + self._centerer = KernelCenterer().set_output(transform="default") K = self._get_kernel(X) self._fit_transform(K) diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index 76f7c4f832086..b222cf4e158ff 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -3,7 +3,8 @@ import numpy as np import pytest -from sklearn.datasets import make_blobs, make_circles +import sklearn +from sklearn.datasets import load_iris, make_blobs, make_circles from sklearn.decomposition import PCA, KernelPCA from sklearn.exceptions import NotFittedError from sklearn.linear_model import Perceptron @@ -551,3 +552,15 @@ def test_kernel_pca_inverse_correct_gamma(): X2_recon = kpca2.inverse_transform(kpca1.transform(X)) assert_allclose(X1_recon, X2_recon) + + +def test_kernel_pca_pandas_output(): + """Check that KernelPCA works with pandas output when the solver is arpack. + + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/27579 + """ + pytest.importorskip("pandas") + X, _ = load_iris(as_frame=True, return_X_y=True) + with sklearn.config_context(transform_output="pandas"): + KernelPCA(n_components=2, eigen_solver="arpack").fit_transform(X) diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index c3730195fdcbb..37f0fa044455c 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -25,11 +25,9 @@ from inspect import signature from traceback import format_exc -import numpy as np - from ..base import is_regressor from ..utils import Bunch -from ..utils._param_validation import HasMethods, StrOptions, validate_params +from ..utils._param_validation import HasMethods, Hidden, StrOptions, validate_params from ..utils._response import _get_response_values from ..utils.metadata_routing import ( MetadataRequest, @@ -40,7 +38,7 @@ get_routing_for_object, process_routing, ) -from ..utils.multiclass import type_of_target +from ..utils.validation import _check_response_method from . 
import ( accuracy_score, average_precision_score, @@ -150,34 +148,24 @@ def __call__(self, estimator, *args, **kwargs): return scores def _use_cache(self, estimator): - """Return True if using a cache is beneficial. - - Caching may be beneficial when one of these conditions holds: - - `_ProbaScorer` will be called twice. - - `_PredictScorer` will be called twice. - - `_ThresholdScorer` will be called twice. - - `_ThresholdScorer` and `_PredictScorer` are called and - estimator is a regressor. - - `_ThresholdScorer` and `_ProbaScorer` are called and - estimator does not have a `decision_function` attribute. - + """Return True if using a cache is beneficial, thus when a response method will + be called several time. """ if len(self._scorers) == 1: # Only one scorer return False - counter = Counter([type(v) for v in self._scorers.values()]) - - if any( - counter[known_type] > 1 - for known_type in [_PredictScorer, _ProbaScorer, _ThresholdScorer] - ): + counter = Counter( + [ + _check_response_method(estimator, scorer._response_method).__name__ + for scorer in self._scorers.values() + if isinstance(scorer, _BaseScorer) + ] + ) + if any(val > 1 for val in counter.values()): + # The exact same response method or iterable of response methods + # will be called more than once. return True - if counter[_ThresholdScorer]: - if is_regressor(estimator) and counter[_PredictScorer]: - return True - elif counter[_ProbaScorer] and not hasattr(estimator, "decision_function"): - return True return False def get_metadata_routing(self): @@ -200,10 +188,11 @@ def get_metadata_routing(self): class _BaseScorer(_MetadataRequester): - def __init__(self, score_func, sign, kwargs): - self._kwargs = kwargs + def __init__(self, score_func, sign, kwargs, response_method="predict"): self._score_func = score_func self._sign = sign + self._kwargs = kwargs + self._response_method = response_method def _get_pos_label(self): if "pos_label" in self._kwargs: @@ -214,14 +203,13 @@ def _get_pos_label(self): return None def __repr__(self): - kwargs_string = "".join( - [", %s=%s" % (str(k), str(v)) for k, v in self._kwargs.items()] - ) - return "make_scorer(%s%s%s%s)" % ( - self._score_func.__name__, - "" if self._sign > 0 else ", greater_is_better=False", - self._factory_args(), - kwargs_string, + sign_string = "" if self._sign > 0 else ", greater_is_better=False" + response_method_string = f", response_method={self._response_method!r}" + kwargs_string = "".join([f", {k}={v}" for k, v in self._kwargs.items()]) + + return ( + f"make_scorer({self._score_func.__name__}{sign_string}" + f"{response_method_string}{kwargs_string})" ) def __call__(self, estimator, X, y_true, sample_weight=None, **kwargs): @@ -264,10 +252,6 @@ def __call__(self, estimator, X, y_true, sample_weight=None, **kwargs): return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs) - def _factory_args(self): - """Return non-default make_scorer arguments for repr.""" - return "" - def _warn_overlap(self, message, kwargs): """Warn if there is any overlap between ``self._kwargs`` and ``kwargs``. @@ -317,9 +301,9 @@ def set_score_request(self, **kwargs): return self -class _PredictScorer(_BaseScorer): +class _Scorer(_BaseScorer): def _score(self, method_caller, estimator, X, y_true, **kwargs): - """Evaluate predicted target values for X relative to y_true. + """Evaluate the response method of `estimator` on `X` and `y_true`. 
Parameters ---------- @@ -328,108 +312,13 @@ def _score(self, method_caller, estimator, X, y_true, **kwargs): arguments, potentially caching results. estimator : object - Trained estimator to use for scoring. Must have a `predict` - method; the output of that is used to compute the score. - - X : {array-like, sparse matrix} - Test data that will be fed to estimator.predict. - - y_true : array-like - Gold standard target values for X. - - **kwargs : dict - Other parameters passed to the scorer. Refer to - :func:`set_score_request` for more details. - - .. versionadded:: 1.3 - - Returns - ------- - score : float - Score function applied to prediction of estimator on X. - """ - self._warn_overlap( - message=( - "There is an overlap between set kwargs of this scorer instance and" - " passed metadata. Please pass them either as kwargs to `make_scorer`" - " or metadata, but not both." - ), - kwargs=kwargs, - ) - y_pred = method_caller(estimator, "predict", X) - scoring_kwargs = {**self._kwargs, **kwargs} - return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs) - - -class _ProbaScorer(_BaseScorer): - def _score(self, method_caller, clf, X, y, **kwargs): - """Evaluate predicted probabilities for X relative to y_true. - - Parameters - ---------- - method_caller : callable - Returns predictions given an estimator, method name, and other - arguments, potentially caching results. - - clf : object - Trained classifier to use for scoring. Must have a `predict_proba` - method; the output of that is used to compute the score. - - X : {array-like, sparse matrix} - Test data that will be fed to clf.predict_proba. - - y : array-like - Gold standard target values for X. These must be class labels, - not probabilities. - - **kwargs : dict - Other parameters passed to the scorer. Refer to - :func:`set_score_request` for more details. - - .. versionadded:: 1.3 - - Returns - ------- - score : float - Score function applied to prediction of estimator on X. - """ - self._warn_overlap( - message=( - "There is an overlap between set kwargs of this scorer instance and" - " passed metadata. Please pass them either as kwargs to `make_scorer`" - " or metadata, but not both." - ), - kwargs=kwargs, - ) - - y_pred = method_caller(clf, "predict_proba", X, pos_label=self._get_pos_label()) - scoring_kwargs = {**self._kwargs, **kwargs} - return self._sign * self._score_func(y, y_pred, **scoring_kwargs) - - def _factory_args(self): - return ", needs_proba=True" - - -class _ThresholdScorer(_BaseScorer): - def _score(self, method_caller, clf, X, y, **kwargs): - """Evaluate decision function output for X relative to y_true. - - Parameters - ---------- - method_caller : callable - Returns predictions given an estimator, method name, and other - arguments, potentially caching results. - - clf : object - Trained classifier to use for scoring. Must have either a - decision_function method or a predict_proba method; the output of - that is used to compute the score. + Trained estimator to use for scoring. X : {array-like, sparse matrix} Test data that will be fed to clf.decision_function or clf.predict_proba. - y : array-like + y_true : array-like Gold standard target values for X. These must be class labels, not decision function values. @@ -437,8 +326,6 @@ def _score(self, method_caller, clf, X, y, **kwargs): Other parameters passed to the scorer. Refer to :func:`set_score_request` for more details. - .. 
versionadded:: 1.3 - Returns ------- score : float @@ -453,31 +340,14 @@ def _score(self, method_caller, clf, X, y, **kwargs): kwargs=kwargs, ) - y_type = type_of_target(y) - if y_type not in ("binary", "multilabel-indicator"): - raise ValueError("{0} format is not supported".format(y_type)) - - if is_regressor(clf): - y_pred = method_caller(clf, "predict", X) - else: - pos_label = self._get_pos_label() - try: - y_pred = method_caller(clf, "decision_function", X, pos_label=pos_label) - - if isinstance(y_pred, list): - # For multi-output multi-class estimator - y_pred = np.vstack([p for p in y_pred]).T - - except (NotImplementedError, AttributeError): - y_pred = method_caller(clf, "predict_proba", X, pos_label=pos_label) - if isinstance(y_pred, list): - y_pred = np.vstack([p[:, -1] for p in y_pred]).T + pos_label = None if is_regressor(estimator) else self._get_pos_label() + response_method = _check_response_method(estimator, self._response_method) + y_pred = method_caller( + estimator, response_method.__name__, X, pos_label=pos_label + ) scoring_kwargs = {**self._kwargs, **kwargs} - return self._sign * self._score_func(y, y_pred, **scoring_kwargs) - - def _factory_args(self): - return ", needs_threshold=True" + return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs) @validate_params( @@ -558,6 +428,10 @@ def get_metadata_routing(self): def _check_multimetric_scoring(estimator, scoring): """Check the scoring parameter in cases when multiple metrics are allowed. + In addition, multimetric scoring leverages a caching mechanism to not call the same + estimator response method multiple times. Hence, the scorer is modified to only use + a single response method given a list of response methods and the estimator. + Parameters ---------- estimator : sklearn estimator instance @@ -636,39 +510,93 @@ def _check_multimetric_scoring(estimator, scoring): } else: raise ValueError(err_msg_generic) + return scorers +def _get_response_method(response_method, needs_threshold, needs_proba): + """Handles deprecation of `needs_threshold` and `needs_proba` parameters in + favor of `response_method`. + """ + needs_threshold_provided = needs_threshold != "deprecated" + needs_proba_provided = needs_proba != "deprecated" + response_method_provided = response_method is not None + + needs_threshold = False if needs_threshold == "deprecated" else needs_threshold + needs_proba = False if needs_proba == "deprecated" else needs_proba + + if response_method_provided and (needs_proba_provided or needs_threshold_provided): + raise ValueError( + "You cannot set both `response_method` and `needs_proba` or " + "`needs_threshold` at the same time. Only use `response_method` since " + "the other two are deprecated in version 1.4 and will be removed in 1.6." + ) + + if needs_proba_provided or needs_threshold_provided: + warnings.warn( + ( + "The `needs_threshold` and `needs_proba` parameter are deprecated in " + "version 1.4 and will be removed in 1.6. You can either let " + "`response_method` be `None` or set it to `predict` to preserve the " + "same behaviour." + ), + FutureWarning, + ) + + if response_method_provided: + return response_method + + if needs_proba is True and needs_threshold is True: + raise ValueError( + "You cannot set both `needs_proba` and `needs_threshold` at the same " + "time. Use `response_method` instead since the other two are deprecated " + "in version 1.4 and will be removed in 1.6." 
+ ) + + if needs_proba is True: + response_method = "predict_proba" + elif needs_threshold is True: + response_method = ("decision_function", "predict_proba") + else: + response_method = "predict" + + return response_method + + @validate_params( { "score_func": [callable], + "response_method": [ + None, + list, + tuple, + StrOptions({"predict", "predict_proba", "decision_function"}), + ], "greater_is_better": ["boolean"], - "needs_proba": ["boolean"], - "needs_threshold": ["boolean"], + "needs_proba": ["boolean", Hidden(StrOptions({"deprecated"}))], + "needs_threshold": ["boolean", Hidden(StrOptions({"deprecated"}))], }, prefer_skip_nested_validation=True, ) def make_scorer( score_func, *, + response_method=None, greater_is_better=True, - needs_proba=False, - needs_threshold=False, + needs_proba="deprecated", + needs_threshold="deprecated", **kwargs, ): """Make a scorer from a performance metric or loss function. - This factory function wraps scoring functions for use in - :class:`~sklearn.model_selection.GridSearchCV` and - :func:`~sklearn.model_selection.cross_val_score`. - It takes a score function, such as :func:`~sklearn.metrics.accuracy_score`, - :func:`~sklearn.metrics.mean_squared_error`, - :func:`~sklearn.metrics.adjusted_rand_score` or - :func:`~sklearn.metrics.average_precision_score` - and returns a callable that scores an estimator's output. - The signature of the call is `(estimator, X, y)` where `estimator` - is the model to be evaluated, `X` is the data and `y` is the - ground truth labeling (or `None` in the case of unsupervised models). + A scorer is a wrapper around an arbitrary metric or loss function that is called + with the signature `scorer(estimator, X, y_true, **kwargs)`. + + It is accepted in all scikit-learn estimators or functions allowing a `scoring` + parameter. + + The parameter `response_method` allows to specify which method of the estimator + should be used to feed the scoring/loss function. Read more in the :ref:`User Guide `. @@ -678,6 +606,21 @@ def make_scorer( Score function (or loss function) with signature ``score_func(y, y_pred, **kwargs)``. + response_method : {"predict_proba", "decision_function", "predict"} or \ + list/tuple of such str, default=None + + Specifies the response method to use get prediction from an estimator + (i.e. :term:`predict_proba`, :term:`decision_function` or + :term:`predict`). Possible choices are: + + - if `str`, it corresponds to the name to the method to return; + - if a list or tuple of `str`, it provides the method names in order of + preference. The method returned corresponds to the first method in + the list and which is implemented by `estimator`. + - if `None`, it is equivalent to `"predict"`. + + .. versionadded:: 1.4 + greater_is_better : bool, default=True Whether `score_func` is a score function (default), meaning high is good, or a loss function, meaning low is good. In the latter case, the @@ -691,6 +634,10 @@ def make_scorer( a 1D `y_pred` (i.e., probability of the positive class, shape `(n_samples,)`). + .. deprecated:: 1.4 + `needs_proba` is deprecated in version 1.4 and will be removed in + 1.6. Use `response_method="predict_proba"` instead. + needs_threshold : bool, default=False Whether `score_func` takes a continuous decision certainty. This only works for binary classification using estimators that @@ -703,6 +650,11 @@ def make_scorer( For example `average_precision` or the area under the roc curve can not be computed using discrete predictions alone. + .. 
deprecated:: 1.4 + `needs_threshold` is deprecated in version 1.4 and will be removed + in 1.6. Use `response_method=("decision_function", "predict_proba")` + instead to preserve the same behaviour. + **kwargs : additional arguments Additional parameters to be passed to `score_func`. @@ -711,40 +663,22 @@ def make_scorer( scorer : callable Callable object that returns a scalar score; greater is better. - Notes - ----- - If `needs_proba=False` and `needs_threshold=False`, the score - function is supposed to accept the output of :term:`predict`. If - `needs_proba=True`, the score function is supposed to accept the - output of :term:`predict_proba` (For binary `y_true`, the score function is - supposed to accept probability of the positive class). If - `needs_threshold=True`, the score function is supposed to accept the - output of :term:`decision_function` or :term:`predict_proba` when - :term:`decision_function` is not present. - Examples -------- >>> from sklearn.metrics import fbeta_score, make_scorer >>> ftwo_scorer = make_scorer(fbeta_score, beta=2) >>> ftwo_scorer - make_scorer(fbeta_score, beta=2) + make_scorer(fbeta_score, response_method='predict', beta=2) >>> from sklearn.model_selection import GridSearchCV >>> from sklearn.svm import LinearSVC >>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]}, ... scoring=ftwo_scorer) """ + response_method = _get_response_method( + response_method, needs_threshold, needs_proba + ) sign = 1 if greater_is_better else -1 - if needs_proba and needs_threshold: - raise ValueError( - "Set either needs_proba or needs_threshold to True, but not both." - ) - if needs_proba: - cls = _ProbaScorer - elif needs_threshold: - cls = _ThresholdScorer - else: - cls = _PredictScorer - return cls(score_func, sign, kwargs) + return _Scorer(score_func, sign, kwargs, response_method) # Standard regression scores @@ -799,28 +733,47 @@ def negative_likelihood_ratio(y_true, y_pred): # Score functions that need decision values top_k_accuracy_scorer = make_scorer( - top_k_accuracy_score, greater_is_better=True, needs_threshold=True + top_k_accuracy_score, + greater_is_better=True, + response_method=("decision_function", "predict_proba"), ) roc_auc_scorer = make_scorer( - roc_auc_score, greater_is_better=True, needs_threshold=True + roc_auc_score, + greater_is_better=True, + response_method=("decision_function", "predict_proba"), +) +average_precision_scorer = make_scorer( + average_precision_score, + response_method=("decision_function", "predict_proba"), +) +roc_auc_ovo_scorer = make_scorer( + roc_auc_score, response_method="predict_proba", multi_class="ovo" ) -average_precision_scorer = make_scorer(average_precision_score, needs_threshold=True) -roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_proba=True, multi_class="ovo") roc_auc_ovo_weighted_scorer = make_scorer( - roc_auc_score, needs_proba=True, multi_class="ovo", average="weighted" + roc_auc_score, + response_method="predict_proba", + multi_class="ovo", + average="weighted", +) +roc_auc_ovr_scorer = make_scorer( + roc_auc_score, response_method="predict_proba", multi_class="ovr" ) -roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_proba=True, multi_class="ovr") roc_auc_ovr_weighted_scorer = make_scorer( - roc_auc_score, needs_proba=True, multi_class="ovr", average="weighted" + roc_auc_score, + response_method="predict_proba", + multi_class="ovr", + average="weighted", ) # Score function for probabilistic classification -neg_log_loss_scorer = make_scorer(log_loss, greater_is_better=False, 
needs_proba=True) +neg_log_loss_scorer = make_scorer( + log_loss, greater_is_better=False, response_method="predict_proba" +) neg_brier_score_scorer = make_scorer( - brier_score_loss, greater_is_better=False, needs_proba=True + brier_score_loss, greater_is_better=False, response_method="predict_proba" ) brier_score_loss_scorer = make_scorer( - brier_score_loss, greater_is_better=False, needs_proba=True + brier_score_loss, greater_is_better=False, response_method="predict_proba" ) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index a7d1220013eee..6db20bff58fc3 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -48,7 +48,7 @@ _check_multimetric_scoring, _MultimetricScorer, _PassthroughScorer, - _PredictScorer, + _Scorer, ) from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.multiclass import OneVsRestClassifier @@ -253,7 +253,8 @@ def check_scoring_validator_for_single_metric_usecases(scoring_validator): estimator = EstimatorWithFit() scorer = scoring_validator(estimator, scoring="accuracy") - assert isinstance(scorer, _PredictScorer) + assert isinstance(scorer, _Scorer) + assert scorer._response_method == "predict" # Test the allow_none parameter for check_scoring alone if scoring_validator is check_scoring: @@ -295,9 +296,8 @@ def test_check_scoring_and_check_multimetric_scoring(scoring): scorers = _check_multimetric_scoring(estimator, scoring) assert isinstance(scorers, dict) assert sorted(scorers.keys()) == sorted(list(scoring)) - assert all( - [isinstance(scorer, _PredictScorer) for scorer in list(scorers.values())] - ) + assert all([isinstance(scorer, _Scorer) for scorer in list(scorers.values())]) + assert all(scorer._response_method == "predict" for scorer in scorers.values()) if "acc" in scoring: assert_almost_equal( @@ -353,11 +353,13 @@ def test_check_scoring_gridsearchcv(): grid = GridSearchCV(LinearSVC(dual="auto"), param_grid={"C": [0.1, 1]}, cv=3) scorer = check_scoring(grid, scoring="f1") - assert isinstance(scorer, _PredictScorer) + assert isinstance(scorer, _Scorer) + assert scorer._response_method == "predict" pipe = make_pipeline(LinearSVC(dual="auto")) scorer = check_scoring(pipe, scoring="f1") - assert isinstance(scorer, _PredictScorer) + assert isinstance(scorer, _Scorer) + assert scorer._response_method == "predict" # check that cross_val_score definitely calls the scorer # and doesn't make any assumptions about the estimator apart from having a @@ -368,13 +370,6 @@ def test_check_scoring_gridsearchcv(): assert_array_equal(scores, 1) -def test_make_scorer(): - # Sanity check on the make_scorer factory function. 
- f = lambda *args: 0 - with pytest.raises(ValueError): - make_scorer(f, needs_threshold=True, needs_proba=True) - - @pytest.mark.parametrize( "scorer_name, metric", [ @@ -504,15 +499,15 @@ def test_thresholded_scorers(): # test with a regressor (no decision_function) reg = DecisionTreeRegressor() reg.fit(X_train, y_train) - score1 = get_scorer("roc_auc")(reg, X_test, y_test) - score2 = roc_auc_score(y_test, reg.predict(X_test)) - assert_almost_equal(score1, score2) + err_msg = "DecisionTreeRegressor has none of the following attributes" + with pytest.raises(AttributeError, match=err_msg): + get_scorer("roc_auc")(reg, X_test, y_test) # Test that an exception is raised on more than two classes X, y = make_blobs(random_state=0, centers=3) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf.fit(X_train, y_train) - with pytest.raises(ValueError, match="multiclass format is not supported"): + with pytest.raises(ValueError, match="multi_class must be in \\('ovo', 'ovr'\\)"): get_scorer("roc_auc")(clf, X_test, y_test) # test error is raised with a single class present in model @@ -543,22 +538,6 @@ def test_thresholded_scorers_multilabel_indicator_data(): score2 = roc_auc_score(y_test, np.vstack([p[:, -1] for p in y_proba]).T) assert_almost_equal(score1, score2) - # Multi-output multi-class decision_function - # TODO Is there any yet? - class TreeWithDecisionFunction(DecisionTreeClassifier): - # disable predict_proba - predict_proba = None - - def decision_function(self, X): - return [p[:, 1] for p in DecisionTreeClassifier.predict_proba(self, X)] - - clf = TreeWithDecisionFunction() - clf.fit(X_train, y_train) - y_proba = clf.decision_function(X_test) - score1 = get_scorer("roc_auc")(clf, X_test, y_test) - score2 = roc_auc_score(y_test, np.vstack([p for p in y_proba]).T) - assert_almost_equal(score1, score2) - # Multilabel predict_proba clf = OneVsRestClassifier(DecisionTreeClassifier()) clf.fit(X_train, y_train) @@ -812,8 +791,11 @@ def test_multimetric_scorer_calls_method_once( (["roc_auc", "neg_log_loss"]), ( { - "roc_auc": make_scorer(roc_auc_score, needs_threshold=True), - "neg_log_loss": make_scorer(log_loss, needs_proba=True), + "roc_auc": make_scorer( + roc_auc_score, + response_method=["predict_proba", "decision_function"], + ), + "neg_log_loss": make_scorer(log_loss, response_method="predict_proba"), } ), ], @@ -966,7 +948,10 @@ def test_multiclass_roc_proba_scorer(scorer_name, metric): def test_multiclass_roc_proba_scorer_label(): scorer = make_scorer( - roc_auc_score, multi_class="ovo", labels=[0, 1, 2], needs_proba=True + roc_auc_score, + multi_class="ovo", + labels=[0, 1, 2], + response_method="predict_proba", ) X, y = make_classification( n_classes=3, n_informative=3, n_samples=20, random_state=0 @@ -1055,7 +1040,7 @@ def string_labeled_classification_problem(): def test_average_precision_pos_label(string_labeled_classification_problem): - # check that _ThresholdScorer will lead to the right score when passing + # check that _Scorer will lead to the right score when passing # `pos_label`. Currently, only `average_precision_score` is defined to # be such a scorer. ( @@ -1085,7 +1070,7 @@ def test_average_precision_pos_label(string_labeled_classification_problem): # check that it fails if `pos_label` is not provided average_precision_scorer = make_scorer( average_precision_score, - needs_threshold=True, + response_method=("decision_function", "predict_proba"), ) err_msg = "pos_label=1 is not a valid label. 
It should be one of " with pytest.raises(ValueError, match=err_msg): @@ -1094,7 +1079,9 @@ def test_average_precision_pos_label(string_labeled_classification_problem): # otherwise, the scorer should give the same results than calling the # scoring function average_precision_scorer = make_scorer( - average_precision_score, needs_threshold=True, pos_label=pos_label + average_precision_score, + response_method=("decision_function", "predict_proba"), + pos_label=pos_label, ) ap_scorer = average_precision_scorer(clf, X_test, y_test) @@ -1119,7 +1106,7 @@ def _predict_proba(self, X): def test_brier_score_loss_pos_label(string_labeled_classification_problem): - # check that _ProbaScorer leads to the right score when `pos_label` is + # check that _Scorer leads to the right score when `pos_label` is # provided. Currently only the `brier_score_loss` is defined to be such # a scorer. clf, X_test, y_test, _, y_pred_proba, _ = string_labeled_classification_problem @@ -1136,7 +1123,7 @@ def test_brier_score_loss_pos_label(string_labeled_classification_problem): brier_scorer = make_scorer( brier_score_loss, - needs_proba=True, + response_method="predict_proba", pos_label=pos_label, ) assert brier_scorer(clf, X_test, y_test) == pytest.approx(brier_pos_cancer) @@ -1148,7 +1135,7 @@ def test_brier_score_loss_pos_label(string_labeled_classification_problem): def test_non_symmetric_metric_pos_label( score_func, string_labeled_classification_problem ): - # check that _PredictScorer leads to the right score when `pos_label` is + # check that _Scorer leads to the right score when `pos_label` is # provided. We check for all possible metric supported. # Note: At some point we may end up having "scorer tags". clf, X_test, y_test, y_pred, _, _ = string_labeled_classification_problem @@ -1168,11 +1155,15 @@ def test_non_symmetric_metric_pos_label( @pytest.mark.parametrize( "scorer", [ - make_scorer(average_precision_score, needs_threshold=True, pos_label="xxx"), - make_scorer(brier_score_loss, needs_proba=True, pos_label="xxx"), + make_scorer( + average_precision_score, + response_method=("decision_function", "predict_proba"), + pos_label="xxx", + ), + make_scorer(brier_score_loss, response_method="predict_proba", pos_label="xxx"), make_scorer(f1_score, pos_label="xxx"), ], - ids=["ThresholdScorer", "ProbaScorer", "PredictScorer"], + ids=["non-thresholded scorer", "probability scorer", "thresholded scorer"], ) def test_scorer_select_proba_error(scorer): # check that we raise the proper error when passing an unknown @@ -1194,7 +1185,7 @@ def test_get_scorer_return_copy(): def test_scorer_no_op_multiclass_select_proba(): - # check that calling a ProbaScorer on a multiclass problem do not raise + # check that calling a _Scorer on a multiclass problem do not raise # even if `y_true` would be binary during the scoring. # `_select_proba_binary` should not be called in this case. 
X, y = make_classification( @@ -1208,7 +1199,7 @@ def test_scorer_no_op_multiclass_select_proba(): scorer = make_scorer( roc_auc_score, - needs_proba=True, + response_method="predict_proba", multi_class="ovo", labels=lr.classes_, ) @@ -1285,7 +1276,7 @@ def test_metadata_kwarg_conflict(): scorer = make_scorer( roc_auc_score, - needs_proba=True, + response_method="predict_proba", multi_class="ovo", labels=lr.classes_, ) @@ -1389,3 +1380,113 @@ def test_get_scorer_multilabel_indicator(): score = get_scorer("average_precision")(estimator, X_test, Y_test) assert score > 0.8 + + +@pytest.mark.parametrize( + "scorer, expected_repr", + [ + ( + get_scorer("accuracy"), + "make_scorer(accuracy_score, response_method='predict')", + ), + ( + get_scorer("neg_log_loss"), + ( + "make_scorer(log_loss, greater_is_better=False," + " response_method='predict_proba')" + ), + ), + ( + get_scorer("roc_auc"), + ( + "make_scorer(roc_auc_score, response_method=" + "('decision_function', 'predict_proba'))" + ), + ), + ( + make_scorer(fbeta_score, beta=2), + "make_scorer(fbeta_score, response_method='predict', beta=2)", + ), + ], +) +def test_make_scorer_repr(scorer, expected_repr): + """Check the representation of the scorer.""" + assert repr(scorer) == expected_repr + + +# TODO(1.6): rework this test after the deprecation of `needs_proba` and +# `needs_threshold` +@pytest.mark.filterwarnings("ignore:.*needs_proba.*:FutureWarning") +@pytest.mark.parametrize( + "params, err_type, err_msg", + [ + # response_method should not be set if needs_* are set + ( + {"response_method": "predict_proba", "needs_proba": True}, + ValueError, + "You cannot set both `response_method`", + ), + ( + {"response_method": "predict_proba", "needs_threshold": True}, + ValueError, + "You cannot set both `response_method`", + ), + # cannot set both needs_proba and needs_threshold + ( + {"needs_proba": True, "needs_threshold": True}, + ValueError, + "You cannot set both `needs_proba` and `needs_threshold`", + ), + ], +) +def test_make_scorer_error(params, err_type, err_msg): + """Check that `make_scorer` raises errors if the parameter used.""" + with pytest.raises(err_type, match=err_msg): + make_scorer(lambda y_true, y_pred: 1, **params) + + +# TODO(1.6): remove the following test +@pytest.mark.parametrize( + "deprecated_params, new_params, warn_msg", + [ + ( + {"needs_proba": True}, + {"response_method": "predict_proba"}, + "The `needs_threshold` and `needs_proba` parameter are deprecated", + ), + ( + {"needs_proba": True, "needs_threshold": False}, + {"response_method": "predict_proba"}, + "The `needs_threshold` and `needs_proba` parameter are deprecated", + ), + ( + {"needs_threshold": True}, + {"response_method": ("decision_function", "predict_proba")}, + "The `needs_threshold` and `needs_proba` parameter are deprecated", + ), + ( + {"needs_threshold": True, "needs_proba": False}, + {"response_method": ("decision_function", "predict_proba")}, + "The `needs_threshold` and `needs_proba` parameter are deprecated", + ), + ( + {"needs_threshold": False, "needs_proba": False}, + {"response_method": "predict"}, + "The `needs_threshold` and `needs_proba` parameter are deprecated", + ), + ], +) +def test_make_scorer_deprecation(deprecated_params, new_params, warn_msg): + """Check that we raise a deprecation warning when using `needs_proba` or + `needs_threshold`.""" + X, y = make_classification(n_samples=150, n_features=10, random_state=0) + classifier = LogisticRegression().fit(X, y) + + # check deprecation of needs_proba + with 
pytest.warns(FutureWarning, match=warn_msg): + deprecated_roc_auc_scorer = make_scorer(roc_auc_score, **deprecated_params) + roc_auc_scorer = make_scorer(roc_auc_score, **new_params) + + assert deprecated_roc_auc_scorer(classifier, X, y) == pytest.approx( + roc_auc_scorer(classifier, X, y) + ) diff --git a/sklearn/tests/metadata_routing_common.py b/sklearn/tests/metadata_routing_common.py index 14bf037ae9054..3d7d0ab24f1cc 100644 --- a/sklearn/tests/metadata_routing_common.py +++ b/sklearn/tests/metadata_routing_common.py @@ -10,7 +10,7 @@ TransformerMixin, clone, ) -from sklearn.metrics._scorer import _PredictScorer, mean_squared_error +from sklearn.metrics._scorer import _Scorer, mean_squared_error from sklearn.model_selection import BaseCrossValidator from sklearn.model_selection._split import GroupsConsumerMixin from sklearn.utils._metadata_requests import ( @@ -304,9 +304,11 @@ def inverse_transform(self, X, sample_weight=None, metadata=None): return X -class ConsumingScorer(_PredictScorer): +class ConsumingScorer(_Scorer): def __init__(self, registry=None): - super().__init__(score_func=mean_squared_error, sign=1, kwargs={}) + super().__init__( + score_func=mean_squared_error, sign=1, kwargs={}, response_method="predict" + ) self.registry = registry def _score(self, method_caller, clf, X, y, **kwargs): diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 5a8a200ed9680..c52aca5c6fefc 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -1020,11 +1020,13 @@ cdef class BaseTree: safe_realloc(&self.nodes, capacity) safe_realloc(&self.value, capacity * self.value_stride) - # value memory is initialised to 0 to enable classifier argmax if capacity > self.capacity: + # value memory is initialised to 0 to enable classifier argmax memset((self.value + self.capacity * self.value_stride), 0, (capacity - self.capacity) * self.value_stride * sizeof(float64_t)) + # node memory is initialised to 0 to ensure deterministic pickle (padding in Node struct) + memset((self.nodes + self.capacity), 0, (capacity - self.capacity) * sizeof(Node)) # if capacity smaller than node_count, adjust the counter if capacity < self.node_count: diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 8faef50879da2..71ab1d8c5c6b6 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -2760,3 +2760,16 @@ def test_multioutput_quantiles(): assert_array_equal(y_hat[:, 1], y_true) assert_array_equal(y_hat[:, 2], y_true) assert y_hat.shape == (4, 3, 2) + + +def test_deterministic_pickle(): + # Non-regression test for: + # https://github.com/scikit-learn/scikit-learn/issues/27268 + # Uninitialised memory would lead to the two pickle strings being different. + tree1 = DecisionTreeClassifier(random_state=0).fit(iris.data, iris.target) + tree2 = DecisionTreeClassifier(random_state=0).fit(iris.data, iris.target) + + pickle1 = pickle.dumps(tree1) + pickle2 = pickle.dumps(tree2) + + assert pickle1 == pickle2
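
To make the effect of the added `memset` concrete, here is a small
stand-alone illustration (plain NumPy, not scikit-learn internals): bytes that
are allocated but never initialised make byte-wise comparisons of otherwise
identical pickles unstable, and zero-filling the unused region restores
determinism::

    import pickle

    import numpy as np

    rng = np.random.default_rng(0)
    used = np.arange(5, dtype=np.float64)

    # Stand-ins for an over-allocated capacity buffer: only the first five
    # slots carry meaningful values, the rest mimics uninitialised memory.
    buf1 = np.empty(8, dtype=np.float64)
    buf2 = np.empty(8, dtype=np.float64)
    buf1[:5] = used
    buf2[:5] = used
    buf1[5:] = rng.random(3)  # "garbage" padding
    buf2[5:] = rng.random(3)

    # False: the padding leaks into the serialised bytes.
    print(pickle.dumps(buf1) == pickle.dumps(buf2))

    # Zero-filling the unused region (the analogue of the new memset) makes
    # the serialised bytes deterministic.
    buf1[5:] = 0.0
    buf2[5:] = 0.0
    print(pickle.dumps(buf1) == pickle.dumps(buf2))  # True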