Skip to content

Commit

Permalink
Merging main
Browse files Browse the repository at this point in the history
Signed-off-by: Adam Li <adam2392@gmail.com>
  • Loading branch information
adam2392 committed Oct 17, 2023
2 parents 09f7785 + caeb09e commit 1adb209
Show file tree
Hide file tree
Showing 17 changed files with 469 additions and 275 deletions.
4 changes: 4 additions & 0 deletions build_tools/cirrus/arm_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ linux_aarch64_test_task:
LOCK_FILE: build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock
CONDA_PKGS_DIRS: /root/.conda/pkgs
HOME: / # $HOME is not defined in image and is required to install mambaforge
# Upload tokens have been encrypted via the CirrusCI interface:
# https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables
# See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires.
BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f]
ccache_cache:
folder: /root/.cache/ccache
conda_cache:
Expand Down
5 changes: 4 additions & 1 deletion doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,15 +312,18 @@
html_show_search_summary = False


# The "summary-anchor" IDs will be overwritten via JavaScript to be unique.
# See `doc/theme/scikit-learn-modern/static/js/details-permalink.js`.
rst_prolog = """
.. |details-start| raw:: html
<details>
<details id="summary-anchor">
<summary class="btn btn-light">
.. |details-split| raw:: html
<span class="tooltiptext">Click for more details</span>
<a class="headerlink" href="#summary-anchor" title="Permalink to this heading">¶</a>
</summary>
<div class="card">
Expand Down
2 changes: 1 addition & 1 deletion doc/modules/grid_search.rst
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ Here, ``<estimator>`` is the parameter name of the nested estimator,
in this case ``estimator``.
If the meta-estimator is constructed as a collection of estimators as in
`pipeline.Pipeline`, then ``<estimator>`` refers to the name of the estimator,
see :ref:`pipeline_nested_parameters`. In practice, there can be several
see :ref:`pipeline_nested_parameters`. In practice, there can be several
levels of nesting::

>>> from sklearn.pipeline import Pipeline
Expand Down
12 changes: 9 additions & 3 deletions doc/modules/model_evaluation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,15 @@ take several parameters:
of the python function is negated by the scorer object, conforming to
the cross validation convention that scorers return higher values for better models.

* for classification metrics only: whether the python function you provided requires continuous decision
certainties (``needs_threshold=True``). The default value is
False.
* for classification metrics only: whether the python function you provided requires
continuous decision certainties. If the scoring function only accepts probability
estimates (e.g. :func:`metrics.log_loss`) then one needs to set the parameter
`response_method`, thus in this case `response_method="predict_proba"`. Some scoring
functions do not necessarily require probability estimates but rather non-thresholded
decision values (e.g. :func:`metrics.roc_auc_score`). In this case, one provides a
list such as `response_method=["decision_function", "predict_proba"]`. The scorer
will then use the first available method, in the order given in the list,
to compute the scores.

* any additional parameters, such as ``beta`` or ``labels`` in :func:`f1_score`.

Expand Down
1 change: 1 addition & 0 deletions doc/themes/scikit-learn-modern/layout.html
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
<link rel="stylesheet" href="{{ pathto('_static/' + styles[0], 1) }}" type="text/css" />
<script id="documentation_options" data-url_root="{{ pathto('', 1) }}" src="{{ pathto('_static/documentation_options.js', 1) }}"></script>
<script src="{{ pathto('_static/js/vendor/jquery-3.6.3.slim.min.js', 1) }}"></script>
<script src="{{ pathto('_static/js/details-permalink.js', 1) }}"></script>
{%- block extrahead %} {% endblock %}
</head>
<body>
Expand Down
13 changes: 13 additions & 0 deletions doc/themes/scikit-learn-modern/static/css/theme.css
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,15 @@ div.clearer {

/* details / summary */

/* Enables section links to be visible when anchor-linked */
div.sk-page-content details::before {
display: block;
height: 52px;
margin-top: -52px;
visibility: hidden;
content: "";
}

div.sk-page-content details {
margin: 4ex 0pt;
}
Expand Down Expand Up @@ -202,6 +211,10 @@ div.sk-page-content summary:hover .tooltiptext {
visibility: visible;
}

div.sk-page-content summary:hover .headerlink {
visibility: visible;
}

/* Button */

.sk-btn-primary {
Expand Down
47 changes: 47 additions & 0 deletions doc/themes/scikit-learn-modern/static/js/details-permalink.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Create permalinks for <details> elements so that they can be linked to.
// The assumption is that such a block is defined as follows:
//    <details id="summary-anchor">
//      <summary class="btn btn-light">
//        Some title
//        <span class="tooltiptext">Click for more details</span>
//        <a class="headerlink" href="#summary-anchor" title="Permalink to this heading">¶</a>
//      </summary>
//      <div class="card">
//        Some details
//      </div>
//    </details>
// The placeholder `summary-anchor` is replaced, both in the `id` of the
// <details> element and in the `href` of its permalink, with a unique
// identifier derived from the first line of the summary text.
// This syntax is defined in `doc/conf.py` in the `rst_prolog` variable.
//
// <details> elements that do not follow this layout are tolerated instead of
// aborting the whole pass with a TypeError:
//   - no <summary> child, or an empty summary text: the element is left as is;
//   - no `a.headerlink` inside the summary: only the `id` is set.
function updateIdAndHrefBasedOnSummaryText() {
    var allDetailsElements = document.querySelectorAll('details');
    // Both maps are created without a prototype so that a summary whose text
    // is e.g. "constructor" is not mistaken for an already-seen ID through an
    // inherited `Object.prototype` property.
    // Number of times each base ID (derived from the summary text) was seen.
    var anchorIDCounters = Object.create(null);
    // Every ID that has already been assigned to a <details> element.
    var usedAnchorIDs = Object.create(null);

    allDetailsElements.forEach(function (detailsElement) {
        // Get the <summary> element within the current <details>
        var summaryElement = detailsElement.querySelector('summary');
        if (!summaryElement) {
            return;
        }

        // The ID uses the first line, lowercased, and spaces replaced with dashes
        var baseID = summaryElement.textContent.trim().split("\n")[0].replace(/\s+/g, '-').toLowerCase();
        if (!baseID) {
            // Nothing to build an anchor from; keep the element untouched
            return;
        }

        // The first occurrence keeps the bare ID; later ones get a `-<count>`
        // suffix (the second occurrence gets `-2`, and so on).
        var count = (anchorIDCounters[baseID] || 0) + 1;
        var anchorID = count === 1 ? baseID : baseID + '-' + count;
        // A generated ID may clash with another summary whose own text already
        // reads like a suffixed ID: keep counting until the ID is free.
        while (usedAnchorIDs[anchorID]) {
            count += 1;
            anchorID = baseID + '-' + count;
        }
        anchorIDCounters[baseID] = count;
        usedAnchorIDs[anchorID] = true;

        detailsElement.setAttribute('id', anchorID);

        var anchorElement = summaryElement.querySelector('a.headerlink');
        if (anchorElement) {
            anchorElement.setAttribute('href', '#' + anchorID);
        }
    });
}

// Rewrite the <details> anchors once the DOM has been fully parsed. The
// handler ignores its arguments, so it is registered directly as the listener.
document.addEventListener('DOMContentLoaded', updateIdAndHrefBasedOnSummaryText);
17 changes: 17 additions & 0 deletions doc/whats_new/v1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,23 @@

.. currentmodule:: sklearn

.. _changes_1_3_2:

Version 1.3.2
=============

**October 2023**

Changelog
---------

:mod:`sklearn.tree`
...................

- |Fix| Do not leak data via non-initialized memory in decision tree pickle files and make
the generation of those files deterministic. :pr:`27580` by :user:`Loïc Estève <lesteve>`.


.. _changes_1_3_1:

Version 1.3.1
Expand Down
18 changes: 18 additions & 0 deletions doc/whats_new/v1.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,11 @@ Changelog
:pr:`26315` and :pr:`27098` by :user:`Mateusz Sokół <mtsokol>`,
:user:`Olivier Grisel <ogrisel>` and :user:`Edoardo Abati <EdAbati>`.

- |Fix| Fixes a bug in :class:`decomposition.KernelPCA` by forcing the output of
the internal :class:`preprocessing.KernelCenterer` to be a default array. When the
arpack solver was used, it would expect an array with a `dtype` attribute.
:pr:`27583` by :user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.ensemble`
.......................

Expand Down Expand Up @@ -354,6 +359,14 @@ Changelog
:func:`sklearn.metrics.zero_one_loss` now support Array API compatible inputs.
:pr:`27137` by :user:`Edoardo Abati <EdAbati>`.

- |API| Deprecated `needs_threshold` and `needs_proba` from :func:`metrics.make_scorer`.
These parameters will be removed in version 1.6. Instead, use `response_method` that
accepts `"predict"`, `"predict_proba"` or `"decision_function"` or a list of such
values. `needs_proba=True` is equivalent to `response_method="predict_proba"` and
`needs_threshold=True` is equivalent to
`response_method=("decision_function", "predict_proba")`.
:pr:`26840` by :user:`Guillaume Lemaitre <glemaitre>`.

- |Fix| Fixes a bug for metrics using `zero_division=np.nan`
(e.g. :func:`~metrics.precision_score`) within a parallel loop
(e.g. :func:`~model_selection.cross_val_score`) where the singleton for `np.nan`
Expand All @@ -366,6 +379,11 @@ Changelog
:func:`metrics.root_mean_squared_log_error` instead.
:pr:`26734` by :user:`Alejandro Martin Gil <101AlexMartin>`.

- |Fix| :func:`metrics.make_scorer` now raises an error when using a regressor on a
scorer requesting a non-thresholded decision function (from `decision_function` or
`predict_proba`). Such scorers are specific to classification.
:pr:`26840` by :user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.model_selection`
..............................

Expand Down
7 changes: 4 additions & 3 deletions sklearn/cluster/tests/test_hdbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,14 +274,15 @@ def test_hdbscan_callable_metric():
assert n_clusters == n_clusters_true


@pytest.mark.parametrize("tree", ["kd", "ball"])
@pytest.mark.parametrize("tree", ["kd_tree", "ball_tree"])
def test_hdbscan_precomputed_non_brute(tree):
"""
Tests that HDBSCAN correctly raises an error when passing precomputed data
while requesting a tree-based algorithm.
"""
hdb = HDBSCAN(metric="precomputed", algorithm=f"prims_{tree}tree")
with pytest.raises(ValueError):
hdb = HDBSCAN(metric="precomputed", algorithm=tree)
msg = "precomputed is not a valid metric for"
with pytest.raises(ValueError, match=msg):
hdb.fit(X)


Expand Down
2 changes: 1 addition & 1 deletion sklearn/decomposition/_kernel_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def fit(self, X, y=None):
raise ValueError("Cannot fit_inverse_transform with a precomputed kernel.")
X = self._validate_data(X, accept_sparse="csr", copy=self.copy_X)
self.gamma_ = 1 / X.shape[1] if self.gamma is None else self.gamma
self._centerer = KernelCenterer()
self._centerer = KernelCenterer().set_output(transform="default")
K = self._get_kernel(X)
self._fit_transform(K)

Expand Down
15 changes: 14 additions & 1 deletion sklearn/decomposition/tests/test_kernel_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import numpy as np
import pytest

from sklearn.datasets import make_blobs, make_circles
import sklearn
from sklearn.datasets import load_iris, make_blobs, make_circles
from sklearn.decomposition import PCA, KernelPCA
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import Perceptron
Expand Down Expand Up @@ -551,3 +552,15 @@ def test_kernel_pca_inverse_correct_gamma():
X2_recon = kpca2.inverse_transform(kpca1.transform(X))

assert_allclose(X1_recon, X2_recon)


def test_kernel_pca_pandas_output():
    """Smoke-test `KernelPCA` with the arpack solver under pandas output.

    The test only checks that `fit_transform` runs without raising when the
    global `transform_output` configuration is set to "pandas".

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27579
    """
    pytest.importorskip("pandas")
    iris_frame, _ = load_iris(as_frame=True, return_X_y=True)
    with sklearn.config_context(transform_output="pandas"):
        kpca = KernelPCA(n_components=2, eigen_solver="arpack")
        kpca.fit_transform(iris_frame)
Loading

0 comments on commit 1adb209

Please sign in to comment.