Skip to content

Commit

Permalink
Allowing FPS to take numpy array of ints as initialize parameter (#225)
Browse files Browse the repository at this point in the history
* Add numpy array support for initialize parameter for FPS

* Adding unit test for initialize as np array

* Fixed linting issue

* Added fix for np array value error

* Adding unit test for case with np array containing non-ints

* Adding documentation in skmatter.sample_selection

* Removed unnecessary test and fixed initialize

* Revert "Removed unnecessary test and fixed initialize"

This reverts commit c25c850.

* Adding "numpy" before ndarray in docstrings

* Changing error message and adding another unit test

* Added unit tests

* Combined if statements for list and array

* Update CHANGELOG

---------

Co-authored-by: Christian Jorgensen <cajorgensen@wisc.edu>
  • Loading branch information
cajchristian and Christian Jorgensen authored May 16, 2024
1 parent bd54517 commit dd31493
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ The rules for CHANGELOG file:

0.3.0 (XXXX/XX/XX)
------------------
- Updating ``FPS`` to allow a numpy array of ints as an initialize parameter (#145)
- Supported Python versions now range from 3.9 to 3.12.

0.2.0 (2023/08/24)
Expand Down
17 changes: 9 additions & 8 deletions src/skmatter/_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,7 +934,7 @@ class _FPS(GreedySelector):
Parameters
----------
initialize: int, list of int, or 'random', default=0
initialize: int, list of int, numpy.ndarray of int, or 'random', default=0
Index of the first selection(s). If 'random', picks a random
value when fit starts. Stored in :py:attr:`self.initialize`.
Expand Down Expand Up @@ -1038,7 +1038,14 @@ def _init_greedy_search(self, X, y, n_to_select):
self.hausdorff_ = np.full(X.shape[self._axis], np.inf)
self.hausdorff_at_select_ = np.full(X.shape[self._axis], np.inf)

if self.initialize == "random":
if isinstance(self.initialize, (np.ndarray, list)):
if all(isinstance(i, numbers.Integral) for i in self.initialize):
for i, val in enumerate(self.initialize):
self.selected_idx_[i] = val
self._update_post_selection(X, y, self.selected_idx_[i])
else:
raise ValueError("Invalid value of the initialize parameter")
elif self.initialize == "random":
random_state = check_random_state(self.random_state)
initialize = random_state.randint(X.shape[self._axis])
self.selected_idx_[0] = initialize
Expand All @@ -1047,12 +1054,6 @@ def _init_greedy_search(self, X, y, n_to_select):
initialize = self.initialize
self.selected_idx_[0] = initialize
self._update_post_selection(X, y, self.selected_idx_[0])
elif isinstance(self.initialize, list) and all(
[isinstance(i, numbers.Integral) for i in self.initialize]
):
for i, val in enumerate(self.initialize):
self.selected_idx_[i] = val
self._update_post_selection(X, y, self.selected_idx_[i])
else:
raise ValueError("Invalid value of the initialize parameter")

Expand Down
2 changes: 1 addition & 1 deletion src/skmatter/feature_selection/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class FPS(_FPS):
Parameters
----------
initialize: int, list of int, or 'random', default=0
initialize: int, list of int, numpy.ndarray of int, or 'random', default=0
Index of the first selection(s). If 'random', picks a random
value when fit starts. Stored in :py:attr:`self.initialize`.
Expand Down
2 changes: 1 addition & 1 deletion src/skmatter/sample_selection/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class FPS(_FPS):
Parameters
----------
initialize: int, list of int, or 'random', default=0
initialize: int, list of int, numpy.ndarray of int, or 'random', default=0
Index of the first selection(s). If 'random', picks a random
value when fit starts. Stored in :py:attr:`self.initialize`.
Expand Down
26 changes: 26 additions & 0 deletions tests/test_feature_simple_fps.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest

import numpy as np
from sklearn.datasets import load_diabetes as get_dataset
from sklearn.utils.validation import NotFittedError

Expand Down Expand Up @@ -42,6 +43,31 @@ def test_initialize(self):
for i in range(4):
self.assertEqual(selector.selected_idx_[i], self.idx[i])

initialize = np.array(self.idx[:4])
with self.subTest(initialize=initialize):
selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize)
selector.fit(self.X)
for i in range(4):
self.assertEqual(selector.selected_idx_[i], self.idx[i])

initialize = np.array([1, 5, 3, 0.25])
with self.subTest(initialize=initialize):
with self.assertRaises(ValueError) as cm:
selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize)
selector.fit(self.X)
self.assertEqual(
str(cm.exception), "Invalid value of the initialize parameter"
)

initialize = np.array([[1, 5, 3], [2, 4, 6]])
with self.subTest(initialize=initialize):
with self.assertRaises(ValueError) as cm:
selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize)
selector.fit(self.X)
self.assertEqual(
str(cm.exception), "Invalid value of the initialize parameter"
)

with self.assertRaises(ValueError) as cm:
selector = FPS(n_to_select=1, initialize="bad")
selector.fit(self.X)
Expand Down
26 changes: 26 additions & 0 deletions tests/test_sample_simple_fps.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest

import numpy as np
from sklearn.datasets import load_diabetes as get_dataset
from sklearn.utils.validation import NotFittedError

Expand Down Expand Up @@ -43,6 +44,31 @@ def test_initialize(self):
for i in range(4):
self.assertEqual(selector.selected_idx_[i], self.idx[i])

initialize = np.array(self.idx[:4])
with self.subTest(initialize=initialize):
selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize)
selector.fit(self.X)
for i in range(4):
self.assertEqual(selector.selected_idx_[i], self.idx[i])

initialize = np.array([1, 5, 3, 0.25])
with self.subTest(initialize=initialize):
with self.assertRaises(ValueError) as cm:
selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize)
selector.fit(self.X)
self.assertEqual(
str(cm.exception), "Invalid value of the initialize parameter"
)

initialize = np.array([[1, 5, 3], [2, 4, 6]])
with self.subTest(initialize=initialize):
with self.assertRaises(ValueError) as cm:
selector = FPS(n_to_select=len(self.idx) - 1, initialize=initialize)
selector.fit(self.X)
self.assertEqual(
str(cm.exception), "Invalid value of the initialize parameter"
)

with self.assertRaises(ValueError) as cm:
selector = FPS(n_to_select=1, initialize="bad")
selector.fit(self.X)
Expand Down

0 comments on commit dd31493

Please sign in to comment.