From e84e8bb12ca7ca43e3df3c794fb769868a8cb491 Mon Sep 17 00:00:00 2001 From: KulikDM Date: Tue, 5 Dec 2023 18:15:04 +0200 Subject: [PATCH] DSN random state fix and tests --- .pre-commit-config.yaml | 4 ++-- README.rst | 2 +- docs/index.rst | 2 +- pythresh/test/test_filter.py | 10 ++++++---- pythresh/thresholds/dsn.py | 3 ++- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 73a08d3..0ae4415 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -42,7 +42,7 @@ repos: name: Sort imports - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.1.4 + rev: v0.1.7 hooks: - id: ruff args: [--exit-non-zero-on-fix, --fix, --line-length=180] diff --git a/README.rst b/README.rst index b2beb51..f590536 100644 --- a/README.rst +++ b/README.rst @@ -293,7 +293,7 @@ smallest uncertainty about its mean and is the most robust (best least accurate prediction). However, for interpretability and general performance the ``FILTER`` thresholder is a good fit. -Further utilities are available for assiting in the selection of the +Further utilities are available for assisting in the selection of the most optimal outlier detection and thresholding methods `ranking `_ as well as determining the confidence with regards to the selected thresholding diff --git a/docs/index.rst b/docs/index.rst index a974dfa..b7e8700 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -100,7 +100,7 @@ provided the smallest uncertainty about its mean and is the most robust (best least accurate prediction). However, for interpretability and general performance the ``FILTER`` thresholder is a good fit. -Further utilities are available for assiting in the selection of the +Further utilities are available for assisting in the selection of the most optimal outlier detection and thresholding methods `ranking `_ as well as determining the confidence with regards to the selected thresholding diff --git a/pythresh/test/test_filter.py b/pythresh/test/test_filter.py index 9cf30fd..e652433 100644 --- a/pythresh/test/test_filter.py +++ b/pythresh/test/test_filter.py @@ -45,15 +45,17 @@ def setUp(self): self.methods = ['gaussian', 'savgol', 'hilbert', 'wiener', 'medfilt', 'decimate', 'detrend', 'resample'] - self.sigma = 'auto' + pre_sig = len(scores) + + self.sigmas = ['auto', int(pre_sig**0.6), int(pre_sig**75)] def test_prediction_labels(self): - params = product(self.all_scores, self.methods) + params = product(self.all_scores, self.methods, self.sigmas) - for scores, method in params: + for scores, method, sigma in params: - self.thres = FILTER(method=method, sigma=self.sigma) + self.thres = FILTER(method=method, sigma=sigma) pred_labels = self.thres.eval(scores) assert (self.thres.thresh_ is not None) diff --git a/pythresh/thresholds/dsn.py b/pythresh/thresholds/dsn.py index aa0ae33..a89d0e2 100644 --- a/pythresh/thresholds/dsn.py +++ b/pythresh/thresholds/dsn.py @@ -217,7 +217,8 @@ def _MAH_metric(self): """Calculate the Mahalanobis distance.""" # fit a Minimum Covariance Determinant (MCD) robust estimator to data - robust_cov = MinCovDet().fit(np.array([self.val_norm]).T) + robust_cov = MinCovDet(random_state=self.random_state).fit( + np.array([self.val_norm]).T) # Get the Mahalanobis distance dist = robust_cov.mahalanobis(np.array([self.val_data]).T)