diff --git a/README.md b/README.md index d324786..e19664d 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ y_pred, y_pred_lower, y_pred_upper = split_cp.predict(X_test, alpha=0.1) ``` -The library provides several metrics (`deel.puncc.metrics`) and plotting capabilities (`deel.puncc.plotting`) to evaluate and visualize the results of a conformal procedure. For a target error rate of $\alpha = 0.1$, the marginal coverage reached in this example on the test set is higher than $90$% (see [Introduction tutorial](docs/puncc_intro.ipynb)): +The library provides several metrics (`deel.puncc.metrics`) and plotting capabilities (`deel.puncc.plotting`) to evaluate and visualize the results of a conformal procedure. For a target error rate of $\alpha = 0.1$, the marginal coverage reached in this example on the test set is higher than $90$% (see [**Introduction tutorial**](docs/puncc_intro.ipynb)):
90% Prediction Interval with the Split Conformal Prediction Method @@ -138,7 +138,31 @@ The library provides several metrics (`deel.puncc.metrics`) and plotting capabil - A direct approach to run state-of-the-art conformal prediction procedures. This is what we used in the previous conformal regression example. - **Low-level API**: a more flexible approach based on full customization of the prediction model, the choice of nonconformity scores and the split between fit and calibration datasets. -A quick comparison of both approaches is provided in the [API tutorial](docs/api_intro.ipynb) for a regression problem. +A quick comparison of both approaches is provided in the [**API tutorial**](docs/api_intro.ipynb) for a regression problem. + +
+ +
+ +### 🖥️ Implemented Algorithms +
+ Overview of Implemented Methods from the Literature: + +| Procedure Type | Procedure Name | Description (more details in [Theory overview](https://deel-ai.github.io/puncc/theory_overview.html)) | +|-----------------------------------------|------------------------------------------------------|-------------------------------------------------------| +| Conformal Regression | [`deel.puncc.regression.SplitCP`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.SplitCP) | Split Conformal Regression | +| Conformal Regression | [`deel.puncc.regression.LocallyAdaptiveCP`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.LocallyAdaptiveCP) | Locally Adaptive Conformal Regression | +| Conformal Regression | [`deel.puncc.regression.CQR`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.CQR) | Conformalized Quantile Regression | +| Conformal Regression | [`deel.puncc.regression.CvPlus`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.CVPlus) | CV + (cross-validation) | +| Conformal Regression | [`deel.puncc.regression.EnbPI`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.EnbPI) | Ensemble Batch Prediction Intervals method | +| Conformal Regression | [`deel.puncc.regression.aEnbPI`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.AdaptiveEnbPI) | Locally adaptive Ensemble Batch Prediction Intervals method | +| Conformal Classification | [`deel.puncc.classification.LAC`](https://deel-ai.github.io/puncc/classification.html#deel.puncc.classification.LAC) | Least Ambiguous Set-Valued Classifiers | +| Conformal Classification | [`deel.puncc.classification.APS`](https://deel-ai.github.io/puncc/classification.html#deel.puncc.classification.APS) | Adaptive Prediction Sets | +| Conformal Classification | [`deel.puncc.classification.RAPS`](https://deel-ai.github.io/puncc/classification.html#deel.puncc.classification.RAPS) | Regularized Adaptive Prediction Sets (APS is a special case where $\lambda = 0$) | +| Conformal Anomaly Detection | [`deel.puncc.anomaly_detection.SplitCAD`](https://deel-ai.github.io/puncc/anomaly_detection.html#deel.puncc.anomaly_detection.SplitCAD) | Split Conformal Anomaly detection (used to control the maximum false positive rate) | +| Conformal Object Detection | [`deel.puncc.object_detection.SplitBoxWise`](https://deel-ai.github.io/puncc/object_detection.html#deel.puncc.object_detection.SplitBoxWise) | Box-wise split conformal object detection | + +
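
To make the table concrete: the snippet below is a condensed sketch of how one of the newly listed procedures (`LAC`) runs end to end. It is distilled from the `LAC` docstring example added in this diff and assumes a classifier wrapped so that its `predict` returns class probabilities; the `RFPredictor` wrapper exists only for that purpose.

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from deel.puncc.api.prediction import BasePredictor
from deel.puncc.classification import LAC
from deel.puncc.metrics import classification_mean_coverage

# Toy classification data, split into train/test and then fit/calibration subsets
X, y = make_classification(n_samples=1000, n_features=4, n_informative=2, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_fit, X_calib, y_fit, y_calib = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# Wrap the random forest so that predict(.) returns class probabilities
class RFPredictor(BasePredictor):
    def predict(self, X, **kwargs):
        return self.model.predict_proba(X, **kwargs)

lac_cp = LAC(RFPredictor(RandomForestClassifier(n_estimators=100, random_state=0)))

# Train on the fit set and compute nonconformity scores on the calibration set
lac_cp.fit(X_fit=X_fit, y_fit=y_fit, X_calib=X_calib, y_calib=y_calib)

# Point predictions and prediction sets at target miscoverage alpha = 0.1
y_pred, set_pred = lac_cp.predict(X_test, alpha=0.1)
print(f"Marginal coverage: {classification_mean_coverage(y_test, set_pred):.2f}")
```

The `APS` and `RAPS` procedures from the table follow the same `fit`/`predict` pattern; only the constructor (and, for `RAPS`, its regularization hyperparameters) changes.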
## 📚 Citation diff --git a/deel/puncc/api/nonconformity_scores.py b/deel/puncc/api/nonconformity_scores.py index 81971c8..5332d7b 100644 --- a/deel/puncc/api/nonconformity_scores.py +++ b/deel/puncc/api/nonconformity_scores.py @@ -46,6 +46,36 @@ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Classification ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+def lac_score(
+    Y_pred: Iterable,
+    y_true: Iterable,
+) -> Iterable:
+    """LAC nonconformity score.
+
+    :param Iterable Y_pred:
+        :math:`Y_{\\text{pred}} = (P_{\\text{C}_1}, ..., P_{\\text{C}_n})`
+        where :math:`P_{\\text{C}_i}` is the logit associated to class i.
+    :param Iterable y_true: true labels.
+
+    :returns: LAC nonconformity scores.
+    :rtype: Iterable
+
+    :raises TypeError: unsupported data types.
+    """
+    supported_types_check(Y_pred, y_true)
+
+    # Check if logits sum is close to one
+    logit_normalization_check(Y_pred)
+
+    if not isinstance(Y_pred, np.ndarray):
+        raise NotImplementedError(
+            "LAC nonconformity score only implemented for ndarrays"
+        )
+
+    # Compute and return the LAC nonconformity score
+    return 1 - Y_pred[np.arange(y_true.shape[0]), y_true]
+
+
 def raps_score(
     Y_pred: Iterable,
     y_true: Iterable,
diff --git a/deel/puncc/api/prediction_sets.py b/deel/puncc/api/prediction_sets.py index bad48cd..f228820 100644 --- a/deel/puncc/api/prediction_sets.py +++ b/deel/puncc/api/prediction_sets.py @@ -44,6 +44,38 @@ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Classification ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+def lac_set(
+    Y_pred, scores_quantile
+) -> List:
+    """LAC prediction set.
+
+    :param Iterable Y_pred:
+        :math:`Y_{\\text{pred}} = (P_{\\text{C}_1}, ..., P_{\\text{C}_n})`
+        where :math:`P_{\\text{C}_i}` is the logit associated to class i.
+
+    :param ndarray scores_quantile: quantile of nonconformity scores computed
+        on a calibration set for a given :math:`\\alpha`
+
+
+    :returns: LAC prediction sets.
+    :rtype: Iterable
+
+    """
+    # Check if logits sum is close to one
+    logit_normalization_check(Y_pred)
+
+    pred_len = len(Y_pred)
+
+    logger.debug(f"Shape of Y_pred: {Y_pred.shape}")
+
+    # Build prediction sets
+    prediction_sets = [
+        np.where(Y_pred[i] >= 1 - scores_quantile)[0].tolist() for i in range(pred_len)
+    ]
+
+    return (prediction_sets,)
+
+
 def raps_set(
     Y_pred, scores_quantile, lambd: float = 0, k_reg: int = 1, rand: bool = True
 ) -> List:
diff --git a/deel/puncc/classification.py b/deel/puncc/classification.py index 9789667..2a7b195 100644 --- a/deel/puncc/classification.py +++ b/deel/puncc/classification.py @@ -38,6 +38,133 @@ from deel.puncc.api.prediction import BasePredictor
 from deel.puncc.api.splitting import IdSplitter
 from deel.puncc.api.splitting import RandomSplitter
+from deel.puncc.regression import SplitCP
+
+
+class LAC(SplitCP):
+    """Implementation of the Least Ambiguous Set-Valued Classifier (LAC).
+    For more details, we refer the user to the
+    :ref:`theory overview page `.
+
+    :param BasePredictor predictor: a predictor implementing fit and predict.
+    :param bool train: if False, prediction model(s) will not be trained and
+        will be used as is. Defaults to True.
+
+    .. 
_example lac:
+
+    Example::
+
+        from deel.puncc.classification import LAC
+        from deel.puncc.api.prediction import BasePredictor
+
+        from sklearn.datasets import make_classification
+        from sklearn.model_selection import train_test_split
+        from sklearn.ensemble import RandomForestClassifier
+
+        from deel.puncc.metrics import classification_mean_coverage
+        from deel.puncc.metrics import classification_mean_size
+
+        import numpy as np
+
+        from tensorflow.keras.utils import to_categorical
+
+        # Generate a random classification problem
+        X, y = make_classification(n_samples=1000, n_features=4, n_informative=2,
+                    n_classes=2, random_state=0, shuffle=False)
+
+        # Split data into train and test
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=.2, random_state=0
+        )
+
+        # Split train data into fit and calibration
+        X_fit, X_calib, y_fit, y_calib = train_test_split(
+            X_train, y_train, test_size=.2, random_state=0
+        )
+
+        # One hot encoding of classes
+        y_fit_cat = to_categorical(y_fit)
+        y_calib_cat = to_categorical(y_calib)
+        y_test_cat = to_categorical(y_test)
+
+        # Create rf classifier
+        rf_model = RandomForestClassifier(n_estimators=100, random_state=0)
+
+        # Create a wrapper of the random forest model to redefine its predict method
+        # into logits predictions. Make sure to subclass BasePredictor.
+        # Note that we needed to build a new wrapper (over BasePredictor) only because
+        # the predict(.) method of RandomForestClassifier does not predict logits.
+        # Otherwise, it is enough to use BasePredictor (e.g., neural network with softmax).
+        class RFPredictor(BasePredictor):
+            def predict(self, X, **kwargs):
+                return self.model.predict_proba(X, **kwargs)
+
+        # Wrap model in the newly created RFPredictor
+        rf_predictor = RFPredictor(rf_model)
+
+        # CP method initialization
+        lac_cp = LAC(rf_predictor)
+
+        # The call to `fit` trains the model and computes the nonconformity
+        # scores on the calibration set
+        lac_cp.fit(X_fit=X_fit, y_fit=y_fit, X_calib=X_calib, y_calib=y_calib)
+
+
+        # The predict method infers prediction sets with respect to
+        # the significance level alpha = 20%
+        y_pred, set_pred = lac_cp.predict(X_test, alpha=.2)
+
+        # Compute marginal coverage
+        coverage = classification_mean_coverage(y_test, set_pred)
+        size = classification_mean_size(set_pred)
+
+        print(f"Marginal coverage: {np.round(coverage, 2)}")
+        print(f"Average prediction set size: {np.round(size, 2)}")
+    """
+
+    def __init__(
+        self,
+        predictor: Union[BasePredictor, Any],
+        train: bool = True,
+        random_state: float = None,
+    ):
+        super().__init__(
+            predictor=predictor,
+            train=train,
+            random_state=random_state,
+        )
+        self.calibrator = BaseCalibrator(
+            nonconf_score_func=nonconformity_scores.lac_score,
+            pred_set_func=prediction_sets.lac_set,
+            weight_func=None,
+        )
+        self.conformal_predictor = ConformalPredictor(
+            predictor=self.predictor,
+            calibrator=self.calibrator,
+            splitter=object(),
+            train=self.train,
+        )
+
+    def predict(self, X_test: Iterable, alpha: float) -> Tuple:
+        """Conformal set predictions (w.r.t target miscoverage alpha)
+        for new samples.
+
+        :param Iterable X_test: features of new samples.
+        :param float alpha: target maximum miscoverage. 
+
+        :returns: Tuple composed of the model estimate y_pred and the
+            prediction set set_pred
+        :rtype: Tuple
+        """
+
+        if self.conformal_predictor is None:
+            raise RuntimeError("Fit method should be called before predict.")
+
+        (y_pred, set_pred) = self.conformal_predictor.predict(
+            X_test, alpha=alpha
+        )
+
+        return y_pred, set_pred
 class RAPS:
@@ -128,7 +255,7 @@ def predict(self, X, **kwargs):
        raps_cp.fit(X_fit=X_fit, y_fit=y_fit, X_calib=X_calib, y_calib=y_calib)
-        # The predict method infers prediction intervals with respect to
+        # The predict method infers prediction sets with respect to
         # the significance level alpha = 20%
         y_pred, set_pred = raps_cp.predict(X_test, alpha=.2)
@@ -252,7 +379,7 @@ def fit(
         self.conformal_predictor.fit(X=X, y=y, **kwargs)
     def predict(self, X_test: Iterable, alpha: float) -> Tuple:
-        """Conformal interval predictions (w.r.t target miscoverage alpha)
+        """Conformal set predictions (w.r.t target miscoverage alpha)
         for new samples.
         :param Iterable X_test: features of new samples.
@@ -344,7 +471,7 @@ def predict(self, X, **kwargs):
        # scores on the calibration set
        aps_cp.fit(X_fit=X_fit, y_fit=y_fit, X_calib=X_calib, y_calib=y_calib)
-        # The predict method infers prediction intervals with respect to
+        # The predict method infers prediction sets with respect to
         # the significance level alpha = 20%
         y_pred, set_pred = aps_cp.predict(X_test, alpha=.2)
diff --git a/docs/assets/puncc_architecture.png b/docs/assets/puncc_architecture.png index 571415d..4094de2 100644 Binary files a/docs/assets/puncc_architecture.png and b/docs/assets/puncc_architecture.png differ
diff --git a/docs/puncc_architecture.ipynb b/docs/puncc_architecture.ipynb index 8977ff1..6d893c8 100644 --- a/docs/puncc_architecture.ipynb +++ b/docs/puncc_architecture.ipynb @@ -13,16 +13,17 @@
    "\n",
    "| Procedure Type | Procedure Name | Description (more details in [Theory overview](https://deel-ai.github.io/puncc/theory_overview.html)) |\n",
    "|-----------------------------------------|------------------------------------------------------|-------------------------------------------------------|\n",
-    "| Conformal Regression | `deel.puncc.regression.SplitCP` | Split Conformal Prediction |\n",
-    "| Conformal Regression | `deel.puncc.regression.LocallyAdaptiveCP` | Locally Adaptive Conformal Prediction |\n",
-    "| Conformal Regression | `deel.puncc.regression.CQR` | Conformalized Quantile Regression |\n",
-    "| Conformal Regression | `deel.puncc.regression.CvPlus` | CV + (cross-validation) |\n",
-    "| Conformal Regression | `deel.puncc.regression.EnbPI` | Ensemble Batch Prediction Intervals method |\n",
-    "| Conformal Regression | `deel.puncc.regression.aEnbPI` | Locally adaptive Ensemble Batch Prediction Intervals method |\n",
-    "| Conformal Classification | `deel.puncc.classification.APS` | Adaptive Prediction Sets |\n",
-    "| Conformal Classification | `deel.puncc.classification.RAPS` | Regularized Adaptive Prediction Sets (APS is a special case where $\\lambda = 0$) |\n",
-    "| Conformal Anomaly Detection | `deel.puncc.anomaly_detection.SplitCAD` | Split Conformal Anomaly detection (used to control the maximum false positive rate) |\n",
-    "| Conformal Object Detection | `deel.puncc.object_detection.SplitBoxWise` | Box-wise split conformal object detection |\n",
+    "| Conformal Regression | [`deel.puncc.regression.SplitCP`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.SplitCP) | Split Conformal Regression |\n",
+    "| Conformal Regression | 
[`deel.puncc.regression.LocallyAdaptiveCP`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.LocallyAdaptiveCP) | Locally Adaptive Conformal Regression |\n", + "| Conformal Regression | [`deel.puncc.regression.CQR`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.CQR) | Conformalized Quantile Regression |\n", + "| Conformal Regression | [`deel.puncc.regression.CvPlus`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.CVPlus) | CV + (cross-validation) |\n", + "| Conformal Regression | [`deel.puncc.regression.EnbPI`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.EnbPI) | Ensemble Batch Prediction Intervals method |\n", + "| Conformal Regression | [`deel.puncc.regression.aEnbPI`](https://deel-ai.github.io/puncc/regression.html#deel.puncc.regression.AdaptiveEnbPI) | Locally adaptive Ensemble Batch Prediction Intervals method |\n", + "| Conformal Classification | [`deel.puncc.classification.LAC`](https://deel-ai.github.io/puncc/classification.html#deel.puncc.classification.LAC) | Least Ambiguous Set-Valued Classifiers |\n", + "| Conformal Classification | [`deel.puncc.classification.APS`](https://deel-ai.github.io/puncc/classification.html#deel.puncc.classification.APS) | Adaptive Prediction Sets |\n", + "| Conformal Classification | [`deel.puncc.classification.RAPS`](https://deel-ai.github.io/puncc/classification.html#deel.puncc.classification.RAPS) | Regularized Adaptive Prediction Sets (APS is a special case where $\\lambda = 0$) |\n", + "| Conformal Anomaly Detection | [`deel.puncc.anomaly_detection.SplitCAD`](https://deel-ai.github.io/puncc/anomaly_detection.html#deel.puncc.anomaly_detection.SplitCAD) | Split Conformal Anomaly detection (used to control the maximum false positive rate) |\n", + "| Conformal Object Detection | [`deel.puncc.object_detection.SplitBoxWise`](https://deel-ai.github.io/puncc/object_detection.html#deel.puncc.object_detection.SplitBoxWise) | Box-wise split conformal object detection |\n", "\n", "Each of these procedures conformalize point-based or interval-based models that are wrapped in a predictor and passed as argument to the constructor. 
Wrapping the models in a predictor (`deel.puncc.api.prediction`) enables to work with several ML/DL libraries and data structures.\n", "\n", @@ -72,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "2af0185f", "metadata": {}, "outputs": [], @@ -88,16 +89,18 @@ "model = linear_model.LinearRegression()\n", "\n", "# Definition of a predictor (This will be explained later)\n", - "my_predictor = BasePredictor(model) # Predictor\n", + "my_predictor = BasePredictor(model) # Predictor\n", "\n", "# Definition of a calibrator, built for a given nonconformity scores and a\n", "# procedure to build the prediction sets\n", "\n", + "\n", "## Definition of a custom nonconformity scores function.\n", "## Alternatively, several ready-to-use nonconf scores are provided in\n", "## the module deel.puncc.nonconformity_scores (more on this later)\n", "def my_ncf(y_pred, y_true):\n", - " return np.abs(y_pred-y_true)\n", + " return np.abs(y_pred - y_true)\n", + "\n", "\n", "## Definition of a custom function to build prediction sets.\n", "## Alternatively, several ready-to-use procedure are provided in\n", @@ -107,17 +110,19 @@ " y_upper = y_pred + nonconf_scores_quantile\n", " return y_lower, y_upper\n", "\n", + "\n", "## Calibrator construction\n", - "my_calibrator = BaseCalibrator(nonconf_score_func=my_ncf,\n", - " pred_set_func=my_psf) # Calibrator\n", + "my_calibrator = BaseCalibrator(\n", + " nonconf_score_func=my_ncf, pred_set_func=my_psf\n", + ") # Calibrator\n", "\n", "# Definition of a K-fold splitter that produces 20 folds of fit/calibration\n", - "kfold_splitter = KFoldSplitter(K=20, random_state=42) # Splitter\n", + "kfold_splitter = KFoldSplitter(K=20, random_state=42) # Splitter\n", "\n", "# Conformal prediction canvas\n", - "conformal_predictor = ConformalPredictor(predictor=my_predictor,\n", - " calibrator=my_calibrator,\n", - " splitter=kfold_splitter)" + "conformal_predictor = ConformalPredictor(\n", + " predictor=my_predictor, calibrator=my_calibrator, splitter=kfold_splitter\n", + ")" ] }, { @@ -155,28 +160,36 @@ "\n", "# Data\n", "## Generate a random regression problem\n", - "X, y = make_regression(n_samples=1000, n_features=4, n_informative=2,\n", - " random_state=0, noise=10, shuffle=False)\n", + "X, y = make_regression(\n", + " n_samples=1000,\n", + " n_features=4,\n", + " n_informative=2,\n", + " random_state=0,\n", + " noise=10,\n", + " shuffle=False,\n", + ")\n", "\n", "## Split data into train and test\n", "X_train, X_test, y_train, y_test = train_test_split(\n", - " X, y, test_size=.2, random_state=0\n", + " X, y, test_size=0.2, random_state=0\n", ")\n", "\n", "# Regression linear model\n", "model = linear_model.LinearRegression()\n", "\n", "# Definition of a predictor (This will be explained later)\n", - "my_predictor = BasePredictor(model) # Predictor\n", + "my_predictor = BasePredictor(model) # Predictor\n", "\n", "# Definition of a calibrator, built for a given nonconformity scores and a\n", "# procedure to build the prediction sets\n", "\n", + "\n", "## Definition of a custom nonconformity scores function.\n", "## Alternatively, several ready-to-use nonconf scores are provided in\n", "## the module deel.puncc.nonconformity_scores (more on this later)\n", "def my_ncf(y_pred, y_true):\n", - " return np.abs(y_pred-y_true)\n", + " return np.abs(y_pred - y_true)\n", + "\n", "\n", "## Definition of a custom function to build prediction sets.\n", "## Alternatively, several ready-to-use procedure are provided in\n", @@ -186,37 +199,43 @@ " 
y_upper = y_pred + nonconf_scores_quantile\n", " return y_lower, y_upper\n", "\n", + "\n", "## Calibrator construction\n", - "my_calibrator = BaseCalibrator(nonconf_score_func=my_ncf,\n", - " pred_set_func=my_psf) # Calibrator\n", + "my_calibrator = BaseCalibrator(\n", + " nonconf_score_func=my_ncf, pred_set_func=my_psf\n", + ") # Calibrator\n", "\n", "# Definition of a K-fold splitter that produces 20 folds of fit/calibration\n", - "kfold_splitter = KFoldSplitter(K=20, random_state=42) # Splitter\n", + "kfold_splitter = KFoldSplitter(K=20, random_state=42) # Splitter\n", "\n", "# Conformal prediction canvas\n", - "conformal_predictor = ConformalPredictor(predictor=my_predictor,\n", - " calibrator=my_calibrator,\n", - " splitter=kfold_splitter)\n", + "conformal_predictor = ConformalPredictor(\n", + " predictor=my_predictor, calibrator=my_calibrator, splitter=kfold_splitter\n", + ")\n", "conformal_predictor.fit(X_train, y_train)\n", - "y_pred, y_pred_lower, y_pred_upper = conformal_predictor.predict(X_test, alpha=.1)\n", + "y_pred, y_pred_lower, y_pred_upper = conformal_predictor.predict(\n", + " X_test, alpha=0.1\n", + ")\n", "\n", "# Compute empirical marginal coverage and average width of the prediction intervals\n", "coverage = metrics.regression_mean_coverage(y_test, y_pred_lower, y_pred_upper)\n", - "width = metrics.regression_sharpness(y_pred_lower=y_pred_lower,\n", - " y_pred_upper=y_pred_upper)\n", + "width = metrics.regression_sharpness(\n", + " y_pred_lower=y_pred_lower, y_pred_upper=y_pred_upper\n", + ")\n", "print(f\"Marginal coverage: {coverage:.2f}\")\n", "print(f\"Average width: {width:.2f}\")\n", "\n", "# Figure of the prediction bands\n", "ax = plot_prediction_intervals(\n", - " X = X_test[:,0],\n", - " y_true=y_test,\n", - " y_pred=y_pred,\n", - " y_pred_lower=y_pred_lower,\n", - " y_pred_upper=y_pred_upper,\n", - " sort_X=True,\n", - " size=(10, 6),\n", - " loc=\"upper left\")\n", + " X=X_test[:, 0],\n", + " y_true=y_test,\n", + " y_pred=y_pred,\n", + " y_pred_lower=y_pred_lower,\n", + " y_pred_upper=y_pred_upper,\n", + " sort_X=True,\n", + " size=(10, 6),\n", + " loc=\"upper left\",\n", + ")\n", "\n", "_ = ax.set_xlabel(\"Dummy X\")\n", "_ = ax.set_ylabel(\"Dummy Y\")" @@ -261,7 +280,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "b9cc46fb", "metadata": {}, "outputs": [], @@ -272,6 +291,7 @@ "# Create rf classifier\n", "rf_model = RandomForestClassifier(n_estimators=100, random_state=0)\n", "\n", + "\n", "# Create a wrapper of the random forest model to redefine its predict method\n", "# into logits predictions. 
Make sure to subclass BasePredictor.\n", "# Note that we needed to build a new wrapper (over BasePredictor) only because\n", @@ -281,6 +301,7 @@ " def predict(self, X, **kwargs):\n", " return self.model.predict_proba(X, **kwargs)\n", "\n", + "\n", "# Wrap model in the newly created RFPredictor\n", "rf_predictor = RFPredictor(rf_model)" ] @@ -308,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "72983b23", "metadata": {}, "outputs": [], @@ -318,8 +339,10 @@ "from deel.puncc.api import prediction_sets\n", "\n", "## Calibrator construction\n", - "my_calibrator = BaseCalibrator(nonconf_score_func=nonconformity_scores.absolute_difference,\n", - " pred_set_func=prediction_sets.constant_interval)" + "my_calibrator = BaseCalibrator(\n", + " nonconf_score_func=nonconformity_scores.absolute_difference,\n", + " pred_set_func=prediction_sets.constant_interval,\n", + ")" ] }, { @@ -333,28 +356,52 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "b9866d85", "metadata": {}, "outputs": [], "source": [ "from deel.puncc.api.calibration import BaseCalibrator\n", "\n", + "\n", + "\n", "## Definition of a custom nonconformity scores function.\n", + "\n", + "\n", "## Alternatively, several ready-to-use nonconf scores are provided in\n", + "\n", + "\n", "## the module deel.puncc.nonconformity_scores\n", + "\n", + "\n", "def my_ncf(y_pred, y_true):\n", - " return np.abs(y_pred-y_true)\n", + "\n", + " return np.abs(y_pred - y_true)\n", + "\n", + "\n", "\n", "## Definition of a custom function to build prediction sets.\n", + "\n", + "\n", "## Alternatively, several ready-to-use procedure are provided in\n", + "\n", + "\n", "## the module deel.puncc.prediction_sets\n", + "\n", + "\n", "def my_psf(y_pred, nonconf_scores_quantile):\n", + "\n", " y_lower = y_pred - nonconf_scores_quantile\n", + "\n", " y_upper = y_pred + nonconf_scores_quantile\n", + "\n", " return y_lower, y_upper\n", "\n", + "\n", + "\n", "## Calibrator construction\n", + "\n", + "\n", "my_calibrator = BaseCalibrator(nonconf_score_func=my_ncf, pred_set_func=my_psf)" ] }, diff --git a/docs/source/classification.rst b/docs/source/classification.rst index b3012e5..d2eb20d 100644 --- a/docs/source/classification.rst +++ b/docs/source/classification.rst @@ -12,6 +12,8 @@ and :func:`predict` methods. :doc:`Prediction module ` from the :doc:`API ` ensures the compliance of models from various ML/DL libraries (such as Keras and scikit-learn) to **puncc**. +.. autoclass:: deel.puncc.classification.LAC + .. autoclass:: deel.puncc.classification.RAPS .. autoclass:: deel.puncc.classification.APS diff --git a/docs/source/conf.py b/docs/source/conf.py index 471b1f5..d7e5765 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -26,7 +26,7 @@ author = "Mouhcine Mendil, Luca Mossina and Joseba Dalmau" # The full version, including alpha/beta/rc tags -release = "0.7.8" +release = "0.8.0" # -- General configuration --------------------------------------------------- diff --git a/docs/source/theory_overview.rst b/docs/source/theory_overview.rst index 604c865..dd0055f 100644 --- a/docs/source/theory_overview.rst +++ b/docs/source/theory_overview.rst @@ -232,7 +232,7 @@ for time series data of the form :math:`Y_t = f(X_t) + \epsilon_t`, where :math:`\epsilon_t` are identically distributed, but not necessarily independent. 
-Given a training data set :math:`D=\lbrace (X_i, Y_i) \rbrace_{i=1}^n`
+Given a training dataset :math:`D=\lbrace (X_i, Y_i) \rbrace_{i=1}^n`
 and a test set :math:`D_{test} = \lbrace (X_t,Y_t) \rbrace_{t=n+1}^{n_{test}}`,
 the EnbPI algorithm aims at constructing prediction sets for each test point :math:`X_t`.
@@ -247,12 +247,12 @@ e.g. via a simple average, a bagging or an ensembling method. The algorithm EnbPI is performed in three stages:
 **Training**
-    #. Sample :math:`B` bootstrap data sets :math:`S_b`, for :math:`b=1,\dots, B` with replacement from :math:`D`.
+    #. Sample :math:`B` bootstrap datasets :math:`S_b`, for :math:`b=1,\dots, B` with replacement from :math:`D`.
     #. Train :math:`B` bootstrap models :math:`\widehat{f}^b = \mathcal{A}(S_b)`.
 **Calibration**
     #. Compute the predictions on each training sample :math:`X_i\in D`. Only the models :math:`\widehat{f}^b` where :math:`X_i\not\in S_b` are used in the aggregation: :math:`\widehat{f}_{-i}(X_i):=\phi\big( \lbrace \widehat{f}^b(X_i) | X_i\not\in S_b\rbrace\big)`.
-    #. Compute the errors :math:`R_i=|Y_i-\widehat{f}_{-i}(X_i)|`, and stock them as :math:`\mathcal{R}_1:=\lbrace R_i,i=1,\dots, n\rbrace`.
+    #. Compute the errors :math:`R_i=|Y_i-\widehat{f}_{-i}(X_i)|`, and store them as :math:`\mathcal{R}_1:=\lbrace R_i,i=1,\dots, n\rbrace`.
 **Inference**
     #. Compute the predictions on each test sample :math:`X_t\in D_{test}` by setting :math:`\widehat{f}_{-t}(X_t):= \frac{1}{T}\sum_{i=1}^T \widehat{f}_{-i}(X_t)`.
@@ -311,13 +311,13 @@ Conformal Classification
 .. The set :math:`\tilde{C}(x; \beta) = \{y | \text{min}_l S(x; l) \geq \beta \}` for :math:`\beta := 1 - \alpha` such that :math:`P\{Y \in C(x; l) \} \geq 1 - \alpha`
-Adaptive Prediction Sets (APS)
-*******************************************
-.. _theory aps:
+Least Ambiguous Set-Valued Classifiers (LAC)
+********************************************
+.. _theory lac:
 As for the Split Conformal Regression algorithm,
-the APS algorithm introduced in [Romano2020]_
-requires us to split the data set :math:`D` into a proper training set :math:`D_{train}`
+the LAC algorithm introduced in [Sadinle2018]_
+requires us to split the dataset :math:`D` into a proper training set :math:`D_{train}`
 and an independent calibration set :math:`D_{calib}`.
 A classifier :math:`\widehat{\pi}` is trained using the proper training set :math:`D_{train}` only.
@@ -326,6 +326,30 @@ I.e. for each input :math:`x`,
 the output :math:`\widehat{\pi}(x)=(\widehat{\pi}_1(x),\dots,\widehat{\pi}_K(x))` is a probability vector
 and :math:`k=1,\dots, K` represent the possible different classes in the classification task.
+
+In order to construct the prediction sets :math:`\widehat{C}_\alpha`,
+the LAC algorithm works in two stages:
+
+**Calibration**
+    #. For each example :math:`X_i` in the calibration dataset, compute the error :math:`R_i=1-\widehat{\pi}_{Y_i}(X_i)`, i.e. 1 minus the softmax output of the ground truth class.
+    #. Store all errors in a vector :math:`\mathcal{R}`.
+
+**Inference**
+    #. Compute the probability threshold :math:`\delta_{\alpha}` as the :math:`(1-\alpha)(1 + 1/n_{calib})`-th empirical quantile of :math:`\mathcal{R}`.
+    #. The prediction set for a test point :math:`X_{new}` is defined as
+
+    .. math::
+        \widehat{C}_{\alpha}(X_{new})=\big\lbrace
+        k \, | \, \widehat{\pi}_{k}(X_{new})\geq 1 - \delta_\alpha
+        \big\rbrace\,.
+
+Adaptive Prediction Sets (APS)
+*******************************************
+.. 
_theory aps:
+
+The LAC algorithm produces prediction sets that have small average size, and is known to be Bayes optimal.
+However, it tends to undercover in regions where the classifier is uncertain, and overcover in regions where the classifier is confident.
+The APS algorithm introduced in [Romano2020]_ aims to produce prediction sets that are more stable and have a better coverage rate.
 We represent by :math:`\widehat{\pi}_{(1)}(x)\geq \cdots\geq \widehat{\pi}_{(K)}(x)` the softmax vector :math:`\widehat{\pi}` arranged in decreasing order, i.e. :math:`(k)` is the index of the class having the :math:`k`-th largest probability mass.
@@ -334,11 +358,11 @@ In order to construct the prediction sets :math:`\widehat{C}_\alpha`, the APS algorithm works in two stages:
 **Calibration**
-    #. For each example :math:`X_i` in the calibration data set, we compute the error :math:`R_i` as the probability mass needed for reaching the true label :math:`Y_i`, i.e. :math:`R_i=\widehat{\pi}_{(1)}+\cdots+\widehat{\pi}_{(k)}`, wehere :math:`(k)=Y_i`.
-    #. Stock all errors in a vector :math:`\mathcal{R}`.
+    #. For each example :math:`X_i` in the calibration dataset, we compute the error :math:`R_i` as the probability mass needed for reaching the true label :math:`Y_i`, i.e. :math:`R_i=\widehat{\pi}_{(1)}+\cdots+\widehat{\pi}_{(k)}`, where :math:`(k)=Y_i`.
+    #. Store all errors in a vector :math:`\mathcal{R}`.
 **Inference**
-    #. Compute the error margin :math:`\delta_{\alpha}` as the :math:`(1-\alpha)(1 + 1/n_{calib})`-th empirical quantile of :math:`\mathcal{R}`.
+    #. Compute the probability threshold :math:`\delta_{\alpha}` as the :math:`(1-\alpha)(1 + 1/n_{calib})`-th empirical quantile of :math:`\mathcal{R}`.
     #. The prediction set for a test point :math:`X_{new}` is defined as
 .. math::
@@ -361,7 +385,7 @@ Employing the same notations as for the APS algorithm above, the RAPS algorithm works in two stages:
 **Calibration**
-    #. For each example :math:`X_i` in the calibration data set, we compute the error :math:`R_i` as the probability mass needed for reaching the true label :math:`Y_i`, i.e.
+    #. For each example :math:`X_i` in the calibration dataset, we compute the error :math:`R_i` as the probability mass needed for reaching the true label :math:`Y_i`, i.e.
 .. math::
@@ -369,10 +393,10 @@ the RAPS algorithm works in two stages:
 where :math:`(k)=Y_i`. The regularization term :math:`\lambda(k-k_{reg}+1)` is added to the APS error, where :math:`\lambda` and :math:`k_{reg}` are hyperparameters.
-    #. Stock all errors in a vector :math:`\mathcal{R}`.
+    #. Store all errors in a vector :math:`\mathcal{R}`.
 **Inference**
-    #. Compute the error margin :math:`\delta_{\alpha}` as the :math:`(1-\alpha)(1 + 1/n_{calib})`-th empirical quantile of :math:`\mathcal{R}`.
+    #. Compute the probability threshold :math:`\delta_{\alpha}` as the :math:`(1-\alpha)(1 + 1/n_{calib})`-th empirical quantile of :math:`\mathcal{R}`.
     #. The prediction set for a test point :math:`X_{new}` is defined as :math:`\widehat{C}_{\alpha}(X_{new})=\big\lbrace (1),\dots,(k)\big\rbrace`, where
 .. math::
@@ -382,8 +406,31 @@ Conformal Anomaly Detection
 ---------------------------
+.. _theory cad:
+
+Conformal prediction can be extended to handle unsupervised anomaly detection, allowing us to identify data points that do not conform to the "normal" (or nominal) distribution of a dataset [Laxhammar2015]_. 
The goal is to assign a statistical guarantee to the anomaly detector, ensuring that it controls the **false positive rate**.
+
+To detect anomalies, we start with a model that assigns an anomaly score :math:`s(X)` to each data point. Higher scores indicate a higher likelihood of being an outlier.
+
+**Calibration**
+
+    #. For each example :math:`X_i` in the calibration dataset, we compute the nonconformity score as the anomaly score provided by the model, i.e. :math:`R_i = s(X_i)`.
+    #. Store all nonconformity scores in a vector :math:`\mathcal{R}`.
+
+**Inference**
+
+    #. Compute the anomaly score threshold :math:`\delta_{\alpha}` as the :math:`(1-\alpha)(1 + 1/n_{calib})`-th empirical quantile of :math:`\mathcal{R}`.
+    #. For a new test point :math:`X_{new}`, the conformalized anomaly detector classifies it as:
+
+    .. math::
+
+        \widehat{C}_{\alpha} = \begin{cases}
+        \text{Normal} & \text{if } s(X_{new}) \leq \delta_{\alpha} \\
+        \text{Anomaly} & \text{if } s(X_{new}) > \delta_{\alpha}
+        \end{cases}
+
+Conformal anomaly detection provides an error control guarantee, meaning that under the assumption of exchangeability, the probability of a false positive (labeling a nominal instance as an anomaly) is bounded by :math:`\alpha`.
-TBC
 Conformal Object Detection
 --------------------------
@@ -399,10 +446,12 @@ References
 .. [Angelopoulos2021] Angelopoulos, A. N., Bates, S., Jordan, M., & Malik, J (2021). Uncertainty Sets for Image Classifiers using Conformal Prediction. In Proceedings of ICLR 2021. https://openreview.net/forum?id=eNdiU_DbM9
 .. [Angelopoulos2022] Angelopoulos, A.N. and Bates, S., (2021). A gentle introduction to conformal prediction and distribution-free uncertainty quantification. arXiv preprint arXiv:2107.07511. https://arxiv.org/abs/2107.07511
 .. [Barber2021] Barber, R. F., Candes, E. J., Ramdas, A., & Tibshirani, R. J. (2021). Predictive inference with the jackknife+. Ann. Statist. 49 (1) 486 - 507, February 2021. https://arxiv.org/abs/1905.02928
+.. [Laxhammar2015] Laxhammar, R., & Falkman, G. (2015). Inductive conformal anomaly detection for sequential detection of anomalous sub-trajectories. Annals of Mathematics and Artificial Intelligence, 74, 67-94
 .. [Lei2018] Lei, J., G'Sell, M., Rinaldo, A., Tibshirani, R.J. and Wasserman, L., (2018). Distribution-free predictive inference for regression. Journal of the American Statistical Association, 113(523), pp.1094-1111. https://arxiv.org/abs/1604.04173
 .. [Papadopoulos2002] Papadopoulos, H., Proedrou, K., Vovk, V. and Gammerman, A., (2002). Inductive confidence machines for regression. In Proceedings of ECML 2002, Springer. https://link.springer.com/chapter/10.1007/3-540-36755-1_29
 .. [Papadopoulos2008] Papadopoulos, H., Gammerman, A. and Vovk, V., (2008). Normalized nonconformity measures for regression conformal prediction. In Proceedings of the IASTED International Conference on Artificial Intelligence and Applications (AIA 2008) (pp. 64-69).
 .. [deGrancey2022] de Grancey, F., Adam, J.L., Alecu, L., Gerchinovitz, S., Mamalet, F. and Vigouroux, D., 2022, June. Object detection with probabilistic guarantees: A conformal prediction approach. In International Conference on Computer Safety, Reliability, and Security.
 .. [Romano2019] Romano, Y., Patterson, E. and Candes, E., (2019). Conformalized quantile regression. In Proceedings of NeurIPS, 32. https://arxiv.org/abs/1905.03222
 .. [Romano2020] Romano, Y., Sesia, M., & Candes, E. (2020). Classification with valid and adaptive coverage. In Proceedings of NeurIPS, 33. 
https://arxiv.org/abs/2006.02544
+.. [Sadinle2018] Sadinle, M., Lei, J., Wasserman, L., (2018). Least Ambiguous Set-Valued Classifiers With Bounded Error Levels. Journal of the American Statistical Association, 114(525), 223-234. https://arxiv.org/abs/1609.00451
 .. [Xu2021] Xu, C. & Xie, Y.. (2021). Conformal prediction interval for dynamic time-series. Proceedings of ICML 2021. https://proceedings.mlr.press/v139/xu21h.html.
diff --git a/setup.py b/setup.py index e39839a..e342897 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ setuptools.setup(
     name="puncc",
-    version="0.7.8",
+    version="0.8.0",
     author=", ".join(["Mouhcine Mendil", "Luca Mossina", "Joseba Dalmau"]),
     author_email=", ".join(
         [
diff --git a/tests/test_classification_seed.py b/tests/test_classification_seed.py index 2afabc1..77490df 100644 --- a/tests/test_classification_seed.py +++ b/tests/test_classification_seed.py @@ -34,10 +34,12 @@ from deel.puncc.api.prediction import BasePredictor
 from deel.puncc.classification import APS
 from deel.puncc.classification import RAPS
+from deel.puncc.classification import LAC
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 RESULTS = {
+    "lac": {"cov": 0.89, "size": 1.54},
     "aps": {"cov": 0.89, "size": 1.92},
     "aps-norand": {"cov": 0.98, "size": 3.91},
     "raps": {"cov": 0.89, "size": 1.9},
@@ -45,6 +47,60 @@ }
+
+@pytest.mark.parametrize(
+    "alpha, random_state",
+    [(0.1, 42)],
+)
+def test_lac(mnist_data, alpha, random_state):
+    tf.keras.utils.set_random_seed(random_state)
+
+    # Get data
+    (X_train, X_test, y_train, y_test, y_train_cat, y_test_cat) = mnist_data
+
+    # Split fit and calib datasets
+    X_fit, X_calib = X_train[:50000], X_train[50000:]
+    y_fit, y_calib = y_train[:50000], y_train[50000:]
+    y_fit_cat, y_calib_cat = y_train_cat[:50000], y_train_cat[50000:]
+
+    # One hot encoding of classes
+    y_fit_cat = to_categorical(y_fit)
+    y_calib_cat = to_categorical(y_calib)
+    y_test_cat = to_categorical(y_test)
+
+    # Classification model
+    nn_model = models.Sequential()
+    nn_model.add(layers.Dense(4, activation="relu", input_shape=(28 * 28,)))
+    nn_model.add(layers.Dense(10, activation="softmax"))
+    compile_kwargs = {
+        "optimizer": "rmsprop",
+        "loss": "categorical_crossentropy",
+        "metrics": [],
+    }
+    fit_kwargs = {"epochs": 2, "batch_size": 128, "verbose": 1}
+    # Predictor wrapper
+    class_predictor = BasePredictor(
+        nn_model, is_trained=False, **compile_kwargs
+    )
+
+    # LAC
+    lac_cp = LAC(class_predictor)
+    lac_cp.fit(
+        X_fit=X_fit,
+        y_fit=y_fit_cat,
+        X_calib=X_calib,
+        y_calib=y_calib,
+        **fit_kwargs
+    )
+    y_pred, set_pred = lac_cp.predict(X_test, alpha=alpha)
+    assert y_pred is not None
+
+    # Compute marginal coverage
+    coverage = metrics.classification_mean_coverage(y_test, set_pred)
+    width = metrics.classification_mean_size(set_pred)
+    res = {"cov": np.round(coverage, 2), "size": np.round(width, 2)}
+    assert RESULTS["lac"] == res
+
+
 @pytest.mark.parametrize(
     "alpha, random_state, rand",
     [(0.1, 42, True)],
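
A closing illustration: the quantity that `lac_score` computes and that `lac_set` thresholds (the same logic `test_lac` above exercises through the high-level API) reduces to a calibrated quantile of `1 - softmax(true class)`. The following self-contained NumPy sketch mirrors that arithmetic on synthetic placeholder data, outside the library's own types:

```python
import numpy as np

rng = np.random.default_rng(0)

# Synthetic stand-in for calibration softmax outputs (n_calib x K) and true labels
n_calib, K, alpha = 500, 10, 0.1
probs_calib = rng.dirichlet(np.ones(K), size=n_calib)
y_calib = rng.integers(0, K, size=n_calib)

# LAC nonconformity score: 1 - probability assigned to the true class
scores = 1 - probs_calib[np.arange(n_calib), y_calib]

# Conformal quantile with the (1 + 1/n) finite-sample correction, clipped to 1
q_level = min((1 - alpha) * (1 + 1 / n_calib), 1.0)
delta = np.quantile(scores, q_level, method="higher")

# Prediction set for a new softmax vector: keep classes with probability >= 1 - delta
probs_new = rng.dirichlet(np.ones(K))
prediction_set = np.where(probs_new >= 1 - delta)[0].tolist()
print(prediction_set)
```

Under exchangeability, sets built this way contain the true class with probability at least `1 - alpha`, which is the guarantee stated in the LAC part of the theory overview.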