This repository has been archived by the owner on Jul 12, 2024. It is now read-only.

Commit
Merge pull request #334 from credo-ai/release/1.1.8
Release/1.1.8
nate-credoai authored May 3, 2023
2 parents 8da1944 + 4c4190a commit 529ef1c
Showing 35 changed files with 1,006 additions and 225 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/black.yml
@@ -6,7 +6,7 @@ jobs:
   lint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - uses: psf/black@stable
         with:
           version: "22.10.0"
6 changes: 3 additions & 3 deletions .github/workflows/release.yml
@@ -9,8 +9,8 @@ jobs:
     runs-on: ubuntu-20.04
 
     steps:
-      - uses: actions/checkout@v2
-      - uses: actions/setup-python@v2
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
       - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
@@ -21,4 +21,4 @@ jobs:
           TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
         run: |
           python setup.py sdist bdist_wheel
-          twine upload dist/*
+          twine upload dist/*
15 changes: 10 additions & 5 deletions .github/workflows/test.yml
@@ -13,13 +13,17 @@ on:
 jobs:
   run-test:
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
     steps:
       - name: Check out repository
-        uses: actions/checkout@v2
-      - name: Set up python
-        uses: actions/setup-python@v2
+        uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
         with:
-          python-version: 3.8
+          python-version: ${{ matrix.python-version }}
+          cache: 'pip'
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
@@ -36,12 +40,13 @@ jobs:
           set -o pipefail
           scripts/test.sh | tee ./pytest-coverage.txt
       - name: my-artifact
-        if: always()
+        if: always() && !env.ACT
         uses: actions/upload-artifact@v3
         with:
           name: pytest-coverage
           path: ./pytest-coverage.txt
       - name: Pytest coverage comment
+        if: always() && !env.ACT
         uses: MishaKav/pytest-coverage-comment@main
         with:
           pytest-coverage-path: ./pytest-coverage.txt
3 changes: 1 addition & 2 deletions README.md
@@ -37,10 +37,9 @@ The latest stable release (and required dependencies) can be installed from PyPI
 pip install credoai-lens
 ```
 
-Additional installation instructions can be found in our [setup documentation](https://credoai-lens.readthedocs.io/en/stable/notebooks/quickstart.html)
+Additional installation instructions can be found in our [setup documentation](https://credoai-lens.readthedocs.io/en/stable/pages/setup.html)
 
 ## Getting Started
 
 To get started, see the [quickstart demo](https://credoai-lens.readthedocs.io/en/stable/notebooks/quickstart.html).
 
 If you are using the Credo AI Governance App, also check out the [governance integration demo](https://credoai-lens.readthedocs.io/en/stable/notebooks/governance_integration.html).
2 changes: 1 addition & 1 deletion credoai/_version.py
@@ -2,4 +2,4 @@
 # 1) we don't load dependencies by storing it in __init__.py
 # 2) we can import it in setup.py for the same reason
 # 3) we can import it into your module
-__version__ = "1.1.7"
+__version__ = "1.1.8"
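The comments in this file describe the single-source-version pattern: the version string lives in `_version.py` so it can be read without pulling in the package's dependencies. A minimal sketch of the `setup.py` side of that pattern (illustrative only; the repository's actual `setup.py` may differ):

```python
# setup.py sketch: single-source the package version, per the comments above.
# Assumes credoai/__init__.py stays import-light so this import is cheap.
from setuptools import find_packages, setup

from credoai._version import __version__

setup(
    name="credoai-lens",  # PyPI name, per the README's `pip install credoai-lens`
    version=__version__,
    packages=find_packages(),
)
```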
10 changes: 5 additions & 5 deletions credoai/artifacts/data/base_data.py
@@ -4,8 +4,8 @@
 from copy import deepcopy
 from typing import Optional, Union
 
-import pandas as pd
 import numpy as np
+import pandas as pd
 
 from credoai.utils import global_logger
 from credoai.utils.common import ValidationError, check_pandas
@@ -218,7 +218,7 @@ def _validate_processed_y(self):
 
     def _validate_processed_sensitive(self):
         """Validation of processed sensitive features"""
-        for col_name, col in self.sensitive_features.iteritems():
+        for col_name, col in self.sensitive_features.items():
             # validate unique
             unique_values = col.unique()
             if len(unique_values) == 1:
@@ -227,16 +227,16 @@ def _validate_processed_sensitive(self):
                     f"than one unique value. Only found one value: {unique_values[0]}"
                 )
             # validate number in each group
-            for group, value in col.value_counts().iteritems():
+            for group, value in col.value_counts().items():
                 if value < 10:
                     global_logger.warning(
                         f"Dataset Issue! Very few ({value}) records were found for {group} under sensitive feature {col_name}."
                     )
             # validate variance in y
             if self.y is not None:
                 y = pd.DataFrame(self.y)
-                for outcome, outcome_col in y.iteritems():
-                    for group, value in outcome_col.groupby(col).nunique().iteritems():
+                for outcome, outcome_col in y.items():
+                    for group, value in outcome_col.groupby(col).nunique().items():
                         if not np.all(value):
                             global_logger.warning(
                                 "%s\n%s",
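Context for the `iteritems` changes in this file: pandas deprecated `Series.iteritems()` and `DataFrame.iteritems()` in 1.5 and removed them in 2.0; `.items()` is the drop-in replacement yielding the same (label, value) pairs. A minimal sketch of the iteration pattern the validators above rely on:

```python
import pandas as pd

col = pd.Series(["a", "a", "b"], name="sensitive")

# .items() yields (index, value) pairs, exactly as the removed .iteritems() did
for idx, value in col.items():
    print(idx, value)

# value_counts() returns a Series too, so the same iteration applies to group counts
for group, count in col.value_counts().items():
    print(group, count)
```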
18 changes: 14 additions & 4 deletions credoai/evaluators/data_fairness.py
@@ -44,15 +44,26 @@ class DataFairness(Evaluator):
     - group differences of features
     - evaluates whether features in the dataset are proxies for the sensitive feature
     - whether the entire dataset can be seen as a proxy for the sensitive feature
-        (i.e., the sensitive feature is "redundantly encoded")
+      (i.e., the sensitive feature is "redundantly encoded")
+
+    Required Artifacts
+    ------------------
+        **Required Artifacts**
+
+        Generally artifacts are passed directly to :class:`credoai.lens.Lens`, which
+        handles evaluator setup. However, if you are using the evaluator directly, you
+        will need to pass the following artifacts when instantiating the evaluator:
+
+        - data: :class:`credoai.artifacts.TabularData`
+            The data to evaluate, which must include a sensitive feature
 
     Parameters
     ----------
     categorical_features_keys : list[str], optional
         Names of the categorical features
     categorical_threshold : float
         Parameter for automatically identifying categorical columns. See
-        `credoai.utils.common.is_categorical`
+        :class:`credoai.utils.common.is_categorical` for more details.
     """
 
     required_artifacts = {"data", "sensitive_feature"}
@@ -62,7 +73,6 @@ def __init__(
         categorical_features_keys: Optional[List[str]] = None,
         categorical_threshold: float = 0.05,
     ):
-
         self.categorical_features_keys = categorical_features_keys
         self.categorical_threshold = categorical_threshold
         super().__init__()
@@ -220,7 +230,7 @@ def _find_categorical_features(self, threshold):
         if is_categorical(self.sensitive_features, threshold=threshold):
             self.sensitive_features = self.sensitive_features.astype("category")
         cat_cols = []
-        for name, column in self.X.iteritems():
+        for name, column in self.X.items():
             if is_categorical(column, threshold=threshold):
                 cat_cols.append(name)
         return cat_cols
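A minimal usage sketch matching the new "Required Artifacts" docstring (the toy DataFrame and its column names are illustrative, not part of this commit):

```python
import pandas as pd

from credoai.artifacts import TabularData
from credoai.evaluators import DataFairness
from credoai.lens import Lens

# Illustrative dataset with features, an outcome, and a sensitive feature
df = pd.DataFrame(
    {
        "age": [22, 35, 58, 41, 30, 47],
        "income": [30, 50, 70, 60, 45, 55],
        "approved": [0, 1, 1, 0, 1, 0],
        "gender": ["f", "m", "f", "m", "f", "m"],
    }
)

data = TabularData(
    name="toy_data",
    X=df[["age", "income"]],
    y=df["approved"],
    sensitive_features=df["gender"],
)

# Usual route: pass artifacts to Lens, which handles evaluator setup
lens = Lens(assessment_data=data)
lens.add(DataFairness())
lens.run()
print(lens.get_results())
```

The model-based evaluators updated below (ModelFairness, Performance, ModelEquity, and so on) follow the same pattern, with a model artifact passed to Lens alongside the data.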
15 changes: 12 additions & 3 deletions credoai/evaluators/data_profiler.py
@@ -11,8 +11,8 @@
 from credoai.utils.common import ValidationError, check_pandas
 
 backend = matplotlib.get_backend()
-# load pands profiler, which sets backend to Agg
-from pandas_profiling import ProfileReport
+# load ydata profiler, which sets backend to Agg
+from ydata_profiling import ProfileReport
 
 matplotlib.use(backend)
@@ -29,7 +29,16 @@ class DataProfiler(Evaluator):
     Parameters
     ----------
     profile_kwargs
-        Potential arguments to be passed to pandas_profiling.ProfileReport
+        Potential arguments to be passed to ydata_profiling.ProfileReport
+
+    Required Artifacts
+    ------------------
+        Generally artifacts are passed directly to :class:`credoai.lens.Lens`, which
+        handles evaluator setup. However, if you are using the evaluator directly, you
+        will need to pass the following artifacts when instantiating the evaluator:
+
+        data : TabularData
+            The data to evaluate, which must include a sensitive feature
     """
 
     required_artifacts = {"data"}
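The import swap in this file follows the upstream rename of pandas-profiling to ydata-profiling (same API, new package name). The backend save/restore shown above is needed because the profiler switches matplotlib to the Agg backend at import time. A standalone sketch of both points, with an illustrative DataFrame:

```python
import matplotlib

# Remember the active backend: importing the profiler flips it to Agg
backend = matplotlib.get_backend()
from ydata_profiling import ProfileReport  # formerly pandas_profiling

matplotlib.use(backend)  # restore whatever backend was active before

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["x", "y", "y", "x"]})

# The evaluator's profile_kwargs are forwarded to ProfileReport, e.g.:
report = ProfileReport(df, title="Assessment data profile", minimal=True)
report.to_file("profile.html")
```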
15 changes: 15 additions & 0 deletions credoai/evaluators/deepchecks_credoai.py
@@ -27,6 +27,21 @@ class Deepchecks(Evaluator):
     be used in preference to deepchecks, since the output formats of other evaluators are generally
     consistent, while this deepchecks evaluator outputs results in a highly structured JSON format.
+
+    Required Artifacts
+    ------------------
+        **Required Artifacts**
+
+        Generally artifacts are passed directly to :class:`credoai.lens.Lens`, which
+        handles evaluator setup. However, if you are using the evaluator directly, you
+        will need to pass **at least one** of the following artifacts when instantiating the evaluator:
+
+        - model: :class:`credoai.artifacts.Model` or :class:`credoai.artifacts.RegressionModel`
+        - assessment_data: :class:`credoai.artifacts.TabularData`
+            The assessment data to evaluate. Assessment data is used to calculate metrics
+            on the model.
+        - training_data: :class:`credoai.artifacts.TabularData`
+            The training data to evaluate. The training data was used to train the model.
 
     Parameters
     ----------
35 changes: 31 additions & 4 deletions credoai/evaluators/equity.py
@@ -26,6 +26,19 @@ class DataEquity(Evaluator):
     - Proportion (Bounded [0-1] continuous outcome): outcome is transformed to logits, then
       proceed as normal for continuous
+
+    Required Artifacts
+    ------------------
+        **Required Artifacts**
+
+        Generally artifacts are passed directly to :class:`credoai.lens.Lens`, which
+        handles evaluator setup. However, if you are using the evaluator directly, you
+        will need to pass the following artifacts when instantiating the evaluator:
+
+        - data: :class:`credoai.artifacts.TabularData`
+            The data to evaluate for equity (based on the outcome variable). Must
+            have sensitive feature defined.
 
     Parameters
     ----------
     p_value : float
@@ -190,14 +203,28 @@ class ModelEquity(DataEquity):
     Evaluates the equity of a model's predictions.
 
     This evaluator assesses whether model predictions are distributed equally across a sensitive
-    feature. Depending on the kind of outcome, different tests will be performed.
+    feature. Depending on the kind of outcome, different tests will be performed:
 
-    * Discrete: chi-squared contingency tests,
+    - Discrete: chi-squared contingency tests,
       followed by Bonferroni corrected posthoc chi-sq tests
-    * Continuous: One-way ANOVA, followed by Tukey HSD posthoc tests
-    * Proportion (Bounded [0-1] continuous outcome): outcome is transformed to logits, then
+    - Continuous: One-way ANOVA, followed by Tukey HSD posthoc tests
+    - Proportion (Bounded [0-1] continuous outcome): outcome is transformed to logits, then
       proceed as normal for continuous
+
+    Required Artifacts
+    ------------------
+        **Required Artifacts**
+
+        Generally artifacts are passed directly to :class:`credoai.lens.Lens`, which
+        handles evaluator setup. However, if you are using the evaluator directly, you
+        will need to pass the following artifacts when instantiating the evaluator:
+
+        - model: :class:`credoai.artifacts.Model`
+        - assessment_data: :class:`credoai.artifacts.TabularData`
+            The assessment data to use to create model predictions and evaluate
+            the equity of the model. Must have sensitive features.
 
     Parameters
     ----------
     use_predict_proba : bool, optional
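For the proportion case in both docstrings, the bounded [0-1] outcome is mapped to logits so the continuous-outcome machinery (one-way ANOVA followed by Tukey HSD) can be applied. A sketch of that transform; the clipping epsilon is an assumption, since the evaluator's exact handling of outcomes at 0 and 1 is not shown in this diff:

```python
import numpy as np

def to_logits(p, eps=1e-6):
    """Map proportions in [0, 1] onto the real line."""
    p = np.clip(p, eps, 1 - eps)  # keep the log finite at the boundaries
    return np.log(p / (1 - p))

rates = np.array([0.05, 0.25, 0.50, 0.90])
print(to_logits(rates))  # approximately [-2.94, -1.10, 0.00, 2.20]
```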
11 changes: 11 additions & 0 deletions credoai/evaluators/fairness.py
@@ -22,6 +22,17 @@ class ModelFairness(Evaluator):
     Handles any metric that can be calculated on a set of ground truth labels and predictions,
     e.g., binary classification, multiclass classification, regression.
+
+    Required Artifacts
+    ------------------
+        **Required Artifacts**
+
+        Generally artifacts are passed directly to :class:`credoai.lens.Lens`, which
+        handles evaluator setup. However, if you are using the evaluator directly, you
+        will need to pass the following artifacts when instantiating the evaluator:
+
+        - model: :class:`credoai.artifacts.Model` or :class:`credoai.artifacts.RegressionModel`
+        - data: :class:`credoai.artifacts.TabularData`
+            The data to use for fairness evaluation. Must include a sensitive feature.
 
     Parameters
     ----------
12 changes: 12 additions & 0 deletions credoai/evaluators/feature_drift.py
@@ -30,6 +30,18 @@ class FeatureDrift(Evaluator):
     samples per each class is calculated and then fed into the population_stability_index metric.
     The percentage flag in the metric is set to True, to bypass the internal binning process.
+
+    Required Artifacts
+    ------------------
+        **Required Artifacts**
+
+        Generally artifacts are passed directly to :class:`credoai.lens.Lens`, which
+        handles evaluator setup. However, if you are using the evaluator directly, you
+        will need to pass the following artifacts when instantiating the evaluator:
+
+        - model: :class:`credoai.artifacts.Model`
+        - assessment_data: :class:`credoai.artifacts.TabularData`
+        - training_data: :class:`credoai.artifacts.TabularData`
 
     Parameters
     ----------
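Per the docstring above, for classifiers the evaluator computes the share of predictions per class and feeds those shares to population_stability_index with percentage=True, skipping the metric's internal binning. A sketch of the standard PSI computation on pre-binned shares (credoai's own implementation is not part of this diff; the epsilon guard is an assumption to avoid log-of-zero):

```python
import numpy as np

def psi(expected_pct, actual_pct, eps=1e-4):
    """Population Stability Index over pre-binned percentages."""
    e = np.clip(np.asarray(expected_pct, dtype=float), eps, None)
    a = np.clip(np.asarray(actual_pct, dtype=float), eps, None)
    return float(np.sum((a - e) * np.log(a / e)))

# Hypothetical class shares of predictions: training vs. assessment data
train_share = [0.70, 0.30]
assess_share = [0.55, 0.45]
print(psi(train_share, assess_share))  # ~0.097, a noticeable shift
```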
11 changes: 11 additions & 0 deletions credoai/evaluators/identity_verification.py
@@ -31,6 +31,17 @@ class IdentityVerification(Evaluator):
     This evaluator takes in identity verification data and
     provides functionality to perform performance and fairness assessment
+
+    Required Artifacts
+    ------------------
+        **Required Artifacts**
+
+        Generally artifacts are passed directly to :class:`credoai.lens.Lens`, which
+        handles evaluator setup. However, if you are using the evaluator directly, you
+        will need to pass the following artifacts when instantiating the evaluator:
+
+        model: :class:`credoai.artifacts.ComparisonModel`
+        assessment_data: :class:`credoai.artifacts.ComparisonData`
 
     Parameters
     ----------
     pairs : pd.DataFrame of shape (n_pairs, 4)
10 changes: 10 additions & 0 deletions credoai/evaluators/model_profiler.py
@@ -54,6 +54,16 @@ class ModelProfiler(Evaluator):
     The method generate_template() provides a dictionary with several entries the
     user could be interested in filling up.
+
+    Required Artifacts
+    ------------------
+        **Required Artifacts**
+
+        Generally artifacts are passed directly to :class:`credoai.lens.Lens`, which
+        handles evaluator setup. However, if you are using the evaluator directly, you
+        will need to pass the following artifacts when instantiating the evaluator:
+
+        - model: :class:`credoai.artifacts.Model`
 
     Parameters
     ----------
     model_info : Optional[dict]
11 changes: 11 additions & 0 deletions credoai/evaluators/performance.py
@@ -24,6 +24,17 @@ class Performance(Evaluator):
     - calculate the metrics
     - create disaggregated metrics
+
+    Required Artifacts
+    ------------------
+        **Required Artifacts**
+
+        Generally artifacts are passed directly to :class:`credoai.lens.Lens`, which
+        handles evaluator setup. However, if you are using the evaluator directly, you
+        will need to pass the following artifacts when instantiating the evaluator:
+
+        - model: :class:`credoai.artifacts.Model`
+        - assessment_data: :class:`credoai.artifacts.TabularData`
 
     Parameters
     ----------
     metrics : List-like
12 changes: 12 additions & 0 deletions credoai/evaluators/privacy.py
@@ -88,6 +88,18 @@ class Privacy(Evaluator):
     * `Membership Inference BlackBox Rule Based`_: Use a simple rule based approach to assess if some records
       were used for the model training.
+
+    Required Artifacts
+    ------------------
+        **Required Artifacts**
+
+        Generally artifacts are passed directly to :class:`credoai.lens.Lens`, which
+        handles evaluator setup. However, if you are using the evaluator directly, you
+        will need to pass the following artifacts when instantiating the evaluator:
+
+        - model: :class:`credoai.artifacts.Model`
+        - assessment_data: :class:`credoai.artifacts.TabularData`
+        - training_data: :class:`credoai.artifacts.TabularData`
 
     Parameters
     ----------
     attack_feature : Union[str, int, None], optional
(Diff truncated: 19 of the 35 changed files are not shown above.)
