Merge pull request #218 from credo-ai/release/1.0.1
Release/1.0.1
IanAtCredo authored Oct 27, 2022
2 parents f73d4bf + 27e077c commit 65528aa
Showing 48 changed files with 2,132 additions and 262 deletions.
2 changes: 1 addition & 1 deletion credoai/__init__.py
@@ -2,4 +2,4 @@
Primary interface for Credo AI Lens package
"""

__version__ = "1.0.0"
__version__ = "1.0.1"
5 changes: 3 additions & 2 deletions credoai/artifacts/data/tabular_data.py
@@ -25,8 +25,9 @@ class TabularData(Data):
Outcome
sensitive_features : pd.Series, pd.DataFrame, optional
Sensitive Features, which will be used for disaggregating performance
metrics. This can be the columns you want to perform segmentation analysis on, or
a feature related to fairness like 'race' or 'gender'
metrics. This can be the feature you want to perform segmentation analysis on, or
a feature related to fairness like 'race' or 'gender'. Sensitive Features *must*
be categorical features.
sensitive_intersections : bool, list
Whether to add intersections of sensitive features. If True, add all possible
intersections. If list, only create intersections from specified sensitive features.
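The updated docstring requires sensitive features to be categorical. Below is a minimal sketch of preparing such a feature before constructing a TabularData artifact; the example values and the name= keyword are illustrative assumptions, not taken from this diff.

import pandas as pd

from credoai.artifacts import TabularData

# Illustrative data; sensitive features must be categorical, so cast explicitly
X = pd.DataFrame({"income": [40_000, 55_000, 38_000], "age": [29, 41, 36]})
y = pd.Series([0, 1, 0], name="approved")
gender = pd.Series(["male", "female", "female"], name="gender").astype("category")

data = TabularData(name="loan_data", X=X, y=y, sensitive_features=gender)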
5 changes: 4 additions & 1 deletion credoai/evaluators/__init__.py
@@ -4,11 +4,14 @@

from .evaluator import Evaluator
from .data_fairness import DataFairness
from .data_profiling import DataProfiling
from .data_profiler import DataProfiler
from .privacy import Privacy
from .security import Security
from .equity import DataEquity, ModelEquity
from .performance import Performance
from .fairness import ModelFairness
from .ranking_fairness import RankingFairness
from .survival_fairness import SurvivalFairness
from .shap import ShapExplainer
from .model_profiler import ModelProfiler
from .feature_drift import FeatureDrift
1 change: 0 additions & 1 deletion credoai/evaluators/data_fairness.py
@@ -67,7 +67,6 @@ def __init__(
self.categorical_threshold = categorical_threshold
super().__init__()

name = "DataFairness"
required_artifacts = {"data", "sensitive_feature"}

def _setup(self):
credoai/evaluators/data_profiler.py (DataProfiling renamed to DataProfiler)
@@ -15,7 +15,7 @@
matplotlib.use(backend)


class DataProfiling(Evaluator):
class DataProfiler(Evaluator):
"""Data profiling module for Credo AI.
This evaluator runs the pandas profiler on a data. Pandas profiler calculates a number
@@ -31,7 +31,6 @@ class DataProfiling(Evaluator):
Passed to pandas_profiling.ProfileReport
"""

name = "DataProfiler"
required_artifacts = {"data"}

def __init__(self, dataset_name=None, **profile_kwargs):
@@ -40,14 +39,11 @@ def __init__(self, dataset_name=None, **profile_kwargs):
super().__init__()

def _setup(self):
self.data_to_eval = self.data

self.data = pd.concat([self.data_to_eval.X, self.data_to_eval.y], axis=1)
self.data_to_profile = pd.concat([self.data.X, self.data.y], axis=1)
return self

def _validate_arguments(self):
check_data_instance(self.data, TabularData)

return self

def get_html_report(self):
@@ -67,4 +63,4 @@ def evaluate(self):
def _create_reporter(self):
default_kwargs = {"title": "Dataset", "minimal": True}
default_kwargs.update(self.profile_kwargs)
return ProfileReport(self.data, **default_kwargs)
return ProfileReport(self.data_to_profile, **default_kwargs)
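For context, the renamed DataProfiler now profiles the concatenation of X and y. A rough standalone sketch of that flow using pandas_profiling directly; the example frames are made up, and the HTML step only approximates what get_html_report() returns.

import pandas as pd
from pandas_profiling import ProfileReport

# Hypothetical frames standing in for TabularData.X and TabularData.y
X = pd.DataFrame({"age": [29, 41, 36], "income": [40_000, 55_000, 38_000]})
y = pd.Series([0, 1, 0], name="approved")

# _setup now concatenates features and target into a single frame...
data_to_profile = pd.concat([X, y], axis=1)

# ...and _create_reporter builds the report with minimal defaults
report = ProfileReport(data_to_profile, title="Dataset", minimal=True)
html = report.to_html()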
2 changes: 0 additions & 2 deletions credoai/evaluators/equity.py
@@ -41,7 +41,6 @@ class DataEquity(Evaluator):
The significance value to evaluate statistical tests
"""

name = "DataEquity"
required_artifacts = {"data", "sensitive_feature"}

def __init__(self, p_value=0.01):
@@ -324,7 +323,6 @@ def __init__(self, use_predict_proba=False, p_value=0.01):
self.use_predict_proba = use_predict_proba
super().__init__(p_value)

name = "ModelEquity"
required_artifacts = {"model", "assessment_data", "sensitive_feature"}

def _setup(self):
24 changes: 16 additions & 8 deletions credoai/evaluators/evaluator.py
@@ -17,6 +17,11 @@ def __init__(self):
self._results = None
self.artifact_keys = []
self.logger = global_logger
self.metadata = {}

@property
def name(self):
return self.__class__.__name__

@property
def results(self):
@@ -36,12 +41,6 @@ def results(self, results):
raise ValidationError("All results must be EvidenceContainers")
self._results = results

@property
@abstractmethod
def name(self):
"""Used to define a unique identifier for the specific evaluator"""
pass

@property
@abstractmethod
def required_artifacts(self):
@@ -106,11 +105,20 @@ def get_container_info(self, labels: dict = None, metadata: dict = None):
return info

def _base_container_info(self):
return {"labels": {"evaluator": self.name}, "metadata": self._get_artifacts()}
meta = {**self.metadata, **self._get_artifacts()}
labels = {"evaluator": self.name}
if "dataset_type" in meta:
labels["dataset_type"] = meta["dataset_type"]
return {"labels": labels, "metadata": meta}

def _get_artifacts(self):
artifacts = {}
save_keys = {"model": "model_name"}
save_keys = {
"model": "model_name",
"data": "data_name",
"assessment_data": "assessment_data_name",
"training_data": "training_data_name",
}
for k in self.artifact_keys:
save_key = save_keys.get(k, k)
try:
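A stripped-down illustration of the naming change above: name is now derived from the class itself, which is why the name = "..." class attributes disappear from the evaluators in this commit.

class Evaluator:
    @property
    def name(self):
        # Identifier now comes from the class name rather than a
        # hand-maintained attribute on every subclass.
        return self.__class__.__name__


class ModelFairness(Evaluator):
    pass


assert ModelFairness().name == "ModelFairness"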
16 changes: 8 additions & 8 deletions credoai/evaluators/fairness.py
@@ -62,7 +62,6 @@ def __init__(
self.fairness_prob_metrics = None
super().__init__()

name = "ModelFairness"
required_artifacts = {"model", "data", "sensitive_feature"}

def _setup(self):
@@ -105,11 +104,9 @@ def evaluate(self):

if disaggregated_thresh_results is not None:
for key, df in disaggregated_thresh_results.items():
df.name = key
labels = {**sens_feat_label, **{"metric_type": key}}
self._results.append(
TableContainer(
df, **self.get_container_info(labels=sens_feat_label)
)
TableContainer(df, **self.get_container_info(labels=labels))
)

return self
@@ -198,12 +195,15 @@ def get_disaggregated_threshold_performance(self):
var_name="type",
)

to_return = defaultdict(pd.DataFrame)
to_return = defaultdict(list)
for i, row in df.iterrows():
label = f'{row["type"]}_disaggregated_performance'
tmp_df = row["value"]
tmp_df = tmp_df.assign(**row.drop("value"))
to_return[label] = pd.concat([to_return[label], tmp_df])
to_return[row["type"]].append(tmp_df)
for key in to_return.keys():
df = pd.concat(to_return[key])
df.name = "threshold_dependent_disaggregated_performance"
to_return[key] = df
return to_return

def get_fairness_results(self):
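The reworked get_disaggregated_threshold_performance collects frames per metric type and concatenates each group once. A small sketch of that grouping pattern with made-up rows; the column names and "roc_curve" label are illustrative only.

from collections import defaultdict

import pandas as pd

# Made-up rows mimicking the melted results: each carries a "type" label
# and a "value" DataFrame that should be regrouped per type.
rows = [
    {"type": "roc_curve", "value": pd.DataFrame({"fpr": [0.0, 0.5], "tpr": [0.0, 0.8]})},
    {"type": "roc_curve", "value": pd.DataFrame({"fpr": [0.0, 0.4], "tpr": [0.0, 0.7]})},
]

to_return = defaultdict(list)
for row in rows:
    to_return[row["type"]].append(row["value"].assign(type=row["type"]))

for key in list(to_return):
    df = pd.concat(to_return[key])
    # name attribute mirrors what the evaluators set before wrapping in TableContainer
    df.name = "threshold_dependent_disaggregated_performance"
    to_return[key] = df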
154 changes: 154 additions & 0 deletions credoai/evaluators/feature_drift.py
@@ -0,0 +1,154 @@
"""Feature Drift evaluator"""
from credoai.artifacts import ClassificationModel
from credoai.evaluators import Evaluator
from credoai.evaluators.utils.validation import check_requirements_existence
from credoai.evidence import MetricContainer
from credoai.evidence.containers import TableContainer
from credoai.modules.credoai_metrics import population_stability_index
from pandas import DataFrame, Series


class FeatureDrift(Evaluator):
"""
Measure Feature Drift using population stability index.
This evaluator measures feature drift in:
1. Model prediction: the prediction for the assessment dataset is compared
to the prediction for the training dataset.
In the case of classifiers, the prediction is performed with predict proba if available.
If it is not available, the prediction is treated like a categorical variable, see the
processing of categorical variables in the item below.
2. Dataset features: 1 to 1 comparison across all features for the datasets. This is also
referred to as "characteristic stability index" (CSI).
- Numerical features are directly fed into the population_stability_index metric, and
binned according to the parameters specified at init time.
- Categorical features percentage distribution is manually calculated. The % amount of
samples per each class is calculated and then fed into the population_stability_index metric.
The percentage flag in the metric is set to True, to bypass the internal binning process.
Parameters
----------
buckets : int, optional
Number of buckets to consider to bin the predictions, by default 10
buckettype : Literal["bins", "quantiles"]
Type of strategy for creating buckets, bins splits into even splits,
quantiles splits into quantiles buckets, by default "bins"
csi_calculation : bool, optional
Calculate characteristic stability index, i.e., PSI for all features in the datasets,
by default False
"""

def __init__(self, buckets: int = 10, buckettype="bins", csi_calculation=False):

self.bucket_number = buckets
self.buckettype = buckettype
self.csi_calculation = csi_calculation
self.percentage = False
super().__init__()

required_artifacts = {"model", "assessment_data", "training_data"}

def _validate_arguments(self):
check_requirements_existence(self)

def _setup(self):
# Default prediction to predict method
prediction_method = self.model.predict
if isinstance(self.model, ClassificationModel):
if hasattr(self.model, "predict_proba"):
prediction_method = self.model.predict_proba
else:
self.percentage = True

self.expected_prediction = prediction_method(self.training_data.X)
self.actual_prediction = prediction_method(self.assessment_data.X)

# Create the bins manually for categorical prediction if predict_proba
# is not available.
if self.percentage:
(
self.expected_prediction,
self.actual_prediction,
) = self._create_bin_percentage(
self.expected_prediction, self.actual_prediction
)

def evaluate(self):
prediction_psi = self._calculate_psi_on_prediction()
self.results = [MetricContainer(prediction_psi, **self.get_container_info())]
if self.csi_calculation:
csi = self._calculate_csi()
self.results.append(TableContainer(csi, **self.get_container_info()))
return self

def _calculate_psi_on_prediction(self) -> DataFrame:
"""
Calculate the psi index on the model prediction.
Returns
-------
DataFrame
Formatted for metric container.
"""
psi = population_stability_index(
self.expected_prediction,
self.actual_prediction,
percentage=self.percentage,
buckets=self.bucket_number,
buckettype=self.buckettype,
)
res = DataFrame({"value": psi, "type": "population_stability_index"}, index=[0])
return res

def _calculate_csi(self) -> DataFrame:
"""
Calculate psi for all the columns in the dataframes.
Returns
-------
DataFrame
Formatted for the table container.
"""
columns_names = list(self.assessment_data.X.columns)
psis = {}
for col_name in columns_names:
train_data = self.training_data.X[col_name]
assess_data = self.assessment_data.X[col_name]
if self.assessment_data.X[col_name].dtype == "category":
train, assess = self._create_bin_percentage(train_data, assess_data)
psis[col_name] = population_stability_index(train, assess, True)
else:
psis[col_name] = population_stability_index(train_data, assess_data)
psis = DataFrame.from_dict(psis, orient="index")
psis = psis.reset_index()
psis.columns = ["feature_names", "value"]
psis.name = "Characteristic Stability Index"
return psis

@staticmethod
def _create_bin_percentage(train: Series, assess: Series) -> tuple:
"""
In case of categorical values proceed to count the instances
of each class and divide by the total amount of samples to get
the ratios.
Parameters
----------
train : Series
Array of values, dtype == category
assess : Series
Array of values, dtype == category
Returns
-------
tuple
Class percentages for both arrays
"""
len_training = len(train)
len_assessment = len(assess)
train_bin_perc = train.value_counts() / len_training
assess_bin_perc = assess.value_counts() / len_assessment
return train_bin_perc, assess_bin_perc
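For reference, PSI compares two percentage distributions as sum((actual - expected) * ln(actual / expected)). Below is a simplified stand-in for the population_stability_index metric imported above, combined with the categorical binning performed by _create_bin_percentage; the epsilon clipping is an assumption of this sketch, not necessarily how the real metric guards against empty buckets.

import numpy as np
import pandas as pd


def psi_from_percentages(expected_perc: pd.Series, actual_perc: pd.Series, eps: float = 1e-4) -> float:
    """Simplified PSI over pre-computed bucket percentages (the percentage=True path)."""
    # Align buckets so categories missing from one sample count as zero
    expected_perc, actual_perc = expected_perc.align(actual_perc, fill_value=0)
    expected = np.clip(expected_perc.to_numpy(dtype=float), eps, None)
    actual = np.clip(actual_perc.to_numpy(dtype=float), eps, None)
    return float(np.sum((actual - expected) * np.log(actual / expected)))


# Categorical handling mirrors _create_bin_percentage: class counts divided
# by the number of samples give the bucket percentages.
train = pd.Series(["a", "a", "b", "c"], dtype="category")
assess = pd.Series(["a", "b", "b", "b"], dtype="category")

train_perc = train.value_counts() / len(train)
assess_perc = assess.value_counts() / len(assess)

print(psi_from_percentages(train_perc, assess_perc))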