This repository has been archived by the owner on Jul 12, 2024. It is now read-only.

Merge pull request #243 from credo-ai/release/1.1.2
Release/1.1.2
IanAtCredo authored Nov 11, 2022
2 parents f7755df + ad14553 commit d0a3ceb
Showing 29 changed files with 1,490 additions and 149 deletions.
2 changes: 1 addition & 1 deletion credoai/__init__.py
@@ -2,4 +2,4 @@
Primary interface for Credo AI Lens package
"""

__version__ = "1.1.1"
__version__ = "1.1.2"
2 changes: 2 additions & 0 deletions credoai/artifacts/__init__.py
@@ -7,6 +7,8 @@
"""
from .data.base_data import Data
from .data.tabular_data import TabularData
from .data.comparison_data import ComparisonData
from .model.base_model import Model
from .model.classification_model import ClassificationModel, DummyClassifier
from .model.comparison_model import ComparisonModel, DummyComparisonModel
from .model.regression_model import RegressionModel, DummyRegression
130 changes: 130 additions & 0 deletions credoai/artifacts/data/comparison_data.py
@@ -0,0 +1,130 @@
"""Data artifact for pair-wise-comparison-based identity verification"""
from copy import deepcopy

import pandas as pd
from credoai.utils.common import ValidationError

from .base_data import Data


class ComparisonData(Data):
    """Class wrapper for pair-wise-comparison-based identity verification

    ComparisonData serves as an adapter between pair-wise-comparison-based identity
    verification data and the identity verification evaluator in Lens.

    Parameters
    ----------
    name : str
        Label of the dataset
    pairs : pd.DataFrame of shape (n_pairs, 4)
        Dataframe where each row represents a data sample pair and the associated subjects.
        The type of data sample is determined by the ComparisonModel's `compare` function,
        which takes data sample pairs and returns their similarity scores. Examples include
        selfies, fingerprint scans, or voice recordings of a person.
        Required columns:
            source-subject-id: unique identifier of the source subject
            source-subject-data-sample: data sample from the source subject
            target-subject-id: unique identifier of the target subject
            target-subject-data-sample: data sample from the target subject
    subjects_sensitive_features : pd.DataFrame of shape (n_subjects, n_sensitive_feature_names), optional
        Sensitive features of all subjects present in the pairs dataframe.
        If provided, disaggregated performance assessment is also performed.
        These can be columns you want to perform segmentation analysis on, or
        features related to fairness such as 'race' or 'gender'.
        Required columns:
            subject-id: id of subjects. Must cover all subjects included in the `pairs` dataframe
            other columns with arbitrary names for sensitive features
    """

    def __init__(self, name: str, pairs=None, subjects_sensitive_features=None):
        super().__init__("ComparisonData", name)
        self.pairs = pairs
        self.subjects_sensitive_features = subjects_sensitive_features
        self._validate_pairs()
        self._validate_subjects_sensitive_features()
        self._preprocess_subjects_sensitive_features()

    def copy(self):
        """Returns a deepcopy of the instantiated class"""
        return deepcopy(self)

    def _validate_pairs(self):
        """Validate the input `pairs` object"""
        if self.pairs is not None:
            # Basic validation for pairs
            if not isinstance(self.pairs, pd.DataFrame):
                raise ValidationError("pairs must be a pd.DataFrame")

            required_columns = [
                "source-subject-id",
                "source-subject-data-sample",
                "target-subject-id",
                "target-subject-data-sample",
            ]
            available_columns = self.pairs.columns
            for c in required_columns:
                if c not in available_columns:
                    raise ValidationError(
                        f"pairs dataframe does not contain the required column '{c}'"
                    )

            if len(available_columns) != 4:
                raise ValidationError(
                    f"pairs dataframe has '{len(available_columns)}' columns. It must have 4."
                )

            if self.pairs.isnull().values.any():
                raise ValidationError(
                    "pairs dataframe contains NaN values. It must not have any."
                )

    def _validate_subjects_sensitive_features(self):
        """Validate the input `subjects_sensitive_features` object"""
        if self.subjects_sensitive_features is not None:
            # Basic validation for subjects_sensitive_features
            if not isinstance(self.subjects_sensitive_features, pd.DataFrame):
                raise ValidationError(
                    "subjects_sensitive_features must be a pd.DataFrame"
                )

            available_columns = self.subjects_sensitive_features.columns
            if "subject-id" not in available_columns:
                raise ValidationError(
                    "subjects_sensitive_features dataframe does not contain the required column 'subject-id'"
                )
            if len(available_columns) < 2:
                raise ValidationError(
                    "subjects_sensitive_features dataframe includes 'subject-id' column only. It must include at least one sensitive feature column too."
                )

            if self.subjects_sensitive_features.isnull().values.any():
                raise ValidationError(
                    "subjects_sensitive_features dataframe contains NaN values. It must not have any."
                )

            sensitive_features_names = list(self.subjects_sensitive_features.columns)
            sensitive_features_names.remove("subject-id")
            for sf_name in sensitive_features_names:
                unique_values = self.subjects_sensitive_features[sf_name].unique()
                if len(unique_values) == 1:
                    raise ValidationError(
                        f"Sensitive Feature column {sf_name} must have more "
                        f"than one unique value. Only found one value: {unique_values[0]}"
                    )


    def _preprocess_subjects_sensitive_features(self):
        """Preprocess the input `subjects_sensitive_features` object"""
        # Guard against the optional dataframe being absent; without this check the
        # constructor would fail whenever only `pairs` is provided.
        if self.subjects_sensitive_features is None:
            return
        sensitive_features_names = list(self.subjects_sensitive_features.columns)
        sensitive_features_names.remove("subject-id")
        for sf_name in sensitive_features_names:
            self.subjects_sensitive_features[sf_name] = self.subjects_sensitive_features[
                sf_name
            ].astype(str)

    def _validate_X(self):
        pass

    def _validate_y(self):
        pass
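A minimal usage sketch for this artifact, assuming the column names documented in the docstring above; the subject ids, sample identifiers, and the 'gender' feature are purely illustrative:

import pandas as pd
from credoai.artifacts import ComparisonData

# Hypothetical pairs: each row pairs a source data sample with a target data sample.
pairs = pd.DataFrame(
    {
        "source-subject-id": ["s0", "s0", "s1"],
        "source-subject-data-sample": ["selfie_s0_a", "selfie_s0_a", "selfie_s1_a"],
        "target-subject-id": ["s0", "s1", "s1"],
        "target-subject-data-sample": ["selfie_s0_b", "selfie_s1_b", "selfie_s1_c"],
    }
)

# Optional sensitive features, keyed by subject-id, enabling disaggregated assessment.
subjects_sensitive_features = pd.DataFrame(
    {"subject-id": ["s0", "s1"], "gender": ["female", "male"]}
)

credo_data = ComparisonData(
    name="face-pairs-validation",
    pairs=pairs,
    subjects_sensitive_features=subjects_sensitive_features,
)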
4 changes: 3 additions & 1 deletion credoai/artifacts/model/classification_model.py
@@ -1,6 +1,8 @@
"""Model artifact wrapping any classification model"""
from .base_model import Model

PREDICT_PROBA_FRAMEWORKS = ["sklearn", "xgboost"]


class ClassificationModel(Model):
    """Class wrapper around classification model to be assessed
@@ -32,7 +34,7 @@ def __init__(self, name: str, model_like=None, tags=None):

    def _update_functionality(self):
        """Conditionally updates functionality based on framework"""
        if self.model_info["framework"] == "sklearn":
        if self.model_info["framework"] in PREDICT_PROBA_FRAMEWORKS:
            func = getattr(self, "predict_proba", None)
            if func and len(self.model_like.classes_) == 2:
                self.__dict__["predict_proba"] = lambda x: func(x)[:, 1]
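In effect, for binary classifiers from the frameworks listed in PREDICT_PROBA_FRAMEWORKS, the wrapped `predict_proba` returns only the positive-class column rather than the full two-column probability matrix. A small sketch of that reduction with a plain scikit-learn model (the data and model here are illustrative only):

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])
clf = LogisticRegression().fit(X, y)

full = clf.predict_proba(X)   # shape (4, 2): probabilities for class 0 and class 1
positive = full[:, 1]         # shape (4,): what the wrapped predict_proba yields for binary models
assert np.allclose(full.sum(axis=1), 1.0)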
52 changes: 52 additions & 0 deletions credoai/artifacts/model/comparison_model.py
@@ -0,0 +1,52 @@
"""Model artifact wrapping any comparison model"""
from .base_model import Model


class ComparisonModel(Model):
    """Class wrapper around comparison model to be assessed

    ComparisonModel serves as an adapter between arbitrary pair-wise comparison
    models and the identity verification evaluations in Lens. Evaluations depend
    on ComparisonModel instantiating `compare`.

    Parameters
    ----------
    name : str
        Label of the model
    model_like : model_like
        A pair-wise comparison model or pipeline. It must have a
        `compare` function that takes a two-dimensional python list of data sample pairs and
        returns a list containing the similarity scores for each pair.
        Example input and output: [[sd1,sd2], [sd1,td3], [td3,td8]] --> [98, 4, 91]
        The type of data sample is decided by this `compare` function and reflected in the `ComparisonData` object.
    """

    def __init__(self, name: str, model_like=None):
        super().__init__(
            "ComparisonModel",
            ["compare"],
            ["compare"],
            name,
            model_like,
        )


class DummyComparisonModel:
    """Class wrapper around comparison model predictions

    This class can be used when a comparison model is not available but its outputs are.
    The outputs include the array containing the predicted similarity scores.
    Wrap the outputs with this class into a dummy comparison model and pass it as
    the model to `ComparisonModel`.

    Parameters
    ----------
    compare_output : array
        Array containing the output of a comparison model's `compare` method
    """

    def __init__(self, compare_output=None):
        self.compare_output = compare_output

    def compare(self):
        return self.compare_output
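A minimal sketch of the wrapping pattern described in the docstring; the precomputed similarity scores are hypothetical:

from credoai.artifacts import ComparisonModel, DummyComparisonModel

# Hypothetical precomputed similarity scores, one per data sample pair.
precomputed_scores = [98, 4, 91]

# Wrap the scores in the dummy model, then pass it to ComparisonModel as model_like.
dummy = DummyComparisonModel(compare_output=precomputed_scores)
credo_model = ComparisonModel(name="face-verification", model_like=dummy)

print(dummy.compare())  # [98, 4, 91]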
2 changes: 2 additions & 0 deletions credoai/evaluators/__init__.py
@@ -15,3 +15,5 @@
from .shap import ShapExplainer
from .model_profiler import ModelProfiler
from .feature_drift import FeatureDrift
from .deepchecks import Deepchecks
from .identity_verification import IdentityVerification
131 changes: 131 additions & 0 deletions credoai/evaluators/deepchecks.py
@@ -0,0 +1,131 @@
from credoai.evaluators import Evaluator

from typing import List, Optional

from credoai.utils.common import ValidationError
from credoai.modules.deepchecks_constants import DEFAULT_CHECKS
from credoai.evaluators.utils.validation import (
    check_requirements_deepchecks,
)

from credoai.evidence import DeepchecksContainer

from deepchecks.tabular import Suite, Dataset
from deepchecks.core import BaseCheck


class Deepchecks(Evaluator):
    """
    deepchecks evaluator

    This evaluator enables running deepchecks `checks` and passing the results to
    the Governance platform in the form of a deepchecks SuiteResult, cast to JSON format.
    See https://docs.deepchecks.com/stable/api/generated/deepchecks.tabular.checks.model_evaluation.html
    and https://docs.deepchecks.com/stable/api/generated/deepchecks.core.SuiteResult.html
    and https://docs.deepchecks.com/stable/user-guide/general/customizations/examples/plot_create_a_custom_suite.html
    for more details.

    This evaluator provides some redundant functionality. For instance, metrics that can be
    calculated using the Performance evaluator can potentially be calculated by deepchecks
    (and thus this evaluator) as well. The same applies to the FeatureDrift evaluator.
    When a choice exists, best practice dictates that the "Lens native" evaluator should
    be used in preference to deepchecks, since the output formats of the other evaluators are
    generally consistent, while this deepchecks evaluator outputs results in a highly
    structured JSON format.

    Parameters
    ----------
    suite_name : str, optional
        Name of the supplied deepchecks suite
    checks : List-like, optional
        A list of instantiated deepchecks check objects (e.g. BoostingOverfit, CalibrationScore)
        #TODO allow list of strings?
    """

    name = "Deepchecks"  ## This is going to go away once we merge with Ian's PR #214

    required_artifacts = {"model", "assessment_data", "training_data"}
    # All artifacts are OPTIONAL; all that's required is that at least one of these is
    # provided. The evaluator's custom validation function checks for this.

    def __init__(
        self,
        suite_name: Optional[str] = "Credo_Deepchecks_Suite",
        checks: Optional[List[BaseCheck]] = DEFAULT_CHECKS,
    ):
        super().__init__()
        self.name = suite_name
        self.checks = checks

    def _setup(self):
        # Set artifacts.
        # All artifacts are optional and thus any could be NoneType;
        # internal (Lens) validation ensures that at least one artifact is valid.
        self.model = self.model
        self.test_dataset = self.assessment_data
        self.train_dataset = self.training_data

    def _setup_deepchecks(self):
        if self.test_dataset:
            self.test_dataset = Dataset(
                df=self.test_dataset.X, label=self.test_dataset.y
            )

        if self.train_dataset:
            self.train_dataset = Dataset(
                df=self.train_dataset.X, label=self.train_dataset.y
            )

        if self.model:
            self.deepchecks_model = self.model.model_like

        self.suite = Suite(name=self.name)
        for check in self.checks:
            self.suite.add(check)
        # Adding checks one at a time in a for-loop seems to be the only way;
        # deepchecks won't let you pass a whole list of checks, which is...silly?

    def evaluate(self):
        """
        Execute any data/model processing required for the evaluator.
        Populates the self.results object.

        Returns
        -------
        self
        """
        self._setup_deepchecks()
        self.run_suite()

        self.results = [DeepchecksContainer(self.name, self.suite_results)]

        return self

    def run_suite(self):
        if self.train_dataset and self.test_dataset:
            self.suite_results = self.suite.run(
                train_dataset=self.train_dataset,
                test_dataset=self.test_dataset,
                model=self.model.model_like,
            )
        elif self.train_dataset:
            self.suite_results = self.suite.run(
                train_dataset=self.train_dataset, model=self.model.model_like
            )
        else:
            # Deepchecks expects the user to specify a train dataset if only a single
            # dataset is specified, even if that single dataset is supposed to be a test set.
            # This doesn't really make sense and makes client code (like ours) less readable.
            # Nevertheless, there's no way around it.
            self.suite_results = self.suite.run(
                train_dataset=self.test_dataset, model=self.model.model_like
            )

    def _validate_arguments(self):
        """
        Check that basic requirements for the run of an evaluator are met.
        """
        check_requirements_deepchecks(self)
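A minimal sketch of constructing this evaluator with a custom check list, using the two deepchecks checks named as examples in the docstring above and assuming they are importable from `deepchecks.tabular.checks` in the installed deepchecks version; the suite name is arbitrary, and in practice Lens supplies the model and dataset artifacts before `evaluate()` is called:

from deepchecks.tabular.checks import BoostingOverfit, CalibrationScore

from credoai.evaluators import Deepchecks

# Hypothetical custom suite: two instantiated checks instead of DEFAULT_CHECKS.
deepchecks_evaluator = Deepchecks(
    suite_name="My_Custom_Suite",
    checks=[BoostingOverfit(), CalibrationScore()],
)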