This repository has been archived by the owner on Jul 12, 2024. It is now read-only.

Merge pull request #243 from credo-ai/release/1.1.2
Release/1.1.2
IanAtCredo authored Nov 11, 2022
2 parents f7755df + ad14553 commit d0a3ceb
Showing 29 changed files with 1,490 additions and 149 deletions.
2 changes: 1 addition & 1 deletion credoai/__init__.py
@@ -2,4 +2,4 @@
Primary interface for Credo AI Lens package
"""

__version__ = "1.1.1"
__version__ = "1.1.2"
2 changes: 2 additions & 0 deletions credoai/artifacts/__init__.py
@@ -7,6 +7,8 @@
"""
from .data.base_data import Data
from .data.tabular_data import TabularData
from .data.comparison_data import ComparisonData
from .model.base_model import Model
from .model.classification_model import ClassificationModel, DummyClassifier
from .model.comparison_model import ComparisonModel, DummyComparisonModel
from .model.regression_model import RegressionModel, DummyRegression
130 changes: 130 additions & 0 deletions credoai/artifacts/data/comparison_data.py
@@ -0,0 +1,130 @@
"""Data artifact for pair-wise-comparison-based identity verification"""
from copy import deepcopy

import pandas as pd
from credoai.utils.common import ValidationError

from .base_data import Data


class ComparisonData(Data):
    """Class wrapper for pair-wise-comparison-based identity verification

    ComparisonData serves as an adapter between pair-wise-comparison-based identity
    verification data and the identity verification evaluator in Lens.

    Parameters
    ----------
    name : str
        Label of the dataset
    pairs : pd.DataFrame of shape (n_pairs, 4)
        Dataframe where each row represents a data sample pair and the associated subjects.
        The type of data sample is determined by the ComparisonModel's `compare` function,
        which takes data sample pairs and returns their similarity scores. Examples include
        selfies, fingerprint scans, or voice recordings of a person.
        Required columns:
            source-subject-id: unique identifier of the source subject
            source-subject-data-sample: data sample from the source subject
            target-subject-id: unique identifier of the target subject
            target-subject-data-sample: data sample from the target subject
    subjects_sensitive_features : pd.DataFrame of shape (n_subjects, n_sensitive_feature_names), optional
        Sensitive features of all subjects present in the pairs dataframe.
        If provided, disaggregated performance assessment is also performed.
        These can be columns you want to perform segmentation analysis on, or
        features related to fairness such as 'race' or 'gender'.
        Required columns:
            subject-id: id of subjects. Must cover all subjects included in the `pairs` dataframe
            other columns with arbitrary names for sensitive features
    """

    def __init__(self, name: str, pairs=None, subjects_sensitive_features=None):
        super().__init__("ComparisonData", name)
        self.pairs = pairs
        self.subjects_sensitive_features = subjects_sensitive_features
        self._validate_pairs()
        self._validate_subjects_sensitive_features()
        self._preprocess_subjects_sensitive_features()

    def copy(self):
        """Returns a deepcopy of the instantiated class"""
        return deepcopy(self)

    def _validate_pairs(self):
        """Validate the input `pairs` object"""
        if self.pairs is not None:
            # Basic validation for pairs
            if not isinstance(self.pairs, pd.DataFrame):
                raise ValidationError("pairs must be a pd.DataFrame")

            required_columns = [
                "source-subject-id",
                "source-subject-data-sample",
                "target-subject-id",
                "target-subject-data-sample",
            ]
            available_columns = self.pairs.columns
            for c in required_columns:
                if c not in available_columns:
                    raise ValidationError(
                        f"pairs dataframe does not contain the required column '{c}'"
                    )

            if len(available_columns) != 4:
                raise ValidationError(
                    f"pairs dataframe has '{len(available_columns)}' columns. It must have 4."
                )

            if self.pairs.isnull().values.any():
                raise ValidationError(
                    "pairs dataframe contains NaN values. It must not have any."
                )

    def _validate_subjects_sensitive_features(self):
        """Validate the input `subjects_sensitive_features` object"""
        if self.subjects_sensitive_features is not None:
            # Basic validation for subjects_sensitive_features
            if not isinstance(self.subjects_sensitive_features, pd.DataFrame):
                raise ValidationError(
                    "subjects_sensitive_features must be a pd.DataFrame"
                )

            available_columns = self.subjects_sensitive_features.columns
            if "subject-id" not in available_columns:
                raise ValidationError(
                    "subjects_sensitive_features dataframe does not contain the required column 'subject-id'"
                )
            if len(available_columns) < 2:
                raise ValidationError(
                    "subjects_sensitive_features dataframe includes 'subject-id' column only. It must include at least one sensitive feature column too."
                )

            if self.subjects_sensitive_features.isnull().values.any():
                raise ValidationError(
                    "subjects_sensitive_features dataframe contains NaN values. It must not have any."
                )

            sensitive_features_names = list(self.subjects_sensitive_features.columns)
            sensitive_features_names.remove("subject-id")
            for sf_name in sensitive_features_names:
                unique_values = self.subjects_sensitive_features[sf_name].unique()
                if len(unique_values) == 1:
                    raise ValidationError(
                        f"Sensitive Feature column {sf_name} must have more "
                        f"than one unique value. Only found one value: {unique_values[0]}"
                    )


    def _preprocess_subjects_sensitive_features(self):
        """Preprocess the input `subjects_sensitive_features` object"""
        # Guard against the optional dataframe being absent; without this check the
        # constructor would fail whenever only `pairs` is provided.
        if self.subjects_sensitive_features is None:
            return
        sensitive_features_names = list(self.subjects_sensitive_features.columns)
        sensitive_features_names.remove("subject-id")
        for sf_name in sensitive_features_names:
            self.subjects_sensitive_features[sf_name] = self.subjects_sensitive_features[
                sf_name
            ].astype(str)

    def _validate_X(self):
        pass

    def _validate_y(self):
        pass
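A minimal usage sketch for this artifact, assuming the column names documented in the docstring above; the subject ids, sample identifiers, and the 'gender' feature are purely illustrative:

import pandas as pd
from credoai.artifacts import ComparisonData

# Hypothetical pairs: each row pairs a source data sample with a target data sample.
pairs = pd.DataFrame(
    {
        "source-subject-id": ["s0", "s0", "s1"],
        "source-subject-data-sample": ["selfie_s0_a", "selfie_s0_a", "selfie_s1_a"],
        "target-subject-id": ["s0", "s1", "s1"],
        "target-subject-data-sample": ["selfie_s0_b", "selfie_s1_b", "selfie_s1_c"],
    }
)

# Optional sensitive features, keyed by subject-id, enabling disaggregated assessment.
subjects_sensitive_features = pd.DataFrame(
    {"subject-id": ["s0", "s1"], "gender": ["female", "male"]}
)

credo_data = ComparisonData(
    name="face-pairs-validation",
    pairs=pairs,
    subjects_sensitive_features=subjects_sensitive_features,
)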
4 changes: 3 additions & 1 deletion credoai/artifacts/model/classification_model.py
@@ -1,6 +1,8 @@
"""Model artifact wrapping any classification model"""
from .base_model import Model

PREDICT_PROBA_FRAMEWORKS = ["sklearn", "xgboost"]


class ClassificationModel(Model):
    """Class wrapper around classification model to be assessed
@@ -32,7 +34,7 @@ def __init__(self, name: str, model_like=None, tags=None):

    def _update_functionality(self):
        """Conditionally updates functionality based on framework"""
        if self.model_info["framework"] == "sklearn":
        if self.model_info["framework"] in PREDICT_PROBA_FRAMEWORKS:
            func = getattr(self, "predict_proba", None)
            if func and len(self.model_like.classes_) == 2:
                self.__dict__["predict_proba"] = lambda x: func(x)[:, 1]
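In effect, for binary classifiers from the frameworks listed in PREDICT_PROBA_FRAMEWORKS, the wrapped `predict_proba` returns only the positive-class column rather than the full two-column probability matrix. A small sketch of that reduction with a plain scikit-learn model (the data and model here are illustrative only):

import numpy as np
from sklearn.linear_model import LogisticRegression

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])
clf = LogisticRegression().fit(X, y)

full = clf.predict_proba(X)   # shape (4, 2): probabilities for class 0 and class 1
positive = full[:, 1]         # shape (4,): what the wrapped predict_proba yields for binary models
assert np.allclose(full.sum(axis=1), 1.0)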
52 changes: 52 additions & 0 deletions credoai/artifacts/model/comparison_model.py
@@ -0,0 +1,52 @@
"""Model artifact wrapping any comparison model"""
from .base_model import Model


class ComparisonModel(Model):
    """Class wrapper around comparison model to be assessed

    ComparisonModel serves as an adapter between arbitrary pair-wise comparison
    models and the identity verification evaluations in Lens. Evaluations depend
    on ComparisonModel instantiating `compare`.

    Parameters
    ----------
    name : str
        Label of the model
    model_like : model_like
        A pair-wise comparison model or pipeline. It must have a
        `compare` function that takes a two-dimensional python list of data sample pairs and
        returns a list containing the similarity scores for each pair.
        Example input and output: [[sd1,sd2], [sd1,td3], [td3,td8]] --> [98, 4, 91]
        The type of data sample is decided by this `compare` function and reflected in the `ComparisonData` object.
    """

    def __init__(self, name: str, model_like=None):
        super().__init__(
            "ComparisonModel",
            ["compare"],
            ["compare"],
            name,
            model_like,
        )


class DummyComparisonModel:
    """Class wrapper around comparison model predictions

    This class can be used when a comparison model is not available but its outputs are.
    The outputs include the array containing the predicted similarity scores.
    Wrap the outputs with this class into a dummy comparison model and pass it as
    the model to `ComparisonModel`.

    Parameters
    ----------
    compare_output : array
        Array containing the output of a comparison model's `compare` method
    """

    def __init__(self, compare_output=None):
        self.compare_output = compare_output

    def compare(self):
        return self.compare_output
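A minimal sketch of the wrapping pattern described in the docstring; the precomputed similarity scores are hypothetical:

from credoai.artifacts import ComparisonModel, DummyComparisonModel

# Hypothetical precomputed similarity scores, one per data sample pair.
precomputed_scores = [98, 4, 91]

# Wrap the scores in the dummy model, then pass it to ComparisonModel as model_like.
dummy = DummyComparisonModel(compare_output=precomputed_scores)
credo_model = ComparisonModel(name="face-verification", model_like=dummy)

print(dummy.compare())  # [98, 4, 91]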
2 changes: 2 additions & 0 deletions credoai/evaluators/__init__.py
@@ -15,3 +15,5 @@
from .shap import ShapExplainer
from .model_profiler import ModelProfiler
from .feature_drift import FeatureDrift
from .deepchecks import Deepchecks
from .identity_verification import IdentityVerification
131 changes: 131 additions & 0 deletions credoai/evaluators/deepchecks.py
@@ -0,0 +1,131 @@
from credoai.evaluators import Evaluator

from typing import List, Optional

from credoai.utils.common import ValidationError
from credoai.modules.deepchecks_constants import DEFAULT_CHECKS
from credoai.evaluators.utils.validation import (
    check_requirements_deepchecks,
)

from credoai.evidence import DeepchecksContainer

from deepchecks.tabular import Suite, Dataset
from deepchecks.core import BaseCheck


class Deepchecks(Evaluator):
    """
    deepchecks evaluator

    This evaluator enables running deepchecks `checks` and passing the results to
    the Governance platform in the form of a deepchecks SuiteResult, cast to JSON format.
    See https://docs.deepchecks.com/stable/api/generated/deepchecks.tabular.checks.model_evaluation.html
    and https://docs.deepchecks.com/stable/api/generated/deepchecks.core.SuiteResult.html
    and https://docs.deepchecks.com/stable/user-guide/general/customizations/examples/plot_create_a_custom_suite.html
    for more details.

    This evaluator provides some redundant functionality. For instance, metrics that can be
    calculated using the Performance evaluator can potentially be calculated by deepchecks
    (and thus this evaluator) as well. The same applies to the FeatureDrift evaluator.
    When a choice exists, best practice dictates that the "Lens native" evaluator should
    be used in preference to deepchecks, since the output formats of the other evaluators are
    generally consistent, while this deepchecks evaluator outputs results in a highly
    structured JSON format.

    Parameters
    ----------
    suite_name : str, optional
        Name of the supplied deepchecks suite
    checks : List-like, optional
        A list of instantiated deepchecks check objects (e.g. BoostingOverfit, CalibrationScore)
        #TODO allow list of strings?
    """

    name = "Deepchecks"  ## This is going to go away once we merge with Ian's PR #214

    required_artifacts = {"model", "assessment_data", "training_data"}
    # All artifacts are OPTIONAL; all that's required is that at least one of these is
    # provided. The evaluator's custom validation function checks for this.

    def __init__(
        self,
        suite_name: Optional[str] = "Credo_Deepchecks_Suite",
        checks: Optional[List[BaseCheck]] = DEFAULT_CHECKS,
    ):
        super().__init__()
        self.name = suite_name
        self.checks = checks

    def _setup(self):
        # Set artifacts.
        # All artifacts are optional and thus any could be NoneType;
        # internal (Lens) validation ensures that at least one artifact is valid.
        self.model = self.model
        self.test_dataset = self.assessment_data
        self.train_dataset = self.training_data

    def _setup_deepchecks(self):
        if self.test_dataset:
            self.test_dataset = Dataset(
                df=self.test_dataset.X, label=self.test_dataset.y
            )

        if self.train_dataset:
            self.train_dataset = Dataset(
                df=self.train_dataset.X, label=self.train_dataset.y
            )

        if self.model:
            self.deepchecks_model = self.model.model_like

        self.suite = Suite(name=self.name)
        for check in self.checks:
            self.suite.add(check)
        # Adding checks one at a time in a for-loop seems to be the only way;
        # deepchecks won't let you pass a whole list of checks, which is...silly?

    def evaluate(self):
        """
        Execute any data/model processing required for the evaluator.
        Populates the self.results object.

        Returns
        -------
        self
        """
        self._setup_deepchecks()
        self.run_suite()

        self.results = [DeepchecksContainer(self.name, self.suite_results)]

        return self

    def run_suite(self):
        if self.train_dataset and self.test_dataset:
            self.suite_results = self.suite.run(
                train_dataset=self.train_dataset,
                test_dataset=self.test_dataset,
                model=self.model.model_like,
            )
        elif self.train_dataset:
            self.suite_results = self.suite.run(
                train_dataset=self.train_dataset, model=self.model.model_like
            )
        else:
            # Deepchecks expects the user to specify a train dataset if only a single
            # dataset is specified, even if that single dataset is supposed to be a test set.
            # This doesn't really make sense and makes client code (like ours) less readable.
            # Nevertheless, there's no way around it.
            self.suite_results = self.suite.run(
                train_dataset=self.test_dataset, model=self.model.model_like
            )

    def _validate_arguments(self):
        """
        Check that basic requirements for the run of an evaluator are met.
        """
        check_requirements_deepchecks(self)
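A minimal sketch of constructing this evaluator with a custom check list, using the two deepchecks checks named as examples in the docstring above and assuming they are importable from `deepchecks.tabular.checks` in the installed deepchecks version; the suite name is arbitrary, and in practice Lens supplies the model and dataset artifacts before `evaluate()` is called:

from deepchecks.tabular.checks import BoostingOverfit, CalibrationScore

from credoai.evaluators import Deepchecks

# Hypothetical custom suite: two instantiated checks instead of DEFAULT_CHECKS.
deepchecks_evaluator = Deepchecks(
    suite_name="My_Custom_Suite",
    checks=[BoostingOverfit(), CalibrationScore()],
)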