Skip to content

Commit

Permalink
feat: Implement abstract EvaluationHarness class (#5)
Browse files Browse the repository at this point in the history
* feat: Implement abstract `EvaluationHarness` class

* Add license header

* docstring lint

* Sort imports

* Lint
  • Loading branch information
shadeMe authored May 22, 2024
1 parent d9454fc commit 57ece86
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 0 deletions.
4 changes: 4 additions & 0 deletions haystack_experimental/evaluation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0

from .eval_harness import EvalRunOverrides, EvaluationHarness

_all_ = ["EvaluationHarness", "EvalRunOverrides"]
86 changes: 86 additions & 0 deletions haystack_experimental/evaluation/eval_harness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Generic, Optional, Type, TypeVar

from haystack import Pipeline
from haystack.core.serialization import DeserializationCallbacks
from haystack.evaluation.eval_run_result import BaseEvaluationRunResult


@dataclass
class EvalRunOverrides:
"""
Overrides for an evaluation run.
Used to override the init parameters of components in either
(or both) the evaluated and evaluation pipelines. Each key is
a component name and its value a dictionary with init parameters
to override.
:param evaluated_pipeline_overrides:
Overrides for the evaluated pipeline.
:param evaluation_pipeline_overrides:
Overrides for the evaluation pipeline.
"""

evaluated_pipeline_overrides: Optional[Dict[str, Dict[str, Any]]] = None
evaluation_pipeline_overrides: Optional[Dict[str, Dict[str, Any]]] = None


EvalRunInputT = TypeVar("EvalRunInputT")
EvalRunOutputT = TypeVar("EvalRunOutputT", bound=BaseEvaluationRunResult)
EvalRunOverridesT = TypeVar("EvalRunOverridesT")


class EvaluationHarness(ABC, Generic[EvalRunInputT, EvalRunOverridesT, EvalRunOutputT]):
"""
Executes a pipeline with a given set of parameters, inputs and evaluates its outputs with an evaluation pipeline.
"""

@staticmethod
def _override_pipeline(pipeline: Pipeline, parameter_overrides: Optional[Dict[str, Any]]) -> Pipeline:
def component_pre_init_callback(
name: str, cls: Type, init_params: Dict[str, Any]
): # pylint: disable=unused-argument
assert parameter_overrides is not None
overrides = parameter_overrides.get(name)
if overrides:
init_params.update(overrides)

def validate_overrides():
if parameter_overrides is None:
return

pipeline_components = pipeline.inputs(include_components_with_connected_inputs=True).keys()
for component_name in parameter_overrides.keys():
if component_name not in pipeline_components:
raise ValueError(f"Cannot override non-existent component '{component_name}'")

callbacks = DeserializationCallbacks(component_pre_init_callback)
if parameter_overrides:
validate_overrides()
serialized_pipeline = pipeline.dumps()
pipeline = Pipeline.loads(serialized_pipeline, callbacks=callbacks)

return pipeline

@abstractmethod
def run(
self, inputs: EvalRunInputT, *, overrides: Optional[EvalRunOverridesT] = None, run_name: Optional[str] = None
) -> EvalRunOutputT:
"""
Launch a evaluation run.
:param inputs:
Inputs to the evaluated and evaluation pipelines.
:param overrides:
Overrides for the harness.
:param run_name:
A name for the evaluation run.
:returns:
The output of the evaluation pipeline.
"""

0 comments on commit 57ece86

Please sign in to comment.