Skip to content

Commit

Permalink
Pdoc updates for eval_runner
Browse files Browse the repository at this point in the history
  • Loading branch information
scosman committed Mar 1, 2025
1 parent bdec27a commit 1c31181
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions libs/core/kiln_ai/adapters/eval/eval_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ class EvalProgress:

class EvalRunner:
"""
Runs an eval.
Runs an eval. Supports async execution, which speeds up eval runs when using remote or fast model providers.
Specifically, runs a specific eval config on a list of task runs.
Can run an eval in two modes:
1) eval_config_eval: evaluate an eval config using existing dataset items.
2) task_run_eval: evaluate a range of task run configs, generating new run output using existing dataset item input.
"""

def __init__(
Expand Down Expand Up @@ -91,7 +93,7 @@ def collect_tasks_for_eval_config_eval(self) -> List[EvalJob]:
"""
Collect all jobs for this run, excluding any that have already been run.
This variant is used when evaluating an eval config, using existing dataset run.
This variant is used for mode "eval_config_eval", using existing dataset run data (input/output).
The tasks:
- should be in the eval config set filter
Expand Down Expand Up @@ -122,11 +124,11 @@ def collect_tasks_for_task_run_eval(self) -> List[EvalJob]:
"""
Collect all jobs for this run, excluding any that have already been run.
This variant is used when evaluating a range of task run configs on an eval config.
This variant is used for mode "task_run_eval", generating new run output using existing dataset item input.
The tasks:
- should be in the eval set filter
- should not have already been run for this eval config + run config pair
- should not have already been run for this eval config + run config + dataset item
"""
filter = dataset_filter_from_id(self.eval.eval_set_filter_id)

Expand Down Expand Up @@ -158,7 +160,7 @@ def collect_tasks_for_task_run_eval(self) -> List[EvalJob]:

async def run(self, concurrency: int = 25) -> AsyncGenerator[EvalProgress, None]:
"""
Runs the eval with parallel workers and yields progress updates.
Runs the configured eval run with parallel workers and yields progress updates.
"""
jobs = self.collect_tasks()

Expand Down

0 comments on commit 1c31181

Please sign in to comment.