diff --git a/mozci/queries/test_task_overhead.query b/mozci/queries/test_task_overhead.query
new file mode 100644
index 00000000..c9512b0c
--- /dev/null
+++ b/mozci/queries/test_task_overhead.query
@@ -0,0 +1,11 @@
+from: unittest
+format: list
+groupby: task.id
+limit: 20000
+select:
+    - {value: action.start_time, name: task_min, aggregate: min}
+    - {value: action.end_time, name: task_max, aggregate: max}
+    - {value: result.start_time, name: group_min, aggregate: min}
+    - {value: result.end_time, name: group_max, aggregate: max}
+where:
+    - in: {task.id: {$eval: task_id}}
\ No newline at end of file
diff --git a/mozci/task.py b/mozci/task.py
index a91a4238..1fa451d4 100644
--- a/mozci/task.py
+++ b/mozci/task.py
@@ -2,6 +2,7 @@
 import json
 import os
 from abc import ABC, abstractmethod
+from argparse import Namespace
 from dataclasses import dataclass, field
 from enum import Enum
 from inspect import signature
@@ -9,6 +10,7 @@
 from typing import Dict, List, Optional
 
 import requests
+from adr.query import run_query
 from adr.util import memoized_property
 from loguru import logger
 from urllib3.response import HTTPResponse
@@ -358,6 +360,30 @@ def configuration(self):
         parts = config.split("-")
         return "-".join(parts[:-1] if parts[-1].isdigit() else parts)
 
+    @property
+    def overhead(self):
+        """Calculate the overhead of a task.
+
+        The methodology is simple: each task (action) has a start/end time.
+        Each group also has a start/end time. Take the earliest known group start
+        and latest known group end time, ensure the two fall somewhere in between
+        task start/end.
+
+        This definition of overhead does not take into account inter-group overhead
+        e.g. restarting browser, teardown, etc.
+
+        Returns:
+            float: difference between task start/end and group start/end times.
+        """
+        data = run_query("test_task_overhead", Namespace(task_id=self.id))["data"].pop()
+        # Sanity check to ensure group start/end times are within task start/end.
+        if data["task_min"] > data["group_min"] or data["task_max"] < data["group_max"]:
+            logger.warning(f"task {self.id} has inconsistent group duration.")
+
+        return (data["group_min"] - data["task_min"]) + (
+            data["task_max"] - data["group_max"]
+        )
+
 
 # Don't perform type checking because of https://github.com/python/mypy/issues/5374.
 @dataclass  # type: ignore
@@ -496,6 +522,18 @@ def total_duration(self):
     def median_duration(self):
         return median(self.durations)
 
+    @property
+    def overheads(self):
+        return [task.overhead for task in self.tasks]
+
+    @property
+    def total_overheads(self):
+        return sum(self.overheads)
+
+    @property
+    def median_overhead(self):
+        return median(self.overheads)
+
     @memoized_property
     def status(self):
         overall_status = None