Revert summary metrics changes and expand test cases to get nightly / main passing again
markurtz committed Sep 11, 2024
1 parent 1409245 commit 7d3d42e
Showing 3 changed files with 22 additions and 104 deletions.
26 changes: 18 additions & 8 deletions src/guidellm/core/report.py
@@ -147,15 +147,19 @@ def _create_benchmark_report_data_tokens_summary(
     for benchmark in report.benchmarks_sorted:
         table.add_row(
             _benchmark_rate_id(benchmark),
-            f"{benchmark.prompt_token:.2f}",
+            f"{benchmark.prompt_token_distribution.mean:.2f}",
             ", ".join(
                 f"{percentile:.1f}"
-                for percentile in benchmark.prompt_token_percentiles
+                for percentile in benchmark.prompt_token_distribution.percentiles(
+                    [1, 5, 50, 95, 99]
+                )
             ),
-            f"{benchmark.output_token:.2f}",
+            f"{benchmark.output_token_distribution.mean:.2f}",
             ", ".join(
                 f"{percentile:.1f}"
-                for percentile in benchmark.output_token_percentiles
+                for percentile in benchmark.output_token_distribution.percentiles(
+                    [1, 5, 50, 95, 99]
+                )
             ),
         )
     logger.debug("Created data tokens summary table for the report.")
@@ -177,7 +181,7 @@ def _create_benchmark_report_dist_perf_summary(
         "Benchmark",
         "Request Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (sec)",
         "Time to First Token [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
-        "Inter Token Latency [1%, 5%, 10%, 50%, 90%, 95%, 99%] (ms)",
+        "Inter Token Latency [1%, 5%, 10%, 50%, 90% 95%, 99%] (ms)",
         title="[magenta]Performance Stats by Benchmark[/magenta]",
         title_style="bold",
         title_justify="left",
@@ -189,15 +193,21 @@
             _benchmark_rate_id(benchmark),
             ", ".join(
                 f"{percentile:.2f}"
-                for percentile in benchmark.request_latency_percentiles
+                for percentile in benchmark.request_latency_distribution.percentiles(
+                    [1, 5, 10, 50, 90, 95, 99]
+                )
             ),
             ", ".join(
                 f"{percentile * 1000:.1f}"
-                for percentile in benchmark.time_to_first_token_percentiles
+                for percentile in benchmark.ttft_distribution.percentiles(
+                    [1, 5, 10, 50, 90, 95, 99]
+                )
             ),
             ", ".join(
                 f"{percentile * 1000:.1f}"
-                for percentile in benchmark.inter_token_latency_percentiles
+                for percentile in benchmark.itl_distribution.percentiles(
+                    [1, 5, 10, 50, 90, 95, 99]
+                )
             ),
         )
     logger.debug("Created distribution performance summary table for the report.")
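With the `*_percentiles` convenience properties gone, the report code above builds its rows by calling `Distribution.percentiles(...)` and `Distribution.mean` directly. A minimal sketch of that pattern, using a toy stand-in for `guidellm.core.distribution.Distribution` (the real class has more to it; this only illustrates the calls the table code relies on):

import numpy as np

class Distribution:
    """Toy stand-in for guidellm.core.distribution.Distribution (sketch only)."""

    def __init__(self, data):
        self.data = list(data)

    @property
    def mean(self) -> float:
        # Average of the recorded samples; 0.0 when empty
        return float(np.mean(self.data)) if self.data else 0.0

    def percentiles(self, percentiles):
        # np.percentile accepts a list of ranks in [0, 100] and
        # returns one value per rank
        return list(np.percentile(self.data, percentiles)) if self.data else []

# Formatting mirrors the table rows in the diff above
dist = Distribution(data=[12, 15, 18, 22, 35])
print(f"{dist.mean:.2f}")  # mean token count, e.g. 20.40
print(", ".join(f"{p:.1f}" for p in dist.percentiles([1, 5, 50, 95, 99])))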
94 changes: 1 addition & 93 deletions src/guidellm/core/result.py
@@ -2,7 +2,7 @@
 from typing import Any, Dict, List, Literal, Optional, Union

 from loguru import logger
-from pydantic import Field, computed_field
+from pydantic import Field

 from guidellm.core.distribution import Distribution
 from guidellm.core.request import TextGenerationRequest
@@ -221,7 +221,6 @@ def __iter__(self):
         """
         return iter(self.results)

-    @computed_field  # type: ignore[misc]
     @property
     def request_count(self) -> int:
         """
@@ -232,7 +231,6 @@ def request_count(self) -> int:
         """
         return len(self.results)

-    @computed_field  # type: ignore[misc]
     @property
     def error_count(self) -> int:
         """
@@ -243,7 +241,6 @@ def error_count(self) -> int:
         """
         return len(self.errors)

-    @computed_field  # type: ignore[misc]
     @property
     def total_count(self) -> int:
         """
@@ -254,7 +251,6 @@ def total_count(self) -> int:
         """
         return self.request_count + self.error_count

-    @computed_field  # type: ignore[misc]
     @property
     def start_time(self) -> Optional[float]:
         """
@@ -268,7 +264,6 @@ def start_time(self) -> Optional[float]:

         return self.results[0].start_time

-    @computed_field  # type: ignore[misc]
     @property
     def end_time(self) -> Optional[float]:
         """
@@ -282,7 +277,6 @@ def end_time(self) -> Optional[float]:

         return self.results[-1].end_time

-    @computed_field  # type: ignore[misc]
     @property
     def duration(self) -> float:
         """
@@ -296,7 +290,6 @@ def duration(self) -> float:

         return self.end_time - self.start_time

-    @computed_field  # type: ignore[misc]
     @property
     def completed_request_rate(self) -> float:
         """
@@ -310,7 +303,6 @@ def completed_request_rate(self) -> float:

         return len(self.results) / self.duration

-    @computed_field  # type: ignore[misc]
     @property
     def request_latency(self) -> float:
         """
@@ -340,19 +332,6 @@ def request_latency_distribution(self) -> Distribution:
             ]
         )

-    @computed_field  # type: ignore[misc]
-    @property
-    def request_latency_percentiles(self) -> List[float]:
-        """
-        Get standard percentiles of request latency in seconds.
-        :return: List of percentile request latency in seconds
-        :rtype: List[float]
-        """
-        return self.request_latency_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
-
-
-    @computed_field  # type: ignore[misc]
     @property
     def time_to_first_token(self) -> float:
         """
@@ -382,20 +361,6 @@ def ttft_distribution(self) -> Distribution:
             ]
         )

-    @computed_field  # type: ignore[misc]
-    @property
-    def time_to_first_token_percentiles(self) -> List[float]:
-        """
-        Get standard percentiles for time taken to decode the first token
-        in milliseconds.
-        :return: List of percentile time taken to decode the first token
-        in milliseconds.
-        :rtype: List[float]
-        """
-        return self.ttft_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
-
-    @computed_field  # type: ignore[misc]
     @property
     def inter_token_latency(self) -> float:
         """
@@ -423,18 +388,6 @@ def itl_distribution(self) -> Distribution:
             ]
         )

-    @computed_field  # type: ignore[misc]
-    @property
-    def inter_token_latency_percentiles(self) -> List[float]:
-        """
-        Get standard percentiles for the time between tokens in milliseconds.
-        :return: List of percentiles for the average time between tokens.
-        :rtype: List[float]
-        """
-        return self.itl_distribution.percentiles([1, 5, 10, 50, 90, 95, 99])
-
-    @computed_field  # type: ignore[misc]
     @property
     def output_token_throughput(self) -> float:
         """
@@ -450,17 +403,6 @@ def output_token_throughput(self) -> float:

         return total_tokens / self.duration

-    @computed_field  # type: ignore[misc]
-    @property
-    def prompt_token(self) -> float:
-        """
-        Get the average number of prompt tokens.
-        :return: The average number of prompt tokens.
-        :rtype: float
-        """
-        return self.prompt_token_distribution.mean
-
     @property
     def prompt_token_distribution(self) -> Distribution:
         """
@@ -471,28 +413,6 @@ def prompt_token_distribution(self) -> Distribution:
         """
         return Distribution(data=[result.prompt_token_count for result in self.results])

-    @computed_field  # type: ignore[misc]
-    @property
-    def prompt_token_percentiles(self) -> List[float]:
-        """
-        Get standard percentiles for number of prompt tokens.
-        :return: List of percentiles of number of prompt tokens.
-        :rtype: List[float]
-        """
-        return self.prompt_token_distribution.percentiles([1, 5, 50, 95, 99])
-
-    @computed_field  # type: ignore[misc]
-    @property
-    def output_token(self) -> float:
-        """
-        Get the average number of output tokens.
-        :return: The average number of output tokens.
-        :rtype: float
-        """
-        return self.output_token_distribution.mean
-
     @property
     def output_token_distribution(self) -> Distribution:
         """
@@ -503,18 +423,6 @@ def output_token_distribution(self) -> Distribution:
         """
         return Distribution(data=[result.output_token_count for result in self.results])

-    @computed_field  # type: ignore[misc]
-    @property
-    def output_token_percentiles(self) -> List[float]:
-        """
-        Get standard percentiles for number of output tokens.
-        :return: List of percentiles of number of output tokens.
-        :rtype: List[float]
-        """
-        return self.output_token_distribution.percentiles([1, 5, 50, 95, 99])
-
-    @computed_field  # type: ignore[misc]
     @property
     def overloaded(self) -> bool:
         if (
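The practical difference between `@computed_field` and a bare `@property` in pydantic v2 is serialization: computed fields appear in `model_dump()` and `model_dump_json()`, plain properties do not. So this revert keeps every metric reachable in code while pulling the derived values back out of the serialized report. A minimal sketch of that behavior (the model and field names here are illustrative, not the actual guidellm models):

from pydantic import BaseModel, computed_field

class Report(BaseModel):
    results: list[float] = []

    @computed_field  # serialized: included in model_dump()/model_dump_json()
    @property
    def request_count(self) -> int:
        return len(self.results)

    @property  # not serialized: usable in code, absent from dumps
    def mean_latency(self) -> float:
        return sum(self.results) / len(self.results) if self.results else 0.0

report = Report(results=[1.0, 2.0])
print(report.model_dump())   # {'results': [1.0, 2.0], 'request_count': 2}
print(report.mean_latency)   # 1.5, but not present in the dump above

This is presumably also why report.py changed: with the percentile lists no longer carried by the serialized benchmark objects, the table code computes them on demand from the distributions.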
6 changes: 3 additions & 3 deletions tests/unit/core/test_report.py
@@ -66,23 +66,23 @@ def test_guidance_report_print(sample_benchmark_report):
     report.print()  # This will output to the console


-@pytest.mark.regression()
+@pytest.mark.sanity()
 def test_guidance_report_json(sample_benchmark_report):
     report = GuidanceReport(benchmarks=[sample_benchmark_report])
     json_str = report.to_json()
     loaded_report = GuidanceReport.from_json(json_str)
     assert compare_guidance_reports(report, loaded_report)


-@pytest.mark.regression()
+@pytest.mark.sanity()
 def test_guidance_report_yaml(sample_benchmark_report):
     report = GuidanceReport(benchmarks=[sample_benchmark_report])
     yaml_str = report.to_yaml()
     loaded_report = GuidanceReport.from_yaml(yaml_str)
     assert compare_guidance_reports(report, loaded_report)


-@pytest.mark.regression()
+@pytest.mark.sanity()
 def test_guidance_report_save_load_file(sample_benchmark_report):
     report = GuidanceReport(benchmarks=[sample_benchmark_report])
     with tempfile.TemporaryDirectory() as temp_dir:
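The three serialization tests move from the `regression` marker to `sanity`, presumably so the nightly / main workflow named in the commit message picks them up. For reference, custom markers like these must be registered or pytest emits `PytestUnknownMarkWarning`. A sketch of one common registration approach, assuming a `conftest.py`-based setup (the repo may instead use `[tool.pytest.ini_options]` in pyproject.toml; the marker descriptions below are illustrative):

# conftest.py (sketch): register the custom markers used in the tests above
def pytest_configure(config):
    config.addinivalue_line("markers", "smoke: quick checks run on every change")
    config.addinivalue_line("markers", "sanity: core checks run on nightly / main")
    config.addinivalue_line("markers", "regression: slower, full-coverage checks")

# A CI job then selects a suite with, e.g.: pytest -m sanity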
