Fix #7105: Expose metrics directly on conversation rather than inside events #7107

Closed
wants to merge 3 commits into from
openhands/events/stream.py (38 additions, 0 deletions)
@@ -428,3 +428,41 @@ def get_matching_events(
break

return matching_events

    def get_metrics(self):
        """Get the accumulated metrics from all events in the stream.

        This method extracts metrics from events that contain them and returns
        the aggregated metrics object.

        Returns:
            Metrics: The metrics object containing accumulated cost and token usage data.
            Returns None if no metrics are found.
        """
        from openhands.llm.metrics import Metrics

        # Look for events with metrics
        metrics = None
        events_with_metrics = []

        try:
            # First collect all events with metrics
            for event in self.get_events():
                if hasattr(event, 'llm_metrics') and event.llm_metrics is not None:
                    events_with_metrics.append(event)

            # Then merge them if any were found
            if events_with_metrics:
                # Get the first event with metrics to initialize our metrics object
                first_event = events_with_metrics[0]
                if first_event.llm_metrics is not None:
                    metrics = Metrics(model_name=first_event.llm_metrics.model_name)

                # Merge metrics from all events
                for event in events_with_metrics:
                    if event.llm_metrics is not None:
                        metrics.merge(event.llm_metrics)
        except Exception as e:
            logger.error(f'Error retrieving metrics from events: {e}')

        return metrics
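
As a quick illustration (not part of this diff), here is a minimal sketch of how a caller might use the new `EventStream.get_metrics()`. The `sid` value and the `MagicMock` file store mirror the unit tests added later in this PR and are assumptions, not production wiring.

```python
# Illustrative sketch only: reading aggregated LLM metrics from an event stream.
# The MagicMock file store mirrors the unit tests in this PR; a real deployment
# would pass an actual FileStore implementation instead.
from unittest.mock import MagicMock

from openhands.events.stream import EventStream

stream = EventStream(sid='example-conversation', file_store=MagicMock())

metrics = stream.get_metrics()
if metrics is None:
    print('No LLM metrics recorded on any event yet.')
else:
    prompt = sum(u.prompt_tokens for u in metrics.token_usages)
    completion = sum(u.completion_tokens for u in metrics.token_usages)
    print(f'cost={metrics.accumulated_cost} prompt={prompt} completion={completion}')
```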
openhands/server/routes/conversation.py (68 additions, 0 deletions)
@@ -7,6 +7,74 @@
app = APIRouter(prefix='/api/conversations/{conversation_id}')


@app.get('/metrics')
async def get_conversation_metrics(request: Request):
    """Retrieve the conversation metrics.

    This endpoint returns the accumulated cost and token usage metrics for the conversation.
    Metrics are retrieved directly from the runtime's state rather than reconstructed from events,
    providing a more accurate representation of costs, including those not associated with events.

    Args:
        request (Request): The incoming FastAPI request object.

    Returns:
        JSONResponse: A JSON response containing the metrics data.
    """
    try:
        if not hasattr(request.state, 'conversation'):
            return JSONResponse(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                content={'error': 'No conversation found in request state'},
            )

        conversation = request.state.conversation

        # Get metrics directly from the conversation's runtime state
        metrics = conversation.get_metrics()

        # If no metrics from state, fall back to event stream metrics for backward compatibility
        if not metrics and hasattr(conversation.event_stream, 'get_metrics'):
            metrics = conversation.event_stream.get_metrics()

        if not metrics:
            # Return empty metrics if not available
            return JSONResponse(
                status_code=status.HTTP_200_OK,
                content={
                    'accumulated_cost': 0.0,
                    'total_prompt_tokens': 0,
                    'total_completion_tokens': 0,
                    'total_tokens': 0,
                },
            )

        # Calculate total tokens
        total_prompt_tokens = sum(usage.prompt_tokens for usage in metrics.token_usages)
        total_completion_tokens = sum(
            usage.completion_tokens for usage in metrics.token_usages
        )
        total_tokens = total_prompt_tokens + total_completion_tokens

        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content={
                'accumulated_cost': metrics.accumulated_cost,
                'total_prompt_tokens': total_prompt_tokens,
                'total_completion_tokens': total_completion_tokens,
                'total_tokens': total_tokens,
            },
        )
    except Exception as e:
        logger.error(f'Error getting conversation metrics: {e}')
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={
                'error': f'Error getting conversation metrics: {e}',
            },
        )
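
For context (not part of the diff), a hedged sketch of how a client might call the new endpoint. The base URL, conversation id, and the absence of authentication are assumptions that will differ per deployment; the response keys mirror the JSON assembled by the route above.

```python
# Illustrative client call; URL, conversation id, and auth handling are assumptions.
import requests

BASE_URL = 'http://localhost:3000'        # hypothetical local server
CONVERSATION_ID = 'example-conversation'  # placeholder id

resp = requests.get(f'{BASE_URL}/api/conversations/{CONVERSATION_ID}/metrics')
resp.raise_for_status()

data = resp.json()
# Keys mirror the JSON built by the route above.
print(data['accumulated_cost'], data['total_prompt_tokens'],
      data['total_completion_tokens'], data['total_tokens'])
```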


@app.get('/config')
async def get_remote_runtime_config(request: Request):
    """Retrieve the runtime configuration.
openhands/server/session/conversation.py (21 additions, 0 deletions)
@@ -46,3 +46,24 @@ async def disconnect(self):
        if self.event_stream:
            self.event_stream.close()
        asyncio.create_task(call_sync_from_async(self.runtime.close))

    def get_metrics(self):
        """Get metrics directly from the runtime's state.

        This method retrieves metrics from the runtime's state object rather than
        reconstructing them from events, providing a more accurate representation
        of costs and token usage, including those not associated with events.

        Returns:
            Metrics: The metrics object containing accumulated cost and token usage data.
            Returns None if no metrics are available or if the runtime has no state.
        """
        try:
            if hasattr(self.runtime, 'state') and self.runtime.state:
                return self.runtime.state.metrics
            return None
        except Exception as e:
            from openhands.core.logger import openhands_logger as logger

            logger.error(f'Error retrieving metrics from runtime state: {e}')
            return None
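
The PR's unit tests below cover only the event-stream path. Purely as an illustrative companion, and assuming the class defined in openhands/server/session/conversation.py is named `Conversation`, a mocked-runtime check of this new method could look roughly like this.

```python
# Illustrative sketch, not part of this PR: exercising get_metrics with a mocked runtime.
from unittest.mock import MagicMock

from openhands.llm.metrics import Metrics
from openhands.server.session.conversation import Conversation  # assumed class name


def test_get_metrics_prefers_runtime_state():
    state_metrics = Metrics(model_name='gpt-4')

    fake_conversation = MagicMock()  # stands in for a Conversation instance
    fake_conversation.runtime.state.metrics = state_metrics

    # Call the real method against the mock "self"; it only touches self.runtime.
    assert Conversation.get_metrics(fake_conversation) is state_metrics
```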
tests/unit/events/test_stream.py (129 additions, 0 deletions)
@@ -0,0 +1,129 @@
from unittest.mock import MagicMock, patch

from openhands.events.event import Event
from openhands.events.stream import EventStream
from openhands.llm.metrics import Metrics


class TestEventStream:
    def test_get_metrics_empty_stream(self):
        """Test that get_metrics returns None for an empty stream."""
        sid = 'test-stream-id'
        file_store = MagicMock()
        stream = EventStream(sid=sid, file_store=file_store)
        assert stream.get_metrics() is None

    def test_get_metrics_no_metrics_in_events(self):
        """Test that get_metrics returns None when no events have metrics."""
        sid = 'test-stream-id'
        file_store = MagicMock()
        stream = EventStream(sid=sid, file_store=file_store)
        event = MagicMock(spec=Event)
        event.llm_metrics = None

        with patch.object(stream, 'get_events', return_value=[event]):
            assert stream.get_metrics() is None

    def test_get_metrics_with_metrics(self):
        """Test that get_metrics correctly aggregates metrics from events."""
        sid = 'test-stream-id'
        file_store = MagicMock()
        stream = EventStream(sid=sid, file_store=file_store)

        # Create mock events with metrics
        event1 = MagicMock(spec=Event)
        metrics1 = Metrics(model_name='gpt-4')
        metrics1.add_token_usage(
            prompt_tokens=10,
            completion_tokens=20,
            cache_read_tokens=0,
            cache_write_tokens=0,
            response_id='resp1',
        )
        event1.llm_metrics = metrics1

        event2 = MagicMock(spec=Event)
        metrics2 = Metrics(model_name='gpt-4')
        metrics2.add_token_usage(
            prompt_tokens=15,
            completion_tokens=25,
            cache_read_tokens=0,
            cache_write_tokens=0,
            response_id='resp2',
        )
        event2.llm_metrics = metrics2

        with patch.object(stream, 'get_events', return_value=[event1, event2]):
            result = stream.get_metrics()

            assert result is not None
            assert result.model_name == 'gpt-4'
            # Check token usages are merged correctly
            total_prompt_tokens = sum(
                usage.prompt_tokens for usage in result.token_usages
            )
            total_completion_tokens = sum(
                usage.completion_tokens for usage in result.token_usages
            )
            assert total_prompt_tokens == 25  # 10 + 15
            assert total_completion_tokens == 45  # 20 + 25
            assert len(result.token_usages) == 2

    def test_get_metrics_with_exception(self):
        """Test that get_metrics handles exceptions gracefully."""
        sid = 'test-stream-id'
        file_store = MagicMock()
        stream = EventStream(sid=sid, file_store=file_store)

        with patch.object(
            stream, 'get_events', side_effect=Exception('Test exception')
        ):
            assert stream.get_metrics() is None

    def test_get_metrics_with_mixed_events(self):
        """Test that get_metrics correctly handles a mix of events with and without metrics."""
        sid = 'test-stream-id'
        file_store = MagicMock()
        stream = EventStream(sid=sid, file_store=file_store)

        # Create mock events, some with metrics and some without
        event1 = MagicMock(spec=Event)
        metrics1 = Metrics(model_name='gpt-4')
        metrics1.add_token_usage(
            prompt_tokens=10,
            completion_tokens=20,
            cache_read_tokens=0,
            cache_write_tokens=0,
            response_id='resp1',
        )
        event1.llm_metrics = metrics1

        event2 = MagicMock(spec=Event)
        event2.llm_metrics = None

        event3 = MagicMock(spec=Event)
        metrics3 = Metrics(model_name='gpt-4')
        metrics3.add_token_usage(
            prompt_tokens=15,
            completion_tokens=25,
            cache_read_tokens=0,
            cache_write_tokens=0,
            response_id='resp3',
        )
        event3.llm_metrics = metrics3

        with patch.object(stream, 'get_events', return_value=[event1, event2, event3]):
            result = stream.get_metrics()

            assert result is not None
            assert result.model_name == 'gpt-4'
            # Check token usages are merged correctly
            total_prompt_tokens = sum(
                usage.prompt_tokens for usage in result.token_usages
            )
            total_completion_tokens = sum(
                usage.completion_tokens for usage in result.token_usages
            )
            assert total_prompt_tokens == 25  # 10 + 15
            assert total_completion_tokens == 45  # 20 + 25
            assert len(result.token_usages) == 2