Skip to content

Commit

Permalink
chore(llmobs): [MLOB-1944] generalize helper for extracting token met…
Browse files Browse the repository at this point in the history
…rics (#12223)

[applying #12026 to
3.x-staging]

This PR generalizes the helper method used to extract token metrics from
an APM span to be attached to an LLMObs span. Before, Anthropic,
Bedrock, and OpenAI had specific methods on each of their integration
classes to accomplish this. Now, there is a single get_llmobs_metrics_tags
utility function, adapted from the Google-specific
get_llmobs_metrics_tags_google function, that is reused across these
integrations as well as Vertex AI and Gemini. The LangChain integration
was excluded from this change since its logic for extracting token
metrics varies significantly compared to the other integrations.

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met 
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: Nicole Cybul <[email protected]>
  • Loading branch information
Kyle-Verhoog and ncybul authored Feb 5, 2025
1 parent e5055b7 commit b17990b
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 50 deletions.
21 changes: 2 additions & 19 deletions ddtrace/llmobs/_integrations/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,14 @@

from ddtrace.internal.logger import get_logger
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
from ddtrace.llmobs._constants import MODEL_NAME
from ddtrace.llmobs._constants import MODEL_PROVIDER
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags
from ddtrace.llmobs._utils import _get_attr
from ddtrace.trace import Span

Expand Down Expand Up @@ -77,7 +75,7 @@ def _llmobs_set_tags(
INPUT_MESSAGES: input_messages,
METADATA: parameters,
OUTPUT_MESSAGES: output_messages,
METRICS: self._get_llmobs_metrics_tags(span),
METRICS: get_llmobs_metrics_tags("anthropic", span),
}
)

Expand Down Expand Up @@ -188,18 +186,3 @@ def record_usage(self, span: Span, usage: Dict[str, Any]) -> None:
span.set_metric("anthropic.response.usage.output_tokens", output_tokens)
if input_tokens is not None and output_tokens is not None:
span.set_metric("anthropic.response.usage.total_tokens", input_tokens + output_tokens)

@staticmethod
def _get_llmobs_metrics_tags(span):
    """Build the LLMObs metrics dict from token counts recorded on *span*.

    Reads the anthropic input/output/total token metrics off the APM span
    and maps each one that is present onto the corresponding LLMObs metric
    key; metrics that were never set on the span are omitted entirely.
    """
    # (LLMObs key, APM span metric name) pairs, checked in a uniform loop.
    token_metric_map = (
        (INPUT_TOKENS_METRIC_KEY, "anthropic.response.usage.input_tokens"),
        (OUTPUT_TOKENS_METRIC_KEY, "anthropic.response.usage.output_tokens"),
        (TOTAL_TOKENS_METRIC_KEY, "anthropic.response.usage.total_tokens"),
    )
    usage = {}
    for usage_key, metric_name in token_metric_map:
        value = span.get_metric(metric_name)
        # Only include metrics that were actually recorded (0 is a valid value).
        if value is not None:
            usage[usage_key] = value
    return usage
17 changes: 2 additions & 15 deletions ddtrace/llmobs/_integrations/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,16 @@

from ddtrace.internal.logger import get_logger
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
from ddtrace.llmobs._constants import MODEL_NAME
from ddtrace.llmobs._constants import MODEL_PROVIDER
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import OUTPUT_TOKENS_METRIC_KEY
from ddtrace.llmobs._constants import PARENT_ID_KEY
from ddtrace.llmobs._constants import PROPAGATED_PARENT_ID_KEY
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
from ddtrace.llmobs._integrations import BaseLLMIntegration
from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags
from ddtrace.llmobs._utils import _get_llmobs_parent_id
from ddtrace.trace import Span

Expand Down Expand Up @@ -57,22 +55,11 @@ def _llmobs_set_tags(
MODEL_PROVIDER: span.get_tag("bedrock.request.model_provider") or "",
INPUT_MESSAGES: input_messages,
METADATA: parameters,
METRICS: self._llmobs_metrics(span, response),
METRICS: get_llmobs_metrics_tags("bedrock", span),
OUTPUT_MESSAGES: output_messages,
}
)

@staticmethod
def _llmobs_metrics(span: Span, response: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """Extract token-usage metrics for an LLMObs span from a Bedrock APM span.

    Token counts are only reported when *response* is a non-empty mapping whose
    "text" entry is truthy; otherwise an empty metrics dict is returned. Bedrock
    records usage as span tags (strings), so values are coerced to int, with
    missing tags defaulting to 0.
    """
    # Guard clause: no usable response text means no metrics to report.
    if not (response and response.get("text")):
        return {}
    prompt_tokens = int(span.get_tag("bedrock.usage.prompt_tokens") or 0)
    completion_tokens = int(span.get_tag("bedrock.usage.completion_tokens") or 0)
    return {
        INPUT_TOKENS_METRIC_KEY: prompt_tokens,
        OUTPUT_TOKENS_METRIC_KEY: completion_tokens,
        TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens,
    }

@staticmethod
def _extract_input_message(prompt):
"""Extract input messages from the stored prompt.
Expand Down
4 changes: 2 additions & 2 deletions ddtrace/llmobs/_integrations/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
from ddtrace.llmobs._integrations.utils import extract_message_from_part_google
from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags_google
from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags
from ddtrace.llmobs._integrations.utils import get_system_instructions_from_google_model
from ddtrace.llmobs._integrations.utils import llmobs_get_metadata_google
from ddtrace.llmobs._utils import _get_attr
Expand Down Expand Up @@ -59,7 +59,7 @@ def _llmobs_set_tags(
METADATA: metadata,
INPUT_MESSAGES: input_messages,
OUTPUT_MESSAGES: output_messages,
METRICS: get_llmobs_metrics_tags_google("google_generativeai", span),
METRICS: get_llmobs_metrics_tags("google_generativeai", span),
}
)

Expand Down
11 changes: 2 additions & 9 deletions ddtrace/llmobs/_integrations/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags
from ddtrace.llmobs._utils import _get_attr
from ddtrace.llmobs.utils import Document
from ddtrace.trace import Pin
Expand Down Expand Up @@ -234,12 +235,4 @@ def _extract_llmobs_metrics_tags(span: Span, resp: Any) -> Dict[str, Any]:
OUTPUT_TOKENS_METRIC_KEY: completion_tokens,
TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens,
}
prompt_tokens = span.get_metric("openai.response.usage.prompt_tokens")
completion_tokens = span.get_metric("openai.response.usage.completion_tokens")
if prompt_tokens is None or completion_tokens is None:
return {}
return {
INPUT_TOKENS_METRIC_KEY: prompt_tokens,
OUTPUT_TOKENS_METRIC_KEY: completion_tokens,
TOTAL_TOKENS_METRIC_KEY: prompt_tokens + completion_tokens,
}
return get_llmobs_metrics_tags("openai", span)
25 changes: 22 additions & 3 deletions ddtrace/llmobs/_integrations/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,29 @@ def extract_message_from_part_google(part, role=None):
return message


def get_llmobs_metrics_tags_google(integration_name, span):
def get_llmobs_metrics_tags(integration_name, span):
usage = {}
input_tokens = span.get_metric("%s.response.usage.prompt_tokens" % integration_name)
output_tokens = span.get_metric("%s.response.usage.completion_tokens" % integration_name)

# bedrock integration tags usage under meta instead of metrics
if integration_name == "bedrock":
input_tokens = int(span.get_tag("bedrock.usage.prompt_tokens") or 0)
output_tokens = int(span.get_tag("bedrock.usage.completion_tokens") or 0)
total_tokens = input_tokens + output_tokens
if input_tokens:
usage[INPUT_TOKENS_METRIC_KEY] = input_tokens
if output_tokens:
usage[OUTPUT_TOKENS_METRIC_KEY] = output_tokens
if total_tokens:
usage[TOTAL_TOKENS_METRIC_KEY] = total_tokens
return usage

# check for both prompt / completion or input / output tokens
input_tokens = span.get_metric("%s.response.usage.prompt_tokens" % integration_name) or span.get_metric(
"%s.response.usage.input_tokens" % integration_name
)
output_tokens = span.get_metric("%s.response.usage.completion_tokens" % integration_name) or span.get_metric(
"%s.response.usage.output_tokens" % integration_name
)
total_tokens = span.get_metric("%s.response.usage.total_tokens" % integration_name)

if input_tokens is not None:
Expand Down
4 changes: 2 additions & 2 deletions ddtrace/llmobs/_integrations/vertexai.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
from ddtrace.llmobs._integrations.utils import extract_message_from_part_google
from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags_google
from ddtrace.llmobs._integrations.utils import get_llmobs_metrics_tags
from ddtrace.llmobs._integrations.utils import get_system_instructions_from_google_model
from ddtrace.llmobs._integrations.utils import llmobs_get_metadata_google
from ddtrace.llmobs._utils import _get_attr
Expand Down Expand Up @@ -65,7 +65,7 @@ def _llmobs_set_tags(
METADATA: metadata,
INPUT_MESSAGES: input_messages,
OUTPUT_MESSAGES: output_messages,
METRICS: get_llmobs_metrics_tags_google("vertexai", span),
METRICS: get_llmobs_metrics_tags("vertexai", span),
}
)

Expand Down

0 comments on commit b17990b

Please sign in to comment.