# chore(llmobs): use span store instead of temporary tags (#11543)
This PR performs some cleanup refactors on the LLMObs SDK and
associated integrations, specifically regarding the stored data, which
includes LLMObs span metadata/metrics/tags/IO:
- Stop storing these as temporary span tags and instead use the span
store field, which allows arbitrary key-value pairs but is not submitted
to Datadog. This removes the risk of a temporary tag going unextracted
and still being submitted as an APM span tag.
- Stop calling `safe_json()` (i.e. `json.dumps()`) to store the above
data, an expensive operation whose cost adds up with the number of
separate calls. Instead, store the raw values of the objects in the
store field and call `safe_json()` only once, at payload encoding time.
A minimal before/after sketch follows this list.
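
As an illustrative sketch (not the exact integration code), the pattern
change looks like this, using constants and helpers that exist in
`ddtrace.llmobs`:

```python
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._utils import safe_json

def annotate_before(span, input_messages):
    # Old pattern: JSON-serialize immediately and store as a plain span tag.
    # If the temporary "_ml_obs.*" tag was never extracted, it shipped to
    # Datadog as a regular APM span tag.
    span.set_tag_str(INPUT_MESSAGES, safe_json(input_messages))

def annotate_after(span, input_messages):
    # New pattern: store the raw object in the span store field, which accepts
    # arbitrary key-value pairs and is never submitted to Datadog. safe_json()
    # now runs once per span, at payload encoding time.
    span._set_ctx_items({INPUT_MESSAGES: input_messages})
```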

Things to look out for:
- Previously we called `safe_json()` every time we stored data as
string span tags. One risk is errors during span processing due to
wrong types: code that expects a string may now receive a
dictionary/object from the span store field.
- Avoiding all JSON processing before encode time surfaced a small edge
case in the LLMObs SDK decorator function, which auto-annotates non-LLM
spans with a map of the input function arguments. In Python 3.8, the
`bind_partial().arguments` call used to extract the function arguments
returns an `OrderedDict` (Python >= 3.9 returns a regular `dict`), which
broke some tests because we were simply casting the value to a string
when storing the input/output. I added a fix that casts the
`bind_partial().arguments` object to a `dict` to avoid this issue (see
the repro below).
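
A minimal repro of the version difference (the function name here is
hypothetical, not from the SDK):

```python
import inspect

def workflow(prompt, temperature=0.5):
    """Stand-in for a user function decorated by the LLMObs SDK."""
    return prompt

bound = inspect.signature(workflow).bind_partial("hi", temperature=0.1)
print(str(bound.arguments))
# Python 3.8:   OrderedDict([('prompt', 'hi'), ('temperature', 0.1)])
# Python 3.9+:  {'prompt': 'hi', 'temperature': 0.1}
print(str(dict(bound.arguments)))
# All versions: {'prompt': 'hi', 'temperature': 0.1}
```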

## Next Steps
This is a great first step, but there is still plenty of room for
performance improvements in our encoding/writing. The most notable is
that we call `json.dumps()` on span events more than once (e.g. to
calculate the payload size before adding to the buffer); a hypothetical
sketch of this pattern follows.
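
A rough sketch of the double-encode pattern and one possible fix; the
names and size limit here are illustrative, not the actual writer code:

```python
import json

MAX_PAYLOAD_SIZE = 5 << 20  # illustrative limit, not the real writer's
buffer = []
buffer_size = 0

def enqueue_today(span_event):
    # Current pattern (roughly): serialize once just to measure the size...
    global buffer_size
    size = len(json.dumps(span_event))
    if buffer_size + size <= MAX_PAYLOAD_SIZE:
        buffer.append(span_event)  # ...then flush re-runs json.dumps() on the event
        buffer_size += size

def enqueue_once(span_event):
    # Possible improvement: encode once, buffer the bytes, and have flush
    # simply join the already-encoded events.
    global buffer_size
    encoded = json.dumps(span_event).encode("utf-8")
    if buffer_size + len(encoded) <= MAX_PAYLOAD_SIZE:
        buffer.append(encoded)
        buffer_size += len(encoded)
```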

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met 
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
Yun-Kim authored Dec 12, 2024
1 parent a37aa20 commit e474267
Showing 19 changed files with 495 additions and 563 deletions.
`ddtrace/llmobs/_integrations/anthropic.py` (13 additions & 14 deletions)

```diff
@@ -19,7 +19,6 @@
 from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._integrations.base import BaseLLMIntegration
 from ddtrace.llmobs._utils import _get_attr
-from ddtrace.llmobs._utils import safe_json
 
 
 log = get_logger(__name__)
@@ -66,21 +65,21 @@ def _llmobs_set_tags(
         system_prompt = kwargs.get("system")
         input_messages = self._extract_input_message(messages, system_prompt)
 
-        span.set_tag_str(SPAN_KIND, "llm")
-        span.set_tag_str(MODEL_NAME, span.get_tag("anthropic.request.model") or "")
-        span.set_tag_str(INPUT_MESSAGES, safe_json(input_messages))
-        span.set_tag_str(METADATA, safe_json(parameters))
-        span.set_tag_str(MODEL_PROVIDER, "anthropic")
-
-        if span.error or response is None:
-            span.set_tag_str(OUTPUT_MESSAGES, json.dumps([{"content": ""}]))
-        else:
+        output_messages = [{"content": ""}]
+        if not span.error and response is not None:
             output_messages = self._extract_output_message(response)
-            span.set_tag_str(OUTPUT_MESSAGES, safe_json(output_messages))
 
-        usage = self._get_llmobs_metrics_tags(span)
-        if usage:
-            span.set_tag_str(METRICS, safe_json(usage))
+        span._set_ctx_items(
+            {
+                SPAN_KIND: "llm",
+                MODEL_NAME: span.get_tag("anthropic.request.model") or "",
+                MODEL_PROVIDER: "anthropic",
+                INPUT_MESSAGES: input_messages,
+                METADATA: parameters,
+                OUTPUT_MESSAGES: output_messages,
+                METRICS: self._get_llmobs_metrics_tags(span),
+            }
+        )
 
     def _extract_input_message(self, messages, system_prompt=None):
         """Extract input messages from the stored prompt.
```
`ddtrace/llmobs/_integrations/bedrock.py` (15 additions & 16 deletions)

```diff
@@ -19,7 +19,6 @@
 from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY
 from ddtrace.llmobs._integrations import BaseLLMIntegration
 from ddtrace.llmobs._utils import _get_llmobs_parent_id
-from ddtrace.llmobs._utils import safe_json
 
 
 log = get_logger(__name__)
@@ -37,9 +36,9 @@ def _llmobs_set_tags(
         operation: str = "",
     ) -> None:
         """Extract prompt/response tags from a completion and set them as temporary "_ml_obs.*" tags."""
-        if span.get_tag(PROPAGATED_PARENT_ID_KEY) is None:
+        if span._get_ctx_item(PROPAGATED_PARENT_ID_KEY) is None:
             parent_id = _get_llmobs_parent_id(span) or "undefined"
-            span.set_tag(PARENT_ID_KEY, parent_id)
+            span._set_ctx_item(PARENT_ID_KEY, parent_id)
         parameters = {}
         if span.get_tag("bedrock.request.temperature"):
             parameters["temperature"] = float(span.get_tag("bedrock.request.temperature") or 0.0)
@@ -48,20 +47,20 @@
 
         prompt = kwargs.get("prompt", "")
         input_messages = self._extract_input_message(prompt)
-
-        span.set_tag_str(SPAN_KIND, "llm")
-        span.set_tag_str(MODEL_NAME, span.get_tag("bedrock.request.model") or "")
-        span.set_tag_str(MODEL_PROVIDER, span.get_tag("bedrock.request.model_provider") or "")
-
-        span.set_tag_str(INPUT_MESSAGES, safe_json(input_messages))
-        span.set_tag_str(METADATA, safe_json(parameters))
-        if span.error or response is None:
-            span.set_tag_str(OUTPUT_MESSAGES, safe_json([{"content": ""}]))
-        else:
+        output_messages = [{"content": ""}]
+        if not span.error and response is not None:
             output_messages = self._extract_output_message(response)
-            span.set_tag_str(OUTPUT_MESSAGES, safe_json(output_messages))
-        metrics = self._llmobs_metrics(span, response)
-        span.set_tag_str(METRICS, safe_json(metrics))
+        span._set_ctx_items(
+            {
+                SPAN_KIND: "llm",
+                MODEL_NAME: span.get_tag("bedrock.request.model") or "",
+                MODEL_PROVIDER: span.get_tag("bedrock.request.model_provider") or "",
+                INPUT_MESSAGES: input_messages,
+                METADATA: parameters,
+                METRICS: self._llmobs_metrics(span, response),
+                OUTPUT_MESSAGES: output_messages,
+            }
+        )
 
     @staticmethod
     def _llmobs_metrics(span: Span, response: Optional[Dict[str, Any]]) -> Dict[str, Any]:
```
`ddtrace/llmobs/_integrations/gemini.py` (13 additions & 14 deletions)

```diff
@@ -19,7 +19,6 @@
 from ddtrace.llmobs._integrations.utils import get_system_instructions_from_google_model
 from ddtrace.llmobs._integrations.utils import llmobs_get_metadata_google
 from ddtrace.llmobs._utils import _get_attr
-from ddtrace.llmobs._utils import safe_json
 
 
 class GeminiIntegration(BaseLLMIntegration):
@@ -41,28 +40,28 @@ def _llmobs_set_tags(
         response: Optional[Any] = None,
         operation: str = "",
     ) -> None:
-        span.set_tag_str(SPAN_KIND, "llm")
-        span.set_tag_str(MODEL_NAME, span.get_tag("google_generativeai.request.model") or "")
-        span.set_tag_str(MODEL_PROVIDER, span.get_tag("google_generativeai.request.provider") or "")
-
         instance = kwargs.get("instance", None)
         metadata = llmobs_get_metadata_google(kwargs, instance)
-        span.set_tag_str(METADATA, safe_json(metadata))
 
         system_instruction = get_system_instructions_from_google_model(instance)
         input_contents = get_argument_value(args, kwargs, 0, "contents")
         input_messages = self._extract_input_message(input_contents, system_instruction)
-        span.set_tag_str(INPUT_MESSAGES, safe_json(input_messages))
 
-        if span.error or response is None:
-            span.set_tag_str(OUTPUT_MESSAGES, safe_json([{"content": ""}]))
-        else:
+        output_messages = [{"content": ""}]
+        if not span.error and response is not None:
             output_messages = self._extract_output_message(response)
-            span.set_tag_str(OUTPUT_MESSAGES, safe_json(output_messages))
 
-        usage = get_llmobs_metrics_tags_google("google_generativeai", span)
-        if usage:
-            span.set_tag_str(METRICS, safe_json(usage))
+        span._set_ctx_items(
+            {
+                SPAN_KIND: "llm",
+                MODEL_NAME: span.get_tag("google_generativeai.request.model") or "",
+                MODEL_PROVIDER: span.get_tag("google_generativeai.request.provider") or "",
+                METADATA: metadata,
+                INPUT_MESSAGES: input_messages,
+                OUTPUT_MESSAGES: output_messages,
+                METRICS: get_llmobs_metrics_tags_google("google_generativeai", span),
+            }
+        )
 
     def _extract_input_message(self, contents, system_instruction=None):
         messages = []
```
(Diffs for the remaining 16 changed files are omitted.)