From e4742671776a9e09a29846a976c5804b08d252b1 Mon Sep 17 00:00:00 2001
From: Yun Kim <35776586+Yun-Kim@users.noreply.github.com>
Date: Thu, 12 Dec 2024 18:34:35 -0500
Subject: [PATCH] chore(llmobs): use span store instead of temporary tags (#11543)

This PR performs some cleanup refactors on the LLMObs SDK and associated integrations, specifically around how LLMObs span metadata/metrics/tags/IO are stored:

- Stop storing these as temporary span tags and instead use the span store field, which accepts arbitrary key-value pairs but is not submitted to Datadog. This removes the risk of a temporary tag escaping extraction and still being submitted as an APM span tag (see the sketch at the end of this description).
- Stop calling `safe_json()` (i.e. `json.dumps()`) on the above data at storage time, an expensive operation whose cost adds up with the number of separate calls. Instead, store the raw values in the store field and call `safe_json()` only once, at payload encoding time.

Things to look out for:

- Previously we called `safe_json()` every time we stored data as string span tags. One risk is type errors during span processing: code that expects a string may now receive a dictionary/object from the span store field.
- By avoiding JSON processing before encode time, a small edge case surfaced in the LLMObs SDK decorators, which auto-annotate non-LLM spans with a map of the input function arguments. In Python 3.8, the `bind_partial().arguments` call used to extract the function arguments returns an `OrderedDict`, while Python >= 3.9 returns a regular `dict`. This broke some tests because we simply cast the object to a string when storing the input/output value. The fix is to cast `bind_partial().arguments` to a `dict` before annotating (reproduced below).

## Next Steps

This is a great first step, but there are still plenty of performance improvements we can make to our encoding/writing. Most notably, we currently call `json.dumps()` on span events more than once (to calculate the payload size before adding them to the buffer).
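For illustration, here is a minimal, self-contained sketch of the before/after storage pattern. The `Span` class, the constant value, and the `safe_json` helper below are simplified stand-ins for the real ddtrace internals (`Span._set_ctx_item`, `ddtrace.llmobs._utils.safe_json`, etc.), not the actual implementation:

```python
import json
from typing import Any, Dict


def safe_json(obj: Any) -> str:
    # Stand-in for ddtrace.llmobs._utils.safe_json: best-effort json.dumps
    # that stringifies values the encoder cannot otherwise handle.
    return json.dumps(obj, default=lambda o: "[Unserializable object: {}]".format(repr(o)))


class Span:
    # Toy model of the two storage surfaces involved in this PR.
    def __init__(self) -> None:
        self._meta: Dict[str, str] = {}   # APM span tags: submitted to Datadog
        self._store: Dict[str, Any] = {}  # span store: local-only, never submitted

    def set_tag_str(self, key: str, value: str) -> None:
        self._meta[key] = value

    def _set_ctx_item(self, key: str, value: Any) -> None:
        self._store[key] = value

    def _get_ctx_item(self, key: str) -> Any:
        return self._store.get(key)


INPUT_MESSAGES = "_ml_obs.meta.input.messages"  # illustrative constant value

span = Span()
messages = [{"role": "user", "content": "Hello World!"}]

# Old pattern: serialize on every annotation call and park the result in a
# temporary "_ml_obs.*" tag, relying on it being extracted (and popped) later.
span.set_tag_str(INPUT_MESSAGES, safe_json(messages))

# New pattern: store the raw object in the span store; nothing is serialized
# up front and nothing can leak into the submitted APM tags. safe_json() then
# runs once per payload at encoding time.
span._set_ctx_item(INPUT_MESSAGES, messages)
payload = safe_json(
    {"spans": [{"meta": {"input": {"messages": span._get_ctx_item(INPUT_MESSAGES)}}}]}
)
```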
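And a quick reproduction of the Python 3.8 edge case mentioned above. `greet` is a hypothetical decorated function; the assumption is that auto-annotation ends up storing `str()` of the bound-arguments mapping as the span's input value:

```python
from inspect import signature


def greet(name, punctuation="!"):  # hypothetical traced function
    return "Hello {}{}".format(name, punctuation)


bound_args = signature(greet).bind_partial("World")

# Python 3.8: bound_args.arguments is an OrderedDict, so str() yields
# "OrderedDict([('name', 'World')])". Python >= 3.9 returns a plain dict,
# so str() yields "{'name': 'World'}".
print(str(bound_args.arguments))

# Casting to dict first normalizes the stored string across versions:
print(str(dict(bound_args.arguments)))  # "{'name': 'World'}" everywhere
```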
## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/llmobs/_integrations/anthropic.py | 27 +- ddtrace/llmobs/_integrations/bedrock.py | 31 +- ddtrace/llmobs/_integrations/gemini.py | 27 +- ddtrace/llmobs/_integrations/langchain.py | 132 +++++---- ddtrace/llmobs/_integrations/openai.py | 57 ++-- ddtrace/llmobs/_integrations/vertexai.py | 30 +- ddtrace/llmobs/_llmobs.py | 49 ++-- ddtrace/llmobs/_trace_processor.py | 74 ++--- ddtrace/llmobs/_utils.py | 12 +- ddtrace/llmobs/_writer.py | 14 +- ddtrace/llmobs/decorators.py | 12 +- .../anthropic/test_anthropic_llmobs.py | 32 --- tests/contrib/openai/test_openai_llmobs.py | 32 --- tests/llmobs/_utils.py | 52 +++- tests/llmobs/test_llmobs_decorators.py | 8 +- tests/llmobs/test_llmobs_service.py | 271 ++++++------------ .../test_llmobs_span_agentless_writer.py | 28 +- tests/llmobs/test_llmobs_span_encoder.py | 72 +++++ tests/llmobs/test_llmobs_trace_processor.py | 98 ++++--- 19 files changed, 495 insertions(+), 563 deletions(-) create mode 100644 tests/llmobs/test_llmobs_span_encoder.py diff --git a/ddtrace/llmobs/_integrations/anthropic.py b/ddtrace/llmobs/_integrations/anthropic.py index 0747d68e77b..dfb39c0f7e9 100644 --- a/ddtrace/llmobs/_integrations/anthropic.py +++ b/ddtrace/llmobs/_integrations/anthropic.py @@ -19,7 +19,6 @@ from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._integrations.base import BaseLLMIntegration from ddtrace.llmobs._utils import _get_attr -from ddtrace.llmobs._utils import safe_json log = get_logger(__name__) @@ -66,21 +65,21 @@ def _llmobs_set_tags( system_prompt = kwargs.get("system") input_messages = self._extract_input_message(messages, system_prompt) - span.set_tag_str(SPAN_KIND, "llm") - span.set_tag_str(MODEL_NAME, span.get_tag("anthropic.request.model") or "") - span.set_tag_str(INPUT_MESSAGES, safe_json(input_messages)) - span.set_tag_str(METADATA, safe_json(parameters)) - span.set_tag_str(MODEL_PROVIDER, "anthropic") - - if span.error or response is None: - span.set_tag_str(OUTPUT_MESSAGES, json.dumps([{"content": ""}])) - else: + output_messages = [{"content": ""}] + if not span.error and response is not None: output_messages = 
self._extract_output_message(response) - span.set_tag_str(OUTPUT_MESSAGES, safe_json(output_messages)) - usage = self._get_llmobs_metrics_tags(span) - if usage: - span.set_tag_str(METRICS, safe_json(usage)) + span._set_ctx_items( + { + SPAN_KIND: "llm", + MODEL_NAME: span.get_tag("anthropic.request.model") or "", + MODEL_PROVIDER: "anthropic", + INPUT_MESSAGES: input_messages, + METADATA: parameters, + OUTPUT_MESSAGES: output_messages, + METRICS: self._get_llmobs_metrics_tags(span), + } + ) def _extract_input_message(self, messages, system_prompt=None): """Extract input messages from the stored prompt. diff --git a/ddtrace/llmobs/_integrations/bedrock.py b/ddtrace/llmobs/_integrations/bedrock.py index 78798ae4f98..bf8b020ebea 100644 --- a/ddtrace/llmobs/_integrations/bedrock.py +++ b/ddtrace/llmobs/_integrations/bedrock.py @@ -19,7 +19,6 @@ from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._integrations import BaseLLMIntegration from ddtrace.llmobs._utils import _get_llmobs_parent_id -from ddtrace.llmobs._utils import safe_json log = get_logger(__name__) @@ -37,9 +36,9 @@ def _llmobs_set_tags( operation: str = "", ) -> None: """Extract prompt/response tags from a completion and set them as temporary "_ml_obs.*" tags.""" - if span.get_tag(PROPAGATED_PARENT_ID_KEY) is None: + if span._get_ctx_item(PROPAGATED_PARENT_ID_KEY) is None: parent_id = _get_llmobs_parent_id(span) or "undefined" - span.set_tag(PARENT_ID_KEY, parent_id) + span._set_ctx_item(PARENT_ID_KEY, parent_id) parameters = {} if span.get_tag("bedrock.request.temperature"): parameters["temperature"] = float(span.get_tag("bedrock.request.temperature") or 0.0) @@ -48,20 +47,20 @@ def _llmobs_set_tags( prompt = kwargs.get("prompt", "") input_messages = self._extract_input_message(prompt) - - span.set_tag_str(SPAN_KIND, "llm") - span.set_tag_str(MODEL_NAME, span.get_tag("bedrock.request.model") or "") - span.set_tag_str(MODEL_PROVIDER, span.get_tag("bedrock.request.model_provider") or "") - - span.set_tag_str(INPUT_MESSAGES, safe_json(input_messages)) - span.set_tag_str(METADATA, safe_json(parameters)) - if span.error or response is None: - span.set_tag_str(OUTPUT_MESSAGES, safe_json([{"content": ""}])) - else: + output_messages = [{"content": ""}] + if not span.error and response is not None: output_messages = self._extract_output_message(response) - span.set_tag_str(OUTPUT_MESSAGES, safe_json(output_messages)) - metrics = self._llmobs_metrics(span, response) - span.set_tag_str(METRICS, safe_json(metrics)) + span._set_ctx_items( + { + SPAN_KIND: "llm", + MODEL_NAME: span.get_tag("bedrock.request.model") or "", + MODEL_PROVIDER: span.get_tag("bedrock.request.model_provider") or "", + INPUT_MESSAGES: input_messages, + METADATA: parameters, + METRICS: self._llmobs_metrics(span, response), + OUTPUT_MESSAGES: output_messages, + } + ) @staticmethod def _llmobs_metrics(span: Span, response: Optional[Dict[str, Any]]) -> Dict[str, Any]: diff --git a/ddtrace/llmobs/_integrations/gemini.py b/ddtrace/llmobs/_integrations/gemini.py index f1a4730812f..491187475f0 100644 --- a/ddtrace/llmobs/_integrations/gemini.py +++ b/ddtrace/llmobs/_integrations/gemini.py @@ -19,7 +19,6 @@ from ddtrace.llmobs._integrations.utils import get_system_instructions_from_google_model from ddtrace.llmobs._integrations.utils import llmobs_get_metadata_google from ddtrace.llmobs._utils import _get_attr -from ddtrace.llmobs._utils import safe_json class GeminiIntegration(BaseLLMIntegration): @@ -41,28 +40,28 @@ def _llmobs_set_tags( 
response: Optional[Any] = None, operation: str = "", ) -> None: - span.set_tag_str(SPAN_KIND, "llm") - span.set_tag_str(MODEL_NAME, span.get_tag("google_generativeai.request.model") or "") - span.set_tag_str(MODEL_PROVIDER, span.get_tag("google_generativeai.request.provider") or "") - instance = kwargs.get("instance", None) metadata = llmobs_get_metadata_google(kwargs, instance) - span.set_tag_str(METADATA, safe_json(metadata)) system_instruction = get_system_instructions_from_google_model(instance) input_contents = get_argument_value(args, kwargs, 0, "contents") input_messages = self._extract_input_message(input_contents, system_instruction) - span.set_tag_str(INPUT_MESSAGES, safe_json(input_messages)) - if span.error or response is None: - span.set_tag_str(OUTPUT_MESSAGES, safe_json([{"content": ""}])) - else: + output_messages = [{"content": ""}] + if not span.error and response is not None: output_messages = self._extract_output_message(response) - span.set_tag_str(OUTPUT_MESSAGES, safe_json(output_messages)) - usage = get_llmobs_metrics_tags_google("google_generativeai", span) - if usage: - span.set_tag_str(METRICS, safe_json(usage)) + span._set_ctx_items( + { + SPAN_KIND: "llm", + MODEL_NAME: span.get_tag("google_generativeai.request.model") or "", + MODEL_PROVIDER: span.get_tag("google_generativeai.request.provider") or "", + METADATA: metadata, + INPUT_MESSAGES: input_messages, + OUTPUT_MESSAGES: output_messages, + METRICS: get_llmobs_metrics_tags_google("google_generativeai", span), + } + ) def _extract_input_message(self, contents, system_instruction=None): messages = [] diff --git a/ddtrace/llmobs/_integrations/langchain.py b/ddtrace/llmobs/_integrations/langchain.py index 2128458253d..1fce3d11804 100644 --- a/ddtrace/llmobs/_integrations/langchain.py +++ b/ddtrace/llmobs/_integrations/langchain.py @@ -28,7 +28,6 @@ from ddtrace.llmobs._constants import SPAN_KIND from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._integrations.base import BaseLLMIntegration -from ddtrace.llmobs._utils import safe_json from ddtrace.llmobs.utils import Document @@ -130,15 +129,11 @@ def _llmobs_set_metadata(self, span: Span, model_provider: Optional[str] = None) if max_tokens is not None and max_tokens != "None": metadata["max_tokens"] = int(max_tokens) if metadata: - span.set_tag_str(METADATA, safe_json(metadata)) + span._set_ctx_item(METADATA, metadata) def _llmobs_set_tags_from_llm( self, span: Span, args: List[Any], kwargs: Dict[str, Any], completions: Any, is_workflow: bool = False ) -> None: - span.set_tag_str(SPAN_KIND, "workflow" if is_workflow else "llm") - span.set_tag_str(MODEL_NAME, span.get_tag(MODEL) or "") - span.set_tag_str(MODEL_PROVIDER, span.get_tag(PROVIDER) or "") - input_tag_key = INPUT_VALUE if is_workflow else INPUT_MESSAGES output_tag_key = OUTPUT_VALUE if is_workflow else OUTPUT_MESSAGES stream = span.get_tag("langchain.request.stream") @@ -146,21 +141,28 @@ def _llmobs_set_tags_from_llm( prompts = get_argument_value(args, kwargs, 0, "input" if stream else "prompts") if isinstance(prompts, str) or not isinstance(prompts, list): prompts = [prompts] - if stream: # chat and llm take the same input types for streamed calls - span.set_tag_str(input_tag_key, safe_json(self._handle_stream_input_messages(prompts))) + input_messages = self._handle_stream_input_messages(prompts) else: - span.set_tag_str(input_tag_key, safe_json([{"content": str(prompt)} for prompt in prompts])) + input_messages = [{"content": str(prompt)} for prompt in prompts] + + 
span._set_ctx_items( + { + SPAN_KIND: "workflow" if is_workflow else "llm", + MODEL_NAME: span.get_tag(MODEL) or "", + MODEL_PROVIDER: span.get_tag(PROVIDER) or "", + input_tag_key: input_messages, + } + ) if span.error: - span.set_tag_str(output_tag_key, safe_json([{"content": ""}])) + span._set_ctx_item(output_tag_key, [{"content": ""}]) return if stream: message_content = [{"content": completions}] # single completion for streams else: message_content = [{"content": completion[0].text} for completion in completions.generations] - if not is_workflow: input_tokens, output_tokens, total_tokens = self.check_token_usage_chat_or_llm_result(completions) if total_tokens > 0: @@ -169,8 +171,8 @@ def _llmobs_set_tags_from_llm( OUTPUT_TOKENS_METRIC_KEY: output_tokens, TOTAL_TOKENS_METRIC_KEY: total_tokens, } - span.set_tag_str(METRICS, safe_json(metrics)) - span.set_tag_str(output_tag_key, safe_json(message_content)) + span._set_ctx_item(METRICS, metrics) + span._set_ctx_item(output_tag_key, message_content) def _llmobs_set_tags_from_chat_model( self, @@ -180,10 +182,13 @@ def _llmobs_set_tags_from_chat_model( chat_completions: Any, is_workflow: bool = False, ) -> None: - span.set_tag_str(SPAN_KIND, "workflow" if is_workflow else "llm") - span.set_tag_str(MODEL_NAME, span.get_tag(MODEL) or "") - span.set_tag_str(MODEL_PROVIDER, span.get_tag(PROVIDER) or "") - + span._set_ctx_items( + { + SPAN_KIND: "workflow" if is_workflow else "llm", + MODEL_NAME: span.get_tag(MODEL) or "", + MODEL_PROVIDER: span.get_tag(PROVIDER) or "", + } + ) input_tag_key = INPUT_VALUE if is_workflow else INPUT_MESSAGES output_tag_key = OUTPUT_VALUE if is_workflow else OUTPUT_MESSAGES stream = span.get_tag("langchain.request.stream") @@ -203,17 +208,17 @@ def _llmobs_set_tags_from_chat_model( ) role = getattr(message, "role", ROLE_MAPPING.get(message.type, "")) input_messages.append({"content": str(content), "role": str(role)}) - span.set_tag_str(input_tag_key, safe_json(input_messages)) + span._set_ctx_item(input_tag_key, input_messages) if span.error: - span.set_tag_str(output_tag_key, json.dumps([{"content": ""}])) + span._set_ctx_item(output_tag_key, [{"content": ""}]) return output_messages = [] if stream: content = chat_completions.content role = chat_completions.__class__.__name__.replace("MessageChunk", "").lower() # AIMessageChunk --> ai - span.set_tag_str(output_tag_key, safe_json([{"content": content, "role": ROLE_MAPPING.get(role, "")}])) + span._set_ctx_item(output_tag_key, [{"content": content, "role": ROLE_MAPPING.get(role, "")}]) return input_tokens, output_tokens, total_tokens = 0, 0, 0 @@ -249,7 +254,7 @@ def _llmobs_set_tags_from_chat_model( output_tokens = sum(v["output_tokens"] for v in tokens_per_choice_run_id.values()) total_tokens = sum(v["total_tokens"] for v in tokens_per_choice_run_id.values()) - span.set_tag_str(output_tag_key, safe_json(output_messages)) + span._set_ctx_item(output_tag_key, output_messages) if not is_workflow and total_tokens > 0: metrics = { @@ -257,7 +262,7 @@ def _llmobs_set_tags_from_chat_model( OUTPUT_TOKENS_METRIC_KEY: output_tokens, TOTAL_TOKENS_METRIC_KEY: total_tokens, } - span.set_tag_str(METRICS, safe_json(metrics)) + span._set_ctx_item(METRICS, metrics) def _extract_tool_calls(self, chat_completion_msg: Any) -> List[Dict[str, Any]]: """Extracts tool calls from a langchain chat completion.""" @@ -301,20 +306,17 @@ def _handle_stream_input_messages(self, inputs): return input_messages def _llmobs_set_meta_tags_from_chain(self, span: Span, args, kwargs, outputs: Any) -> 
None: - span.set_tag_str(SPAN_KIND, "workflow") - stream = span.get_tag("langchain.request.stream") - if stream: + if span.get_tag("langchain.request.stream"): inputs = get_argument_value(args, kwargs, 0, "input") else: inputs = kwargs + formatted_inputs = "" if inputs is not None: formatted_inputs = self.format_io(inputs) - span.set_tag_str(INPUT_VALUE, safe_json(formatted_inputs)) - if span.error or outputs is None: - span.set_tag_str(OUTPUT_VALUE, "") - return - formatted_outputs = self.format_io(outputs) - span.set_tag_str(OUTPUT_VALUE, safe_json(formatted_outputs)) + formatted_outputs = "" + if not span.error and outputs is not None: + formatted_outputs = self.format_io(outputs) + span._set_ctx_items({SPAN_KIND: "workflow", INPUT_VALUE: formatted_inputs, OUTPUT_VALUE: formatted_outputs}) def _llmobs_set_meta_tags_from_embedding( self, @@ -324,13 +326,15 @@ def _llmobs_set_meta_tags_from_embedding( output_embedding: Union[List[float], List[List[float]], None], is_workflow: bool = False, ) -> None: - span.set_tag_str(SPAN_KIND, "workflow" if is_workflow else "embedding") - span.set_tag_str(MODEL_NAME, span.get_tag(MODEL) or "") - span.set_tag_str(MODEL_PROVIDER, span.get_tag(PROVIDER) or "") - + span._set_ctx_items( + { + SPAN_KIND: "workflow" if is_workflow else "embedding", + MODEL_NAME: span.get_tag(MODEL) or "", + MODEL_PROVIDER: span.get_tag(PROVIDER) or "", + } + ) input_tag_key = INPUT_VALUE if is_workflow else INPUT_DOCUMENTS output_tag_key = OUTPUT_VALUE - output_values: Any try: @@ -343,16 +347,16 @@ def _llmobs_set_meta_tags_from_embedding( ): if is_workflow: formatted_inputs = self.format_io(input_texts) - span.set_tag_str(input_tag_key, safe_json(formatted_inputs)) + span._set_ctx_item(input_tag_key, formatted_inputs) else: if isinstance(input_texts, str): input_texts = [input_texts] input_documents = [Document(text=str(doc)) for doc in input_texts] - span.set_tag_str(input_tag_key, safe_json(input_documents)) + span._set_ctx_item(input_tag_key, input_documents) except TypeError: log.warning("Failed to serialize embedding input data to JSON") if span.error or output_embedding is None: - span.set_tag_str(output_tag_key, "") + span._set_ctx_item(output_tag_key, "") return try: if isinstance(output_embedding[0], float): @@ -364,7 +368,7 @@ def _llmobs_set_meta_tags_from_embedding( output_values = output_embedding embeddings_count = len(output_embedding) embedding_dim = len(output_values[0]) - span.set_tag_str( + span._set_ctx_item( output_tag_key, "[{} embedding(s) returned with size {}]".format(embeddings_count, embedding_dim), ) @@ -379,19 +383,22 @@ def _llmobs_set_meta_tags_from_similarity_search( output_documents: Union[List[Any], None], is_workflow: bool = False, ) -> None: - span.set_tag_str(SPAN_KIND, "workflow" if is_workflow else "retrieval") - span.set_tag_str(MODEL_NAME, span.get_tag(MODEL) or "") - span.set_tag_str(MODEL_PROVIDER, span.get_tag(PROVIDER) or "") - + span._set_ctx_items( + { + SPAN_KIND: "workflow" if is_workflow else "retrieval", + MODEL_NAME: span.get_tag(MODEL) or "", + MODEL_PROVIDER: span.get_tag(PROVIDER) or "", + } + ) input_query = get_argument_value(args, kwargs, 0, "query") if input_query is not None: formatted_inputs = self.format_io(input_query) - span.set_tag_str(INPUT_VALUE, safe_json(formatted_inputs)) + span._set_ctx_item(INPUT_VALUE, formatted_inputs) if span.error or not output_documents or not isinstance(output_documents, list): - span.set_tag_str(OUTPUT_VALUE, "") + span._set_ctx_item(OUTPUT_VALUE, "") return if is_workflow: - 
span.set_tag_str(OUTPUT_VALUE, "[{} document(s) retrieved]".format(len(output_documents))) + span._set_ctx_item(OUTPUT_VALUE, "[{} document(s) retrieved]".format(len(output_documents))) return documents = [] for d in output_documents: @@ -400,32 +407,31 @@ def _llmobs_set_meta_tags_from_similarity_search( metadata = getattr(d, "metadata", {}) doc["name"] = metadata.get("name", doc["id"]) documents.append(doc) - span.set_tag_str(OUTPUT_DOCUMENTS, safe_json(self.format_io(documents))) + span._set_ctx_item(OUTPUT_DOCUMENTS, self.format_io(documents)) # we set the value as well to ensure that the UI would display it in case the span was the root - span.set_tag_str(OUTPUT_VALUE, "[{} document(s) retrieved]".format(len(documents))) + span._set_ctx_item(OUTPUT_VALUE, "[{} document(s) retrieved]".format(len(documents))) def _llmobs_set_meta_tags_from_tool(self, span: Span, tool_inputs: Dict[str, Any], tool_output: object) -> None: - if span.get_tag(METADATA): - metadata = json.loads(str(span.get_tag(METADATA))) - else: - metadata = {} - - span.set_tag_str(SPAN_KIND, "tool") + metadata = json.loads(str(span.get_tag(METADATA))) if span.get_tag(METADATA) else {} + formatted_input = "" if tool_inputs is not None: tool_input = tool_inputs.get("input") if tool_inputs.get("config"): metadata["tool_config"] = tool_inputs.get("config") if tool_inputs.get("info"): metadata["tool_info"] = tool_inputs.get("info") - if metadata: - span.set_tag_str(METADATA, safe_json(metadata)) formatted_input = self.format_io(tool_input) - span.set_tag_str(INPUT_VALUE, safe_json(formatted_input)) - if span.error or tool_output is None: - span.set_tag_str(OUTPUT_VALUE, "") - return - formatted_outputs = self.format_io(tool_output) - span.set_tag_str(OUTPUT_VALUE, safe_json(formatted_outputs)) + formatted_outputs = "" + if not span.error and tool_output is not None: + formatted_outputs = self.format_io(tool_output) + span._set_ctx_items( + { + SPAN_KIND: "tool", + METADATA: metadata, + INPUT_VALUE: formatted_input, + OUTPUT_VALUE: formatted_outputs, + } + ) def _set_base_span_tags( # type: ignore[override] self, diff --git a/ddtrace/llmobs/_integrations/openai.py b/ddtrace/llmobs/_integrations/openai.py index 5c9e73eaca7..bd727b1a5a2 100644 --- a/ddtrace/llmobs/_integrations/openai.py +++ b/ddtrace/llmobs/_integrations/openai.py @@ -23,7 +23,6 @@ from ddtrace.llmobs._constants import TOTAL_TOKENS_METRIC_KEY from ddtrace.llmobs._integrations.base import BaseLLMIntegration from ddtrace.llmobs._utils import _get_attr -from ddtrace.llmobs._utils import safe_json from ddtrace.llmobs.utils import Document from ddtrace.pin import Pin @@ -148,19 +147,18 @@ def _llmobs_set_tags( ) -> None: """Sets meta tags and metrics for span events to be sent to LLMObs.""" span_kind = "embedding" if operation == "embedding" else "llm" - span.set_tag_str(SPAN_KIND, span_kind) model_name = span.get_tag("openai.response.model") or span.get_tag("openai.request.model") - span.set_tag_str(MODEL_NAME, model_name or "") model_provider = "azure_openai" if self._is_azure_openai(span) else "openai" - span.set_tag_str(MODEL_PROVIDER, model_provider) if operation == "completion": self._llmobs_set_meta_tags_from_completion(span, kwargs, response) elif operation == "chat": self._llmobs_set_meta_tags_from_chat(span, kwargs, response) elif operation == "embedding": self._llmobs_set_meta_tags_from_embedding(span, kwargs, response) - metrics = self._set_llmobs_metrics_tags(span, response) - span.set_tag_str(METRICS, safe_json(metrics)) + metrics = 
self._extract_llmobs_metrics_tags(span, response) + span._set_ctx_items( + {SPAN_KIND: span_kind, MODEL_NAME: model_name or "", MODEL_PROVIDER: model_provider, METRICS: metrics} + ) @staticmethod def _llmobs_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], completions: Any) -> None: @@ -168,20 +166,18 @@ def _llmobs_set_meta_tags_from_completion(span: Span, kwargs: Dict[str, Any], co prompt = kwargs.get("prompt", "") if isinstance(prompt, str): prompt = [prompt] - span.set_tag_str(INPUT_MESSAGES, safe_json([{"content": str(p)} for p in prompt])) - parameters = {k: v for k, v in kwargs.items() if k not in ("model", "prompt")} - span.set_tag_str(METADATA, safe_json(parameters)) - - if span.error or not completions: - span.set_tag_str(OUTPUT_MESSAGES, safe_json([{"content": ""}])) - return - if hasattr(completions, "choices"): # non-streaming response - choices = completions.choices - else: # streamed response - choices = completions - messages = [{"content": _get_attr(choice, "text", "")} for choice in choices] - span.set_tag_str(OUTPUT_MESSAGES, safe_json(messages)) + output_messages = [{"content": ""}] + if not span.error and completions: + choices = getattr(completions, "choices", completions) + output_messages = [{"content": _get_attr(choice, "text", "")} for choice in choices] + span._set_ctx_items( + { + INPUT_MESSAGES: [{"content": str(p)} for p in prompt], + METADATA: parameters, + OUTPUT_MESSAGES: output_messages, + } + ) @staticmethod def _llmobs_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages: Optional[Any]) -> None: @@ -189,16 +185,14 @@ def _llmobs_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages input_messages = [] for m in kwargs.get("messages", []): input_messages.append({"content": str(_get_attr(m, "content", "")), "role": str(_get_attr(m, "role", ""))}) - span.set_tag_str(INPUT_MESSAGES, safe_json(input_messages)) - parameters = {k: v for k, v in kwargs.items() if k not in ("model", "messages", "tools", "functions")} - span.set_tag_str(METADATA, safe_json(parameters)) + span._set_ctx_items({INPUT_MESSAGES: input_messages, METADATA: parameters}) if span.error or not messages: - span.set_tag_str(OUTPUT_MESSAGES, safe_json([{"content": ""}])) + span._set_ctx_item(OUTPUT_MESSAGES, [{"content": ""}]) return - output_messages = [] if isinstance(messages, list): # streamed response + output_messages = [] for streamed_message in messages: message = {"content": streamed_message["content"], "role": streamed_message["role"]} tool_calls = streamed_message.get("tool_calls", []) @@ -213,9 +207,10 @@ def _llmobs_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages for tool_call in tool_calls ] output_messages.append(message) - span.set_tag_str(OUTPUT_MESSAGES, safe_json(output_messages)) + span._set_ctx_item(OUTPUT_MESSAGES, output_messages) return choices = _get_attr(messages, "choices", []) + output_messages = [] for idx, choice in enumerate(choices): tool_calls_info = [] choice_message = _get_attr(choice, "message", {}) @@ -241,7 +236,7 @@ def _llmobs_set_meta_tags_from_chat(span: Span, kwargs: Dict[str, Any], messages output_messages.append({"content": content, "role": role, "tool_calls": tool_calls_info}) continue output_messages.append({"content": content, "role": role}) - span.set_tag_str(OUTPUT_MESSAGES, safe_json(output_messages)) + span._set_ctx_item(OUTPUT_MESSAGES, output_messages) @staticmethod def _llmobs_set_meta_tags_from_embedding(span: Span, kwargs: Dict[str, Any], resp: Any) -> None: @@ -250,7 
+245,6 @@ def _llmobs_set_meta_tags_from_embedding(span: Span, kwargs: Dict[str, Any], res metadata = {"encoding_format": encoding_format} if kwargs.get("dimensions"): metadata["dimensions"] = kwargs.get("dimensions") - span.set_tag_str(METADATA, safe_json(metadata)) embedding_inputs = kwargs.get("input", "") if isinstance(embedding_inputs, str) or isinstance(embedding_inputs[0], int): @@ -258,20 +252,19 @@ def _llmobs_set_meta_tags_from_embedding(span: Span, kwargs: Dict[str, Any], res input_documents = [] for doc in embedding_inputs: input_documents.append(Document(text=str(doc))) - span.set_tag_str(INPUT_DOCUMENTS, safe_json(input_documents)) - + span._set_ctx_items({METADATA: metadata, INPUT_DOCUMENTS: input_documents}) if span.error: return if encoding_format == "float": embedding_dim = len(resp.data[0].embedding) - span.set_tag_str( + span._set_ctx_item( OUTPUT_VALUE, "[{} embedding(s) returned with size {}]".format(len(resp.data), embedding_dim) ) return - span.set_tag_str(OUTPUT_VALUE, "[{} embedding(s) returned]".format(len(resp.data))) + span._set_ctx_item(OUTPUT_VALUE, "[{} embedding(s) returned]".format(len(resp.data))) @staticmethod - def _set_llmobs_metrics_tags(span: Span, resp: Any) -> Dict[str, Any]: + def _extract_llmobs_metrics_tags(span: Span, resp: Any) -> Dict[str, Any]: """Extract metrics from a chat/completion and set them as a temporary "_ml_obs.metrics" tag.""" token_usage = _get_attr(resp, "usage", None) if token_usage is not None: diff --git a/ddtrace/llmobs/_integrations/vertexai.py b/ddtrace/llmobs/_integrations/vertexai.py index 69fdc7eb665..4019268e0c4 100644 --- a/ddtrace/llmobs/_integrations/vertexai.py +++ b/ddtrace/llmobs/_integrations/vertexai.py @@ -19,7 +19,6 @@ from ddtrace.llmobs._integrations.utils import get_system_instructions_from_google_model from ddtrace.llmobs._integrations.utils import llmobs_get_metadata_google from ddtrace.llmobs._utils import _get_attr -from ddtrace.llmobs._utils import safe_json class VertexAIIntegration(BaseLLMIntegration): @@ -41,30 +40,29 @@ def _llmobs_set_tags( response: Optional[Any] = None, operation: str = "", ) -> None: - span.set_tag_str(SPAN_KIND, "llm") - span.set_tag_str(MODEL_NAME, span.get_tag("vertexai.request.model") or "") - span.set_tag_str(MODEL_PROVIDER, span.get_tag("vertexai.request.provider") or "") - instance = kwargs.get("instance", None) history = kwargs.get("history", []) metadata = llmobs_get_metadata_google(kwargs, instance) - span.set_tag_str(METADATA, safe_json(metadata)) system_instruction = get_system_instructions_from_google_model(instance) input_contents = get_argument_value(args, kwargs, 0, "contents") input_messages = self._extract_input_message(input_contents, history, system_instruction) - span.set_tag_str(INPUT_MESSAGES, safe_json(input_messages)) - - if span.error or response is None: - span.set_tag_str(OUTPUT_MESSAGES, safe_json([{"content": ""}])) - return - output_messages = self._extract_output_message(response) - span.set_tag_str(OUTPUT_MESSAGES, safe_json(output_messages)) + output_messages = [{"content": ""}] + if not span.error and response is not None: + output_messages = self._extract_output_message(response) - usage = get_llmobs_metrics_tags_google("vertexai", span) - if usage: - span.set_tag_str(METRICS, safe_json(usage)) + span._set_ctx_items( + { + SPAN_KIND: "llm", + MODEL_NAME: span.get_tag("vertexai.request.model") or "", + MODEL_PROVIDER: span.get_tag("vertexai.request.provider") or "", + METADATA: metadata, + INPUT_MESSAGES: input_messages, + OUTPUT_MESSAGES: 
output_messages, + METRICS: get_llmobs_metrics_tags_google("vertexai", span), + } + ) def _extract_input_message(self, contents, history, system_instruction=None): from vertexai.generative_models._generative_models import Part diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index 808cee89e0f..867edbdca4f 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -399,23 +399,23 @@ def _start_span( if name is None: name = operation_kind span = self.tracer.trace(name, resource=operation_kind, span_type=SpanTypes.LLM) - span.set_tag_str(SPAN_KIND, operation_kind) + span._set_ctx_item(SPAN_KIND, operation_kind) if model_name is not None: - span.set_tag_str(MODEL_NAME, model_name) + span._set_ctx_item(MODEL_NAME, model_name) if model_provider is not None: - span.set_tag_str(MODEL_PROVIDER, model_provider) + span._set_ctx_item(MODEL_PROVIDER, model_provider) session_id = session_id if session_id is not None else _get_session_id(span) if session_id is not None: - span.set_tag_str(SESSION_ID, session_id) + span._set_ctx_item(SESSION_ID, session_id) if ml_app is None: ml_app = _get_ml_app(span) - span.set_tag_str(ML_APP, ml_app) - if span.get_tag(PROPAGATED_PARENT_ID_KEY) is None: + span._set_ctx_item(ML_APP, ml_app) + if span._get_ctx_item(PROPAGATED_PARENT_ID_KEY) is None: # For non-distributed traces or spans in the first service of a distributed trace, # The LLMObs parent ID tag is not set at span start time. We need to manually set the parent ID tag now # in these cases to avoid conflicting with the later propagated tags. parent_id = _get_llmobs_parent_id(span) or "undefined" - span.set_tag_str(PARENT_ID_KEY, str(parent_id)) + span._set_ctx_item(PARENT_ID_KEY, str(parent_id)) return span @classmethod @@ -638,7 +638,7 @@ def annotate( cls._tag_metrics(span, metrics) if tags is not None: cls._tag_span_tags(span, tags) - span_kind = span.get_tag(SPAN_KIND) + span_kind = span._get_ctx_item(SPAN_KIND) if parameters is not None: log.warning("Setting parameters is deprecated, please set parameters and other metadata as tags instead.") cls._tag_params(span, parameters) @@ -664,7 +664,7 @@ def _tag_prompt(span, prompt: dict) -> None: """Tags a given LLMObs span with a prompt""" try: validated_prompt = validate_prompt(prompt) - span.set_tag_str(INPUT_PROMPT, safe_json(validated_prompt)) + span._set_ctx_item(INPUT_PROMPT, validated_prompt) except TypeError: log.warning("Failed to validate prompt with error: ", exc_info=True) return @@ -677,7 +677,7 @@ def _tag_params(span: Span, params: Dict[str, Any]) -> None: if not isinstance(params, dict): log.warning("parameters must be a dictionary of key-value pairs.") return - span.set_tag_str(INPUT_PARAMETERS, safe_json(params)) + span._set_ctx_item(INPUT_PARAMETERS, params) @classmethod def _tag_llm_io(cls, span, input_messages=None, output_messages=None): @@ -689,7 +689,7 @@ def _tag_llm_io(cls, span, input_messages=None, output_messages=None): if not isinstance(input_messages, Messages): input_messages = Messages(input_messages) if input_messages.messages: - span.set_tag_str(INPUT_MESSAGES, safe_json(input_messages.messages)) + span._set_ctx_item(INPUT_MESSAGES, input_messages.messages) except TypeError: log.warning("Failed to parse input messages.", exc_info=True) if output_messages is None: @@ -699,7 +699,7 @@ def _tag_llm_io(cls, span, input_messages=None, output_messages=None): output_messages = Messages(output_messages) if not output_messages.messages: return - span.set_tag_str(OUTPUT_MESSAGES, 
safe_json(output_messages.messages)) + span._set_ctx_item(OUTPUT_MESSAGES, output_messages.messages) except TypeError: log.warning("Failed to parse output messages.", exc_info=True) @@ -713,12 +713,12 @@ def _tag_embedding_io(cls, span, input_documents=None, output_text=None): if not isinstance(input_documents, Documents): input_documents = Documents(input_documents) if input_documents.documents: - span.set_tag_str(INPUT_DOCUMENTS, safe_json(input_documents.documents)) + span._set_ctx_item(INPUT_DOCUMENTS, input_documents.documents) except TypeError: log.warning("Failed to parse input documents.", exc_info=True) if output_text is None: return - span.set_tag_str(OUTPUT_VALUE, safe_json(output_text)) + span._set_ctx_item(OUTPUT_VALUE, str(output_text)) @classmethod def _tag_retrieval_io(cls, span, input_text=None, output_documents=None): @@ -726,7 +726,7 @@ def _tag_retrieval_io(cls, span, input_text=None, output_documents=None): Will be mapped to span's `meta.{input,output}.text` fields. """ if input_text is not None: - span.set_tag_str(INPUT_VALUE, safe_json(input_text)) + span._set_ctx_item(INPUT_VALUE, str(input_text)) if output_documents is None: return try: @@ -734,7 +734,7 @@ def _tag_retrieval_io(cls, span, input_text=None, output_documents=None): output_documents = Documents(output_documents) if not output_documents.documents: return - span.set_tag_str(OUTPUT_DOCUMENTS, safe_json(output_documents.documents)) + span._set_ctx_item(OUTPUT_DOCUMENTS, output_documents.documents) except TypeError: log.warning("Failed to parse output documents.", exc_info=True) @@ -744,9 +744,9 @@ def _tag_text_io(cls, span, input_value=None, output_value=None): Will be mapped to span's `meta.{input,output}.values` fields. """ if input_value is not None: - span.set_tag_str(INPUT_VALUE, safe_json(input_value)) + span._set_ctx_item(INPUT_VALUE, str(input_value)) if output_value is not None: - span.set_tag_str(OUTPUT_VALUE, safe_json(output_value)) + span._set_ctx_item(OUTPUT_VALUE, str(output_value)) @staticmethod def _tag_span_tags(span: Span, span_tags: Dict[str, Any]) -> None: @@ -759,12 +759,9 @@ def _tag_span_tags(span: Span, span_tags: Dict[str, Any]) -> None: log.warning("span_tags must be a dictionary of string key - primitive value pairs.") return try: - current_tags_str = span.get_tag(TAGS) - if current_tags_str: - current_tags = json.loads(current_tags_str) - current_tags.update(span_tags) - span_tags = current_tags - span.set_tag_str(TAGS, safe_json(span_tags)) + existing_tags = span._get_ctx_item(TAGS) or {} + existing_tags.update(span_tags) + span._set_ctx_item(TAGS, existing_tags) except Exception: log.warning("Failed to parse tags.", exc_info=True) @@ -776,7 +773,7 @@ def _tag_metadata(span: Span, metadata: Dict[str, Any]) -> None: if not isinstance(metadata, dict): log.warning("metadata must be a dictionary of string key-value pairs.") return - span.set_tag_str(METADATA, safe_json(metadata)) + span._set_ctx_item(METADATA, metadata) @staticmethod def _tag_metrics(span: Span, metrics: Dict[str, Any]) -> None: @@ -786,7 +783,7 @@ def _tag_metrics(span: Span, metrics: Dict[str, Any]) -> None: if not isinstance(metrics, dict): log.warning("metrics must be a dictionary of string key - numeric value pairs.") return - span.set_tag_str(METRICS, safe_json(metrics)) + span._set_ctx_item(METRICS, metrics) @classmethod def submit_evaluation( diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py index b4af0c5ffd1..231d53d7626 100644 --- a/ddtrace/llmobs/_trace_processor.py 
+++ b/ddtrace/llmobs/_trace_processor.py @@ -1,4 +1,3 @@ -import json from typing import Any from typing import Dict from typing import List @@ -27,7 +26,6 @@ from ddtrace.llmobs._constants import OUTPUT_DOCUMENTS from ddtrace.llmobs._constants import OUTPUT_MESSAGES from ddtrace.llmobs._constants import OUTPUT_VALUE -from ddtrace.llmobs._constants import PARENT_ID_KEY from ddtrace.llmobs._constants import RAGAS_ML_APP_PREFIX from ddtrace.llmobs._constants import RUNNER_IS_INTEGRATION_SPAN_TAG from ddtrace.llmobs._constants import SESSION_ID @@ -37,6 +35,7 @@ from ddtrace.llmobs._utils import _get_ml_app from ddtrace.llmobs._utils import _get_session_id from ddtrace.llmobs._utils import _get_span_name +from ddtrace.llmobs._utils import safe_json log = get_logger(__name__) @@ -62,7 +61,7 @@ def process_trace(self, trace: List[Span]) -> Optional[List[Span]]: def submit_llmobs_span(self, span: Span) -> None: """Generate and submit an LLMObs span event to be sent to LLMObs.""" span_event = None - is_llm_span = span.get_tag(SPAN_KIND) == "llm" + is_llm_span = span._get_ctx_item(SPAN_KIND) == "llm" is_ragas_integration_span = False try: span_event, is_ragas_integration_span = self._llmobs_span_event(span) @@ -77,44 +76,49 @@ def submit_llmobs_span(self, span: Span) -> None: def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]: """Span event object structure.""" - span_kind = span._meta.pop(SPAN_KIND) + span_kind = span._get_ctx_item(SPAN_KIND) + if not span_kind: + raise KeyError("Span kind not found in span context") meta: Dict[str, Any] = {"span.kind": span_kind, "input": {}, "output": {}} - if span_kind in ("llm", "embedding") and span.get_tag(MODEL_NAME) is not None: - meta["model_name"] = span._meta.pop(MODEL_NAME) - meta["model_provider"] = span._meta.pop(MODEL_PROVIDER, "custom").lower() - if span.get_tag(METADATA) is not None: - meta["metadata"] = json.loads(span._meta.pop(METADATA)) - if span.get_tag(INPUT_PARAMETERS): - meta["input"]["parameters"] = json.loads(span._meta.pop(INPUT_PARAMETERS)) - if span_kind == "llm" and span.get_tag(INPUT_MESSAGES) is not None: - meta["input"]["messages"] = json.loads(span._meta.pop(INPUT_MESSAGES)) - if span.get_tag(INPUT_VALUE) is not None: - meta["input"]["value"] = span._meta.pop(INPUT_VALUE) - if span_kind == "llm" and span.get_tag(OUTPUT_MESSAGES) is not None: - meta["output"]["messages"] = json.loads(span._meta.pop(OUTPUT_MESSAGES)) - if span_kind == "embedding" and span.get_tag(INPUT_DOCUMENTS) is not None: - meta["input"]["documents"] = json.loads(span._meta.pop(INPUT_DOCUMENTS)) - if span.get_tag(OUTPUT_VALUE) is not None: - meta["output"]["value"] = span._meta.pop(OUTPUT_VALUE) - if span_kind == "retrieval" and span.get_tag(OUTPUT_DOCUMENTS) is not None: - meta["output"]["documents"] = json.loads(span._meta.pop(OUTPUT_DOCUMENTS)) - if span.get_tag(INPUT_PROMPT) is not None: - prompt_json_str = span._meta.pop(INPUT_PROMPT) + if span_kind in ("llm", "embedding") and span._get_ctx_item(MODEL_NAME) is not None: + meta["model_name"] = span._get_ctx_item(MODEL_NAME) + meta["model_provider"] = (span._get_ctx_item(MODEL_PROVIDER) or "custom").lower() + meta["metadata"] = span._get_ctx_item(METADATA) or {} + if span._get_ctx_item(INPUT_PARAMETERS): + meta["input"]["parameters"] = span._get_ctx_item(INPUT_PARAMETERS) + if span_kind == "llm" and span._get_ctx_item(INPUT_MESSAGES) is not None: + meta["input"]["messages"] = span._get_ctx_item(INPUT_MESSAGES) + if span._get_ctx_item(INPUT_VALUE) is not None: + meta["input"]["value"] = 
safe_json(span._get_ctx_item(INPUT_VALUE)) + if span_kind == "llm" and span._get_ctx_item(OUTPUT_MESSAGES) is not None: + meta["output"]["messages"] = span._get_ctx_item(OUTPUT_MESSAGES) + if span_kind == "embedding" and span._get_ctx_item(INPUT_DOCUMENTS) is not None: + meta["input"]["documents"] = span._get_ctx_item(INPUT_DOCUMENTS) + if span._get_ctx_item(OUTPUT_VALUE) is not None: + meta["output"]["value"] = safe_json(span._get_ctx_item(OUTPUT_VALUE)) + if span_kind == "retrieval" and span._get_ctx_item(OUTPUT_DOCUMENTS) is not None: + meta["output"]["documents"] = span._get_ctx_item(OUTPUT_DOCUMENTS) + if span._get_ctx_item(INPUT_PROMPT) is not None: + prompt_json_str = span._get_ctx_item(INPUT_PROMPT) if span_kind != "llm": log.warning( "Dropping prompt on non-LLM span kind, annotating prompts is only supported for LLM span kinds." ) else: - meta["input"]["prompt"] = json.loads(prompt_json_str) + meta["input"]["prompt"] = prompt_json_str if span.error: - meta[ERROR_MSG] = span.get_tag(ERROR_MSG) - meta[ERROR_STACK] = span.get_tag(ERROR_STACK) - meta[ERROR_TYPE] = span.get_tag(ERROR_TYPE) + meta.update( + { + ERROR_MSG: span.get_tag(ERROR_MSG), + ERROR_STACK: span.get_tag(ERROR_STACK), + ERROR_TYPE: span.get_tag(ERROR_TYPE), + } + ) if not meta["input"]: meta.pop("input") if not meta["output"]: meta.pop("output") - metrics = json.loads(span._meta.pop(METRICS, "{}")) + metrics = span._get_ctx_item(METRICS) or {} ml_app = _get_ml_app(span) is_ragas_integration_span = False @@ -122,10 +126,8 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]: if ml_app.startswith(RAGAS_ML_APP_PREFIX): is_ragas_integration_span = True - span.set_tag_str(ML_APP, ml_app) - + span._set_ctx_item(ML_APP, ml_app) parent_id = str(_get_llmobs_parent_id(span) or "undefined") - span._meta.pop(PARENT_ID_KEY, None) llmobs_span_event = { "trace_id": "{:x}".format(span.trace_id), @@ -140,7 +142,7 @@ def _llmobs_span_event(self, span: Span) -> Tuple[Dict[str, Any], bool]: } session_id = _get_session_id(span) if session_id is not None: - span.set_tag_str(SESSION_ID, session_id) + span._set_ctx_item(SESSION_ID, session_id) llmobs_span_event["session_id"] = session_id llmobs_span_event["tags"] = self._llmobs_tags( @@ -169,7 +171,7 @@ def _llmobs_tags( tags["session_id"] = session_id if is_ragas_integration_span: tags[RUNNER_IS_INTEGRATION_SPAN_TAG] = "ragas" - existing_tags = span._meta.pop(TAGS, None) + existing_tags = span._get_ctx_item(TAGS) if existing_tags is not None: - tags.update(json.loads(existing_tags)) + tags.update(existing_tags) return ["{}:{}".format(k, v) for k, v in tags.items()] diff --git a/ddtrace/llmobs/_utils.py b/ddtrace/llmobs/_utils.py index 8813788f0a3..c1b1c4a776c 100644 --- a/ddtrace/llmobs/_utils.py +++ b/ddtrace/llmobs/_utils.py @@ -110,8 +110,8 @@ def _get_llmobs_parent_id(span: Span) -> Optional[str]: """Return the span ID of the nearest LLMObs-type span in the span's ancestor tree. In priority order: manually set parent ID tag, nearest LLMObs ancestor, local root's propagated parent ID tag. """ - if span.get_tag(PARENT_ID_KEY): - return span.get_tag(PARENT_ID_KEY) + if span._get_ctx_item(PARENT_ID_KEY): + return span._get_ctx_item(PARENT_ID_KEY) nearest_llmobs_ancestor = _get_nearest_llmobs_ancestor(span) if nearest_llmobs_ancestor: return str(nearest_llmobs_ancestor.span_id) @@ -132,12 +132,12 @@ def _get_ml_app(span: Span) -> str: Return the ML app name for a given span, by checking the span's nearest LLMObs span ancestor. 
Default to the global config LLMObs ML app name otherwise. """ - ml_app = span.get_tag(ML_APP) + ml_app = span._get_ctx_item(ML_APP) if ml_app: return ml_app nearest_llmobs_ancestor = _get_nearest_llmobs_ancestor(span) if nearest_llmobs_ancestor: - ml_app = nearest_llmobs_ancestor.get_tag(ML_APP) + ml_app = nearest_llmobs_ancestor._get_ctx_item(ML_APP) return ml_app or config._llmobs_ml_app or "unknown-ml-app" @@ -146,12 +146,12 @@ def _get_session_id(span: Span) -> Optional[str]: Return the session ID for a given span, by checking the span's nearest LLMObs span ancestor. Default to the span's trace ID. """ - session_id = span.get_tag(SESSION_ID) + session_id = span._get_ctx_item(SESSION_ID) if session_id: return session_id nearest_llmobs_ancestor = _get_nearest_llmobs_ancestor(span) if nearest_llmobs_ancestor: - session_id = nearest_llmobs_ancestor.get_tag(SESSION_ID) + session_id = nearest_llmobs_ancestor._get_ctx_item(SESSION_ID) return session_id diff --git a/ddtrace/llmobs/_writer.py b/ddtrace/llmobs/_writer.py index 6496de96cfe..5a293f05c4e 100644 --- a/ddtrace/llmobs/_writer.py +++ b/ddtrace/llmobs/_writer.py @@ -1,5 +1,4 @@ import atexit -import json from typing import Any from typing import Dict from typing import List @@ -32,6 +31,7 @@ from ddtrace.llmobs._constants import EVP_PROXY_AGENT_ENDPOINT from ddtrace.llmobs._constants import EVP_SUBDOMAIN_HEADER_NAME from ddtrace.llmobs._constants import EVP_SUBDOMAIN_HEADER_VALUE +from ddtrace.llmobs._utils import safe_json logger = get_logger(__name__) @@ -108,11 +108,7 @@ def periodic(self) -> None: self._buffer = [] data = self._data(events) - try: - enc_llm_events = json.dumps(data) - except TypeError: - logger.error("failed to encode %d LLMObs %s events", len(events), self._event_type, exc_info=True) - return + enc_llm_events = safe_json(data) conn = httplib.HTTPSConnection(self._intake, 443, timeout=self._timeout) try: conn.request("POST", self._endpoint, enc_llm_events, self._headers) @@ -197,7 +193,7 @@ def put(self, events: List[LLMObsSpanEvent]): ) return self._buffer.extend(events) - self.buffer_size += len(json.dumps(events)) + self.buffer_size += len(safe_json(events)) def encode(self): with self._lock: @@ -207,7 +203,7 @@ def encode(self): self._init_buffer() data = {"_dd.stage": "raw", "_dd.tracer_version": ddtrace.__version__, "event_type": "span", "spans": events} try: - enc_llm_events = json.dumps(data) + enc_llm_events = safe_json(data) logger.debug("encode %d LLMObs span events to be sent", len(events)) except TypeError: logger.error("failed to encode %d LLMObs span events", len(events), exc_info=True) @@ -277,7 +273,7 @@ def stop(self, timeout=None): super(LLMObsSpanWriter, self).stop(timeout=timeout) def enqueue(self, event: LLMObsSpanEvent) -> None: - event_size = len(json.dumps(event)) + event_size = len(safe_json(event)) if event_size >= EVP_EVENT_SIZE_LIMIT: logger.warning( diff --git a/ddtrace/llmobs/decorators.py b/ddtrace/llmobs/decorators.py index 93f329f2889..7e61f9b4e18 100644 --- a/ddtrace/llmobs/decorators.py +++ b/ddtrace/llmobs/decorators.py @@ -172,7 +172,7 @@ def generator_wrapper(*args, **kwargs): func_signature = signature(func) bound_args = func_signature.bind_partial(*args, **kwargs) if _automatic_io_annotation and bound_args.arguments: - LLMObs.annotate(span=span, input_data=bound_args.arguments) + LLMObs.annotate(span=span, input_data=dict(bound_args.arguments)) return yield_from_async_gen(func, span, args, kwargs) @wraps(func) @@ -186,13 +186,13 @@ async def wrapper(*args, **kwargs): 
func_signature = signature(func) bound_args = func_signature.bind_partial(*args, **kwargs) if _automatic_io_annotation and bound_args.arguments: - LLMObs.annotate(span=span, input_data=bound_args.arguments) + LLMObs.annotate(span=span, input_data=dict(bound_args.arguments)) resp = await func(*args, **kwargs) if ( _automatic_io_annotation and resp and operation_kind != "retrieval" - and span.get_tag(OUTPUT_VALUE) is None + and span._get_ctx_item(OUTPUT_VALUE) is None ): LLMObs.annotate(span=span, output_data=resp) return resp @@ -211,7 +211,7 @@ def generator_wrapper(*args, **kwargs): func_signature = signature(func) bound_args = func_signature.bind_partial(*args, **kwargs) if _automatic_io_annotation and bound_args.arguments: - LLMObs.annotate(span=span, input_data=bound_args.arguments) + LLMObs.annotate(span=span, input_data=dict(bound_args.arguments)) try: yield from func(*args, **kwargs) except (StopIteration, GeneratorExit): @@ -234,13 +234,13 @@ def wrapper(*args, **kwargs): func_signature = signature(func) bound_args = func_signature.bind_partial(*args, **kwargs) if _automatic_io_annotation and bound_args.arguments: - LLMObs.annotate(span=span, input_data=bound_args.arguments) + LLMObs.annotate(span=span, input_data=dict(bound_args.arguments)) resp = func(*args, **kwargs) if ( _automatic_io_annotation and resp and operation_kind != "retrieval" - and span.get_tag(OUTPUT_VALUE) is None + and span._get_ctx_item(OUTPUT_VALUE) is None ): LLMObs.annotate(span=span, output_data=resp) return resp diff --git a/tests/contrib/anthropic/test_anthropic_llmobs.py b/tests/contrib/anthropic/test_anthropic_llmobs.py index f286a890209..e2850a4157f 100644 --- a/tests/contrib/anthropic/test_anthropic_llmobs.py +++ b/tests/contrib/anthropic/test_anthropic_llmobs.py @@ -1,6 +1,5 @@ from pathlib import Path -import mock import pytest from tests.llmobs._utils import _expected_llmobs_llm_span_event @@ -117,37 +116,6 @@ def test_error(self, anthropic, ddtrace_global_config, mock_llmobs_writer, mock_ ) ) - def test_error_unserializable_arg( - self, anthropic, ddtrace_global_config, mock_llmobs_writer, mock_tracer, request_vcr - ): - """Ensure we handle unserializable arguments correctly and still emit llmobs records.""" - llm = anthropic.Anthropic() - with pytest.raises(Exception): - llm.messages.create( - model="claude-3-opus-20240229", - max_tokens=object(), - temperature=0.8, - messages=[{"role": "user", "content": "Hello World!"}], - ) - - span = mock_tracer.pop_traces()[0][0] - assert mock_llmobs_writer.enqueue.call_count == 1 - expected_span = _expected_llmobs_llm_span_event( - span, - model_name="claude-3-opus-20240229", - model_provider="anthropic", - input_messages=[{"content": "Hello World!", "role": "user"}], - output_messages=[{"content": ""}], - error=span.get_tag("error.type"), - error_message=span.get_tag("error.message"), - error_stack=span.get_tag("error.stack"), - metadata={"temperature": 0.8, "max_tokens": mock.ANY}, - tags={"ml_app": "", "service": "tests.contrib.anthropic"}, - ) - mock_llmobs_writer.enqueue.assert_called_with(expected_span) - actual_span = mock_llmobs_writer.enqueue.call_args[0][0] - assert "[Unserializable object: ", "service": "tests.contrib.openai"}, - ) - mock_llmobs_writer.enqueue.assert_called_with(expected_span) - actual_span = mock_llmobs_writer.enqueue.call_args[0][0] - assert "[Unserializable object: