From 76340ca7b382cc84ec4a5869f7bc1432735410b5 Mon Sep 17 00:00:00 2001 From: Yun Kim Date: Thu, 9 Jan 2025 13:18:20 -0500 Subject: [PATCH] Clean up fixtures and mocks --- tests/llmobs/conftest.py | 50 +- tests/llmobs/test_llmobs_decorators.py | 492 ++++++++---------- tests/llmobs/test_llmobs_evaluator_runner.py | 12 +- ...est_llmobs_ragas_faithfulness_evaluator.py | 33 +- tests/llmobs/test_llmobs_service.py | 49 +- tests/llmobs/test_propagation.py | 35 +- 6 files changed, 277 insertions(+), 394 deletions(-) diff --git a/tests/llmobs/conftest.py b/tests/llmobs/conftest.py index 15cffe5faa9..5a63b7e2b8f 100644 --- a/tests/llmobs/conftest.py +++ b/tests/llmobs/conftest.py @@ -31,16 +31,6 @@ def pytest_configure(config): config.addinivalue_line("markers", "vcr_logs: mark test to use recorded request/responses") -@pytest.fixture -def mock_llmobs_span_writer(): - patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsSpanWriter") - LLMObsSpanWriterMock = patcher.start() - m = mock.MagicMock() - LLMObsSpanWriterMock.return_value = m - yield m - patcher.stop() - - @pytest.fixture def mock_llmobs_eval_metric_writer(): patcher = mock.patch("ddtrace.llmobs._llmobs.LLMObsEvalMetricWriter") @@ -127,44 +117,6 @@ def default_global_config(): return {"_dd_api_key": "", "_llmobs_ml_app": "unnamed-ml-app"} -@pytest.fixture -def LLMObs( - mock_llmobs_span_writer, mock_llmobs_eval_metric_writer, mock_llmobs_evaluator_runner, ddtrace_global_config -): - global_config = default_global_config() - global_config.update(ddtrace_global_config) - with override_global_config(global_config): - dummy_tracer = DummyTracer() - llmobs_service.enable(_tracer=dummy_tracer) - yield llmobs_service - llmobs_service.disable() - - -@pytest.fixture -def AgentlessLLMObs( - mock_llmobs_span_writer, - mock_llmobs_eval_metric_writer, - mock_llmobs_evaluator_runner, - ddtrace_global_config, -): - global_config = default_global_config() - global_config.update(ddtrace_global_config) - global_config.update(dict(_llmobs_agentless_enabled=True)) - with override_global_config(global_config): - dummy_tracer = DummyTracer() - llmobs_service.enable(_tracer=dummy_tracer) - yield llmobs_service - llmobs_service.disable() - - -@pytest.fixture -def disabled_llmobs(): - prev = llmobs_service.enabled - llmobs_service.enabled = False - yield - llmobs_service.enabled = prev - - @pytest.fixture def mock_ragas_dependencies_not_present(): import ragas @@ -177,7 +129,7 @@ def mock_ragas_dependencies_not_present(): @pytest.fixture -def ragas(mock_llmobs_span_writer, mock_llmobs_eval_metric_writer): +def ragas(mock_llmobs_eval_metric_writer): with override_global_config(dict(_dd_api_key="")): try: import ragas diff --git a/tests/llmobs/test_llmobs_decorators.py b/tests/llmobs/test_llmobs_decorators.py index e94d72aec64..056de72ee96 100644 --- a/tests/llmobs/test_llmobs_decorators.py +++ b/tests/llmobs/test_llmobs_decorators.py @@ -19,7 +19,7 @@ def mock_logs(): yield mock_logs -def test_llm_decorator_with_llmobs_disabled_logs_warning(LLMObs, mock_logs): +def test_llm_decorator_with_llmobs_disabled_logs_warning(llmobs, mock_logs): for decorator_name, decorator in (("llm", llm), ("embedding", embedding)): @decorator( @@ -28,13 +28,13 @@ def test_llm_decorator_with_llmobs_disabled_logs_warning(LLMObs, mock_logs): def f(): pass - LLMObs.disable() + llmobs.disable() f() mock_logs.warning.assert_called_with(SPAN_START_WHILE_DISABLED_WARNING) mock_logs.reset_mock() -def test_non_llm_decorator_with_llmobs_disabled_logs_warning(LLMObs, mock_logs): +def test_non_llm_decorator_with_llmobs_disabled_logs_warning(llmobs, mock_logs): for decorator_name, decorator in ( ("task", task), ("workflow", workflow), @@ -47,53 +47,49 @@ def test_non_llm_decorator_with_llmobs_disabled_logs_warning(LLMObs, mock_logs): def f(): pass - LLMObs.disable() + llmobs.disable() f() mock_logs.warning.assert_called_with(SPAN_START_WHILE_DISABLED_WARNING) mock_logs.reset_mock() -def test_llm_decorator(LLMObs, mock_llmobs_span_writer): +def test_llm_decorator(llmobs, llmobs_events): @llm(model_name="test_model", model_provider="test_provider", name="test_function", session_id="test_session_id") def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, "llm", model_name="test_model", model_provider="test_provider", session_id="test_session_id" - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, "llm", model_name="test_model", model_provider="test_provider", session_id="test_session_id" ) -def test_llm_decorator_no_model_name_sets_default(LLMObs, mock_llmobs_span_writer): +def test_llm_decorator_no_model_name_sets_default(llmobs, llmobs_events): @llm(model_provider="test_provider", name="test_function", session_id="test_session_id") def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, "llm", model_name="custom", model_provider="test_provider", session_id="test_session_id" - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, "llm", model_name="custom", model_provider="test_provider", session_id="test_session_id" ) -def test_llm_decorator_default_kwargs(LLMObs, mock_llmobs_span_writer): +def test_llm_decorator_default_kwargs(llmobs, llmobs_events): @llm def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event(span, "llm", model_name="custom", model_provider="custom") + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, "llm", model_name="custom", model_provider="custom" ) -def test_embedding_decorator(LLMObs, mock_llmobs_span_writer): +def test_embedding_decorator(llmobs, llmobs_events): @embedding( model_name="test_model", model_provider="test_provider", name="test_function", session_id="test_session_id" ) @@ -101,173 +97,157 @@ def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, "embedding", model_name="test_model", model_provider="test_provider", session_id="test_session_id" - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, "embedding", model_name="test_model", model_provider="test_provider", session_id="test_session_id" ) -def test_embedding_decorator_no_model_name_sets_default(LLMObs, mock_llmobs_span_writer): +def test_embedding_decorator_no_model_name_sets_default(llmobs, llmobs_events): @embedding(model_provider="test_provider", name="test_function", session_id="test_session_id") def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, "embedding", model_name="custom", model_provider="test_provider", session_id="test_session_id" - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, "embedding", model_name="custom", model_provider="test_provider", session_id="test_session_id" ) -def test_embedding_decorator_default_kwargs(LLMObs, mock_llmobs_span_writer): +def test_embedding_decorator_default_kwargs(llmobs, llmobs_events): @embedding def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event(span, "embedding", model_name="custom", model_provider="custom") + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, "embedding", model_name="custom", model_provider="custom" ) -def test_retrieval_decorator(LLMObs, mock_llmobs_span_writer): +def test_retrieval_decorator(llmobs, llmobs_events): @retrieval(name="test_function", session_id="test_session_id") def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "retrieval", session_id="test_session_id") - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "retrieval", session_id="test_session_id") -def test_retrieval_decorator_default_kwargs(LLMObs, mock_llmobs_span_writer): +def test_retrieval_decorator_default_kwargs(llmobs, llmobs_events): @retrieval() def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, "retrieval")) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "retrieval") -def test_task_decorator(LLMObs, mock_llmobs_span_writer): +def test_task_decorator(llmobs, llmobs_events): @task(name="test_function", session_id="test_session_id") def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "task", session_id="test_session_id") - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "task", session_id="test_session_id") -def test_task_decorator_default_kwargs(LLMObs, mock_llmobs_span_writer): +def test_task_decorator_default_kwargs(llmobs, llmobs_events): @task() def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, "task")) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "task") -def test_tool_decorator(LLMObs, mock_llmobs_span_writer): +def test_tool_decorator(llmobs, llmobs_events): @tool(name="test_function", session_id="test_session_id") def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "tool", session_id="test_session_id") - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "tool", session_id="test_session_id") -def test_tool_decorator_default_kwargs(LLMObs, mock_llmobs_span_writer): +def test_tool_decorator_default_kwargs(llmobs, llmobs_events): @tool() def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, "tool")) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "tool") -def test_workflow_decorator(LLMObs, mock_llmobs_span_writer): +def test_workflow_decorator(llmobs, llmobs_events): @workflow(name="test_function", session_id="test_session_id") def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, "workflow", session_id="test_session_id") - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "workflow", session_id="test_session_id") -def test_workflow_decorator_default_kwargs(LLMObs, mock_llmobs_span_writer): +def test_workflow_decorator_default_kwargs(llmobs, llmobs_events): @workflow() def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, "workflow")) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "workflow") -def test_agent_decorator(LLMObs, mock_llmobs_span_writer): +def test_agent_decorator(llmobs, llmobs_events): @agent(name="test_function", session_id="test_session_id") def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event(span, "agent", session_id="test_session_id") - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event(span, "agent", session_id="test_session_id") -def test_agent_decorator_default_kwargs(LLMObs, mock_llmobs_span_writer): +def test_agent_decorator_default_kwargs(llmobs, llmobs_events): @agent() def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_llm_span_event(span, "agent")) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event(span, "agent") -def test_llm_decorator_with_error(LLMObs, mock_llmobs_span_writer): +def test_llm_decorator_with_error(llmobs, llmobs_events): @llm(model_name="test_model", model_provider="test_provider", name="test_function", session_id="test_session_id") def f(): raise ValueError("test_error") with pytest.raises(ValueError): f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - "llm", - model_name="test_model", - model_provider="test_provider", - session_id="test_session_id", - error=span.get_tag("error.type"), - error_message=span.get_tag("error.message"), - error_stack=span.get_tag("error.stack"), - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + "llm", + model_name="test_model", + model_provider="test_provider", + session_id="test_session_id", + error=span.get_tag("error.type"), + error_message=span.get_tag("error.message"), + error_stack=span.get_tag("error.stack"), ) -def test_non_llm_decorators_with_error(LLMObs, mock_llmobs_span_writer): +def test_non_llm_decorators_with_error(llmobs, llmobs_events): for decorator_name, decorator in [("task", task), ("workflow", workflow), ("tool", tool), ("agent", agent)]: @decorator(name="test_function", session_id="test_session_id") @@ -276,23 +256,21 @@ def f(): with pytest.raises(ValueError): f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - decorator_name, - session_id="test_session_id", - error=span.get_tag("error.type"), - error_message=span.get_tag("error.message"), - error_stack=span.get_tag("error.stack"), - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[-1] == _expected_llmobs_non_llm_span_event( + span, + decorator_name, + session_id="test_session_id", + error=span.get_tag("error.type"), + error_message=span.get_tag("error.message"), + error_stack=span.get_tag("error.stack"), ) -def test_llm_annotate(LLMObs, mock_llmobs_span_writer): +def test_llm_annotate(llmobs, llmobs_events): @llm(model_name="test_model", model_provider="test_provider", name="test_function", session_id="test_session_id") def f(): - LLMObs.annotate( + llmobs.annotate( parameters={"temperature": 0.9, "max_tokens": 50}, input_data=[{"content": "test_prompt"}], output_data=[{"content": "test_response"}], @@ -301,27 +279,25 @@ def f(): ) f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - "llm", - model_name="test_model", - model_provider="test_provider", - input_messages=[{"content": "test_prompt"}], - output_messages=[{"content": "test_response"}], - parameters={"temperature": 0.9, "max_tokens": 50}, - token_metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}, - tags={"custom_tag": "tag_value"}, - session_id="test_session_id", - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + "llm", + model_name="test_model", + model_provider="test_provider", + input_messages=[{"content": "test_prompt"}], + output_messages=[{"content": "test_response"}], + parameters={"temperature": 0.9, "max_tokens": 50}, + token_metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}, + tags={"custom_tag": "tag_value"}, + session_id="test_session_id", ) -def test_llm_annotate_raw_string_io(LLMObs, mock_llmobs_span_writer): +def test_llm_annotate_raw_string_io(llmobs, llmobs_events): @llm(model_name="test_model", model_provider="test_provider", name="test_function", session_id="test_session_id") def f(): - LLMObs.annotate( + llmobs.annotate( parameters={"temperature": 0.9, "max_tokens": 50}, input_data="test_prompt", output_data="test_response", @@ -330,24 +306,22 @@ def f(): ) f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, - "llm", - model_name="test_model", - model_provider="test_provider", - input_messages=[{"content": "test_prompt"}], - output_messages=[{"content": "test_response"}], - parameters={"temperature": 0.9, "max_tokens": 50}, - token_metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}, - tags={"custom_tag": "tag_value"}, - session_id="test_session_id", - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event( + span, + "llm", + model_name="test_model", + model_provider="test_provider", + input_messages=[{"content": "test_prompt"}], + output_messages=[{"content": "test_response"}], + parameters={"temperature": 0.9, "max_tokens": 50}, + token_metrics={"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}, + tags={"custom_tag": "tag_value"}, + session_id="test_session_id", ) -def test_non_llm_decorators_no_args(LLMObs, mock_llmobs_span_writer): +def test_non_llm_decorators_no_args(llmobs, llmobs_events): """Test that using the decorators without any arguments, i.e. @tool, works the same as @tool(...).""" for decorator_name, decorator in [ ("task", task), @@ -362,11 +336,11 @@ def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, decorator_name)) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[-1] == _expected_llmobs_non_llm_span_event(span, decorator_name) -def test_agent_decorator_no_args(LLMObs, mock_llmobs_span_writer): +def test_agent_decorator_no_args(llmobs, llmobs_events): """Test that using agent decorator without any arguments, i.e. @agent, works the same as @agent(...).""" @agent @@ -374,11 +348,11 @@ def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_llm_span_event(span, "agent")) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_llm_span_event(span, "agent") -def test_ml_app_override(LLMObs, mock_llmobs_span_writer): +def test_ml_app_override(llmobs, llmobs_events): """Test that setting ml_app kwarg on the LLMObs decorators will override the DD_LLMOBS_ML_APP value.""" for decorator_name, decorator in [("task", task), ("workflow", workflow), ("tool", tool)]: @@ -387,9 +361,9 @@ def f(): pass f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, decorator_name, tags={"ml_app": "test_ml_app"}) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[-1] == _expected_llmobs_non_llm_span_event( + span, decorator_name, tags={"ml_app": "test_ml_app"} ) @llm(model_name="test_model", ml_app="test_ml_app") @@ -397,11 +371,9 @@ def g(): pass g() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, "llm", model_name="test_model", model_provider="custom", tags={"ml_app": "test_ml_app"} - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[-1] == _expected_llmobs_llm_span_event( + span, "llm", model_name="test_model", model_provider="custom", tags={"ml_app": "test_ml_app"} ) @embedding(model_name="test_model", ml_app="test_ml_app") @@ -409,15 +381,13 @@ def h(): pass h() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, "embedding", model_name="test_model", model_provider="custom", tags={"ml_app": "test_ml_app"} - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[-1] == _expected_llmobs_llm_span_event( + span, "embedding", model_name="test_model", model_provider="custom", tags={"ml_app": "test_ml_app"} ) -async def test_non_llm_async_decorators(LLMObs, mock_llmobs_span_writer): +async def test_non_llm_async_decorators(llmobs, llmobs_events): """Test that decorators work with async functions.""" for decorator_name, decorator in [ ("task", task), @@ -432,11 +402,11 @@ async def f(): pass await f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with(_expected_llmobs_non_llm_span_event(span, decorator_name)) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[-1] == _expected_llmobs_non_llm_span_event(span, decorator_name) -async def test_llm_async_decorators(LLMObs, mock_llmobs_span_writer): +async def test_llm_async_decorators(llmobs, llmobs_events): """Test that decorators work with async functions.""" for decorator_name, decorator in [("llm", llm), ("embedding", embedding)]: @@ -445,15 +415,13 @@ async def f(): pass await f() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_llm_span_event( - span, decorator_name, model_name="test_model", model_provider="test_provider" - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[-1] == _expected_llmobs_llm_span_event( + span, decorator_name, model_name="test_model", model_provider="test_provider" ) -def test_automatic_annotation_non_llm_decorators(LLMObs, mock_llmobs_span_writer): +def test_automatic_annotation_non_llm_decorators(llmobs, llmobs_events): """Test that automatic input/output annotation works for non-LLM decorators.""" for decorator_name, decorator in (("task", task), ("workflow", workflow), ("tool", tool), ("agent", agent)): @@ -462,19 +430,17 @@ def f(prompt, arg_2, kwarg_1=None, kwarg_2=None): return prompt f("test_prompt", "arg_2", kwarg_2=12345) - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - decorator_name, - input_value=str({"prompt": "test_prompt", "arg_2": "arg_2", "kwarg_2": 12345}), - output_value="test_prompt", - session_id="test_session_id", - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[-1] == _expected_llmobs_non_llm_span_event( + span, + decorator_name, + input_value=str({"prompt": "test_prompt", "arg_2": "arg_2", "kwarg_2": 12345}), + output_value="test_prompt", + session_id="test_session_id", ) -def test_automatic_annotation_retrieval_decorator(LLMObs, mock_llmobs_span_writer): +def test_automatic_annotation_retrieval_decorator(llmobs, llmobs_events): """Test that automatic input annotation works for retrieval decorators.""" @retrieval(session_id="test_session_id") @@ -482,18 +448,16 @@ def test_retrieval(query, arg_2, kwarg_1=None, kwarg_2=None): return [{"name": "name", "id": "1234567890", "score": 0.9}] test_retrieval("test_query", "arg_2", kwarg_2=12345) - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - "retrieval", - input_value=str({"query": "test_query", "arg_2": "arg_2", "kwarg_2": 12345}), - session_id="test_session_id", - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event( + span, + "retrieval", + input_value=str({"query": "test_query", "arg_2": "arg_2", "kwarg_2": 12345}), + session_id="test_session_id", ) -def test_automatic_annotation_off_non_llm_decorators(LLMObs, mock_llmobs_span_writer): +def test_automatic_annotation_off_non_llm_decorators(llmobs, llmobs_events): """Test disabling automatic input/output annotation for non-LLM decorators.""" for decorator_name, decorator in ( ("task", task), @@ -508,35 +472,33 @@ def f(prompt, arg_2, kwarg_1=None, kwarg_2=None): return prompt f("test_prompt", "arg_2", kwarg_2=12345) - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event(span, decorator_name, session_id="test_session_id") + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[-1] == _expected_llmobs_non_llm_span_event( + span, decorator_name, session_id="test_session_id" ) -def test_automatic_annotation_off_if_manually_annotated(LLMObs, mock_llmobs_span_writer): +def test_automatic_annotation_off_if_manually_annotated(llmobs, llmobs_events): """Test disabling automatic input/output annotation for non-LLM decorators.""" for decorator_name, decorator in (("task", task), ("workflow", workflow), ("tool", tool), ("agent", agent)): @decorator(name="test_function", session_id="test_session_id") def f(prompt, arg_2, kwarg_1=None, kwarg_2=None): - LLMObs.annotate(input_data="my custom input", output_data="my custom output") + llmobs.annotate(input_data="my custom input", output_data="my custom output") return prompt f("test_prompt", "arg_2", kwarg_2=12345) - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - decorator_name, - session_id="test_session_id", - input_value="my custom input", - output_value="my custom output", - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[-1] == _expected_llmobs_non_llm_span_event( + span, + decorator_name, + session_id="test_session_id", + input_value="my custom input", + output_value="my custom output", ) -def test_generator_sync(LLMObs, mock_llmobs_span_writer): +def test_generator_sync(llmobs, llmobs_events): """ Test that decorators work with generator functions. The span should finish after the generator is exhausted. @@ -556,7 +518,7 @@ def f(): for i in range(3): yield i - LLMObs.annotate( + llmobs.annotate( input_data="hello", output_data="world", ) @@ -566,7 +528,7 @@ def f(): assert e == i i += 1 - span = LLMObs._instance.tracer.pop()[0] + span = llmobs._instance.tracer.pop()[0] if decorator_name == "llm": expected_span_event = _expected_llmobs_llm_span_event( span, @@ -594,10 +556,10 @@ def f(): span, decorator_name, input_value="hello", output_value="world" ) - mock_llmobs_span_writer.enqueue.assert_called_with(expected_span_event) + assert llmobs_events[-1] == expected_span_event -async def test_generator_async(LLMObs, mock_llmobs_span_writer): +async def test_generator_async(llmobs, llmobs_events): """ Test that decorators work with generator functions. The span should finish after the generator is exhausted. @@ -617,7 +579,7 @@ async def f(): for i in range(3): yield i - LLMObs.annotate( + llmobs.annotate( input_data="hello", output_data="world", ) @@ -627,7 +589,7 @@ async def f(): assert e == i i += 1 - span = LLMObs._instance.tracer.pop()[0] + span = llmobs._instance.tracer.pop()[0] if decorator_name == "llm": expected_span_event = _expected_llmobs_llm_span_event( span, @@ -655,11 +617,11 @@ async def f(): span, decorator_name, input_value="hello", output_value="world" ) - mock_llmobs_span_writer.enqueue.assert_called_with(expected_span_event) + assert llmobs_events[-1] == expected_span_event -def test_generator_sync_with_llmobs_disabled(LLMObs, mock_logs): - LLMObs.disable() +def test_generator_sync_with_llmobs_disabled(llmobs, mock_logs): + llmobs.disable() @workflow() def f(): @@ -684,10 +646,11 @@ def g(): i += 1 mock_logs.warning.assert_called_with(SPAN_START_WHILE_DISABLED_WARNING) + llmobs.enable() -async def test_generator_async_with_llmobs_disabled(LLMObs, mock_logs): - LLMObs.disable() +async def test_generator_async_with_llmobs_disabled(llmobs, mock_logs): + llmobs.disable() @workflow() async def f(): @@ -712,9 +675,10 @@ async def g(): i += 1 mock_logs.warning.assert_called_with(SPAN_START_WHILE_DISABLED_WARNING) + llmobs.enable() -def test_generator_sync_finishes_span_on_error(LLMObs, mock_llmobs_span_writer): +def test_generator_sync_finishes_span_on_error(llmobs, llmobs_events): """Tests that""" @workflow() @@ -728,19 +692,17 @@ def f(): for _ in f(): pass - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - "workflow", - error=span.get_tag("error.type"), - error_message=span.get_tag("error.message"), - error_stack=span.get_tag("error.stack"), - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event( + span, + "workflow", + error=span.get_tag("error.type"), + error_message=span.get_tag("error.message"), + error_stack=span.get_tag("error.stack"), ) -async def test_generator_async_finishes_span_on_error(LLMObs, mock_llmobs_span_writer): +async def test_generator_async_finishes_span_on_error(llmobs, llmobs_events): @workflow() async def f(): for i in range(3): @@ -752,19 +714,17 @@ async def f(): async for _ in f(): pass - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - "workflow", - error=span.get_tag("error.type"), - error_message=span.get_tag("error.message"), - error_stack=span.get_tag("error.stack"), - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event( + span, + "workflow", + error=span.get_tag("error.type"), + error_message=span.get_tag("error.message"), + error_stack=span.get_tag("error.stack"), ) -def test_generator_sync_send(LLMObs, mock_llmobs_span_writer): +def test_generator_sync_send(llmobs, llmobs_events): @workflow() def f(): while True: @@ -780,16 +740,11 @@ def f(): assert gen.send(4) == 16 gen.close() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - "workflow", - ) - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "workflow") -async def test_generator_async_send(LLMObs, mock_llmobs_span_writer): +async def test_generator_async_send(llmobs, llmobs_events): @workflow() async def f(): while True: @@ -805,16 +760,11 @@ async def f(): await gen.aclose() - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - "workflow", - ) - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event(span, "workflow") -def test_generator_sync_throw(LLMObs, mock_llmobs_span_writer): +def test_generator_sync_throw(llmobs, llmobs_events): @workflow() def f(): for i in range(3): @@ -825,19 +775,17 @@ def f(): next(gen) gen.throw(ValueError("test_error")) - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - "workflow", - error=span.get_tag("error.type"), - error_message=span.get_tag("error.message"), - error_stack=span.get_tag("error.stack"), - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event( + span, + "workflow", + error=span.get_tag("error.type"), + error_message=span.get_tag("error.message"), + error_stack=span.get_tag("error.stack"), ) -async def test_generator_async_throw(LLMObs, mock_llmobs_span_writer): +async def test_generator_async_throw(llmobs, llmobs_events): @workflow() async def f(): for i in range(3): @@ -848,19 +796,17 @@ async def f(): await gen.asend(None) await gen.athrow(ValueError("test_error")) - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - "workflow", - error=span.get_tag("error.type"), - error_message=span.get_tag("error.message"), - error_stack=span.get_tag("error.stack"), - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event( + span, + "workflow", + error=span.get_tag("error.type"), + error_message=span.get_tag("error.message"), + error_stack=span.get_tag("error.stack"), ) -def test_generator_exit_exception_sync(LLMObs, mock_llmobs_span_writer): +def test_generator_exit_exception_sync(llmobs, llmobs_events): @workflow() def get_next_element(alist): for element in alist: @@ -873,14 +819,12 @@ def get_next_element(alist): if element == 5: break - span = LLMObs._instance.tracer.pop()[0] - mock_llmobs_span_writer.enqueue.assert_called_with( - _expected_llmobs_non_llm_span_event( - span, - "workflow", - input_value=str({"alist": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}), - error=span.get_tag("error.type"), - error_message=span.get_tag("error.message"), - error_stack=span.get_tag("error.stack"), - ) + span = llmobs._instance.tracer.pop()[0] + assert llmobs_events[0] == _expected_llmobs_non_llm_span_event( + span, + "workflow", + input_value=str({"alist": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}), + error=span.get_tag("error.type"), + error_message=span.get_tag("error.message"), + error_stack=span.get_tag("error.stack"), ) diff --git a/tests/llmobs/test_llmobs_evaluator_runner.py b/tests/llmobs/test_llmobs_evaluator_runner.py index 7ee7d510276..128c4639946 100644 --- a/tests/llmobs/test_llmobs_evaluator_runner.py +++ b/tests/llmobs/test_llmobs_evaluator_runner.py @@ -34,9 +34,9 @@ def test_evaluator_runner_buffer_limit(mock_evaluator_logs): ) -def test_evaluator_runner_periodic_enqueues_eval_metric(LLMObs, mock_llmobs_eval_metric_writer): - evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=LLMObs) - evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=LLMObs)) +def test_evaluator_runner_periodic_enqueues_eval_metric(llmobs, mock_llmobs_eval_metric_writer): + evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=llmobs) + evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=llmobs)) evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN) evaluator_runner.periodic() mock_llmobs_eval_metric_writer.enqueue.assert_called_once_with( @@ -45,9 +45,9 @@ def test_evaluator_runner_periodic_enqueues_eval_metric(LLMObs, mock_llmobs_eval @pytest.mark.vcr_logs -def test_evaluator_runner_timed_enqueues_eval_metric(LLMObs, mock_llmobs_eval_metric_writer): - evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=LLMObs) - evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=LLMObs)) +def test_evaluator_runner_timed_enqueues_eval_metric(llmobs, mock_llmobs_eval_metric_writer): + evaluator_runner = EvaluatorRunner(interval=0.01, llmobs_service=llmobs) + evaluator_runner.evaluators.append(DummyEvaluator(llmobs_service=llmobs)) evaluator_runner.start() evaluator_runner.enqueue({"span_id": "123", "trace_id": "1234"}, DUMMY_SPAN) diff --git a/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py b/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py index ec8e181e527..39e315b37e4 100644 --- a/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py +++ b/tests/llmobs/test_llmobs_ragas_faithfulness_evaluator.py @@ -18,27 +18,27 @@ def _llm_span_without_io(): return _expected_llmobs_llm_span_event(Span("dummy")) -def test_ragas_evaluator_init(ragas, LLMObs): - rf_evaluator = RagasFaithfulnessEvaluator(LLMObs) - assert rf_evaluator.llmobs_service == LLMObs +def test_ragas_evaluator_init(ragas, llmobs): + rf_evaluator = RagasFaithfulnessEvaluator(llmobs) + assert rf_evaluator.llmobs_service == llmobs assert rf_evaluator.ragas_faithfulness_instance == ragas.metrics.faithfulness assert rf_evaluator.ragas_faithfulness_instance.llm == ragas.llms.llm_factory() -def test_ragas_faithfulness_throws_if_dependencies_not_present(LLMObs, mock_ragas_dependencies_not_present, ragas): +def test_ragas_faithfulness_throws_if_dependencies_not_present(llmobs, mock_ragas_dependencies_not_present, ragas): with pytest.raises(NotImplementedError, match="Failed to load dependencies for `ragas_faithfulness` evaluator"): - RagasFaithfulnessEvaluator(LLMObs) + RagasFaithfulnessEvaluator(llmobs) -def test_ragas_faithfulness_returns_none_if_inputs_extraction_fails(ragas, mock_llmobs_submit_evaluation, LLMObs): - rf_evaluator = RagasFaithfulnessEvaluator(LLMObs) +def test_ragas_faithfulness_returns_none_if_inputs_extraction_fails(ragas, mock_llmobs_submit_evaluation, llmobs): + rf_evaluator = RagasFaithfulnessEvaluator(llmobs) failure_msg, _ = rf_evaluator.evaluate(_llm_span_without_io()) assert failure_msg == "fail_extract_faithfulness_inputs" assert rf_evaluator.llmobs_service.submit_evaluation.call_count == 0 def test_ragas_faithfulness_has_modified_faithfulness_instance( - ragas, mock_llmobs_submit_evaluation, reset_ragas_faithfulness_llm, LLMObs + ragas, mock_llmobs_submit_evaluation, reset_ragas_faithfulness_llm, llmobs ): """Faithfulness instance used in ragas evaluator should match the global ragas faithfulness instance""" from ragas.llms import BaseRagasLLM @@ -56,7 +56,7 @@ def agenerate_text(self) -> str: faithfulness.llm = FirstDummyLLM() - rf_evaluator = RagasFaithfulnessEvaluator(LLMObs) + rf_evaluator = RagasFaithfulnessEvaluator(llmobs) assert rf_evaluator.ragas_faithfulness_instance.llm.generate_text() == "dummy llm" @@ -77,9 +77,9 @@ def agenerate_text(self, statements) -> str: @pytest.mark.vcr_logs -def test_ragas_faithfulness_submits_evaluation(ragas, LLMObs, mock_llmobs_submit_evaluation): +def test_ragas_faithfulness_submits_evaluation(ragas, llmobs, mock_llmobs_submit_evaluation): """Test that evaluation is submitted for a valid llm span where question is in the prompt variables""" - rf_evaluator = RagasFaithfulnessEvaluator(LLMObs) + rf_evaluator = RagasFaithfulnessEvaluator(llmobs) llm_span = _llm_span_with_expected_ragas_inputs_in_prompt() rf_evaluator.run_and_submit_evaluation(llm_span) rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls( @@ -104,10 +104,10 @@ def test_ragas_faithfulness_submits_evaluation(ragas, LLMObs, mock_llmobs_submit @pytest.mark.vcr_logs def test_ragas_faithfulness_submits_evaluation_on_span_with_question_in_messages( - ragas, LLMObs, mock_llmobs_submit_evaluation + ragas, llmobs, mock_llmobs_submit_evaluation ): """Test that evaluation is submitted for a valid llm span where the last message content is the question""" - rf_evaluator = RagasFaithfulnessEvaluator(LLMObs) + rf_evaluator = RagasFaithfulnessEvaluator(llmobs) llm_span = _llm_span_with_expected_ragas_inputs_in_messages() rf_evaluator.run_and_submit_evaluation(llm_span) rf_evaluator.llmobs_service.submit_evaluation.assert_has_calls( @@ -131,9 +131,9 @@ def test_ragas_faithfulness_submits_evaluation_on_span_with_question_in_messages @pytest.mark.vcr_logs -def test_ragas_faithfulness_submits_evaluation_on_span_with_custom_keys(ragas, LLMObs, mock_llmobs_submit_evaluation): +def test_ragas_faithfulness_submits_evaluation_on_span_with_custom_keys(ragas, llmobs, mock_llmobs_submit_evaluation): """Test that evaluation is submitted for a valid llm span where the last message content is the question""" - rf_evaluator = RagasFaithfulnessEvaluator(LLMObs) + rf_evaluator = RagasFaithfulnessEvaluator(llmobs) llm_span = _expected_llmobs_llm_span_event( Span("dummy"), prompt={ @@ -178,7 +178,6 @@ def test_ragas_faithfulness_emits_traces(ragas, llmobs, llmobs_events): assert len(ragas_spans) == 7 # check name, io, span kinds match assert ragas_spans == _expected_ragas_spans() - # verify the trace structure root_span = ragas_spans[0] root_span_id = root_span["span_id"] @@ -241,7 +240,7 @@ def test_llmobs_with_faithfulness_emits_traces_and_evals_on_exit(mock_writer_log ): LLMObs.enable() LLMObs._instance._evaluator_runner.enqueue(_llm_span_with_expected_ragas_inputs_in_messages(), None) -""", + """, env=env, ) assert status == 0, err diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py index 2ba5754019f..69ebb216d7e 100644 --- a/tests/llmobs/test_llmobs_service.py +++ b/tests/llmobs/test_llmobs_service.py @@ -29,6 +29,8 @@ from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING from ddtrace.llmobs._constants import TAGS from ddtrace.llmobs._llmobs import SUPPORTED_LLMOBS_INTEGRATIONS +from ddtrace.llmobs._writer import LLMObsAgentlessEventClient +from ddtrace.llmobs._writer import LLMObsProxiedEventClient from ddtrace.llmobs.utils import Prompt from tests.llmobs._utils import _expected_llmobs_eval_metric_event from tests.llmobs._utils import _expected_llmobs_llm_span_event @@ -47,7 +49,7 @@ def run_llmobs_trace_filter(dummy_tracer): return dummy_tracer._writer.pop() -def test_service_enable(): +def test_service_enable_proxy_default(): with override_global_config(dict(_dd_api_key="", _llmobs_ml_app="")): dummy_tracer = DummyTracer() llmobs_service.enable(_tracer=dummy_tracer) @@ -55,6 +57,21 @@ def test_service_enable(): assert llmobs_instance is not None assert llmobs_service.enabled assert llmobs_instance.tracer == dummy_tracer + assert isinstance(llmobs_instance._llmobs_span_writer._clients[0], LLMObsProxiedEventClient) + assert run_llmobs_trace_filter(dummy_tracer) is not None + + llmobs_service.disable() + + +def test_enable_agentless(): + with override_global_config(dict(_dd_api_key="", _llmobs_ml_app="")): + dummy_tracer = DummyTracer() + llmobs_service.enable(_tracer=dummy_tracer, agentless_enabled=True) + llmobs_instance = llmobs_service._instance + assert llmobs_instance is not None + assert llmobs_service.enabled + assert llmobs_instance.tracer == dummy_tracer + assert isinstance(llmobs_instance._llmobs_span_writer._clients[0], LLMObsAgentlessEventClient) assert run_llmobs_trace_filter(dummy_tracer) is not None llmobs_service.disable() @@ -1188,42 +1205,14 @@ def test_submit_evaluation_with_numerical_metric_enqueues_writer_with_score_metr ) -def test_flush_calls_periodic_agentless( - AgentlessLLMObs, mock_llmobs_span_writer, mock_llmobs_eval_metric_writer, mock_llmobs_evaluator_runner -): - AgentlessLLMObs.flush() - mock_llmobs_span_writer.periodic.assert_called_once() - mock_llmobs_eval_metric_writer.periodic.assert_called_once() - mock_llmobs_evaluator_runner.periodic.assert_called_once() - - def test_flush_does_not_call_periodic_when_llmobs_is_disabled( llmobs, - mock_llmobs_span_writer, mock_llmobs_eval_metric_writer, mock_llmobs_evaluator_runner, mock_llmobs_logs, - disabled_llmobs, ): + llmobs.enabled = False llmobs.flush() - mock_llmobs_span_writer.periodic.assert_not_called() - mock_llmobs_eval_metric_writer.periodic.assert_not_called() - mock_llmobs_evaluator_runner.periodic.assert_not_called() - mock_llmobs_logs.warning.assert_has_calls( - [mock.call("flushing when LLMObs is disabled. No spans or evaluation metrics will be sent.")] - ) - - -def test_flush_does_not_call_periodic_when_llmobs_is_disabled_agentless( - AgentlessLLMObs, - mock_llmobs_span_writer, - mock_llmobs_eval_metric_writer, - mock_llmobs_evaluator_runner, - mock_llmobs_logs, - disabled_llmobs, -): - AgentlessLLMObs.flush() - mock_llmobs_span_writer.periodic.assert_not_called() mock_llmobs_eval_metric_writer.periodic.assert_not_called() mock_llmobs_evaluator_runner.periodic.assert_not_called() mock_llmobs_logs.warning.assert_has_calls( diff --git a/tests/llmobs/test_propagation.py b/tests/llmobs/test_propagation.py index d14b22d65d5..e3ab9c80d66 100644 --- a/tests/llmobs/test_propagation.py +++ b/tests/llmobs/test_propagation.py @@ -157,39 +157,39 @@ def test_no_llmobs_parent_id_propagated_if_no_llmobs_spans(run_python_code_in_su assert _get_llmobs_parent_id(span) == "undefined" -def test_inject_distributed_headers_simple(LLMObs): +def test_inject_distributed_headers_simple(llmobs): dummy_tracer = DummyTracer() with dummy_tracer.trace("LLMObs span", span_type=SpanTypes.LLM) as root_span: - request_headers = LLMObs.inject_distributed_headers({}, span=root_span) + request_headers = llmobs.inject_distributed_headers({}, span=root_span) assert PROPAGATED_PARENT_ID_KEY in request_headers["x-datadog-tags"] -def test_inject_distributed_headers_nested_llmobs_non_llmobs(LLMObs): +def test_inject_distributed_headers_nested_llmobs_non_llmobs(llmobs): dummy_tracer = DummyTracer() with dummy_tracer.trace("LLMObs span", span_type=SpanTypes.LLM): with dummy_tracer.trace("Non-LLMObs span") as child_span: - request_headers = LLMObs.inject_distributed_headers({}, span=child_span) + request_headers = llmobs.inject_distributed_headers({}, span=child_span) assert PROPAGATED_PARENT_ID_KEY in request_headers["x-datadog-tags"] -def test_inject_distributed_headers_non_llmobs_root_span(LLMObs): +def test_inject_distributed_headers_non_llmobs_root_span(llmobs): dummy_tracer = DummyTracer() with dummy_tracer.trace("Non-LLMObs span"): with dummy_tracer.trace("LLMObs span", span_type=SpanTypes.LLM) as child_span: - request_headers = LLMObs.inject_distributed_headers({}, span=child_span) + request_headers = llmobs.inject_distributed_headers({}, span=child_span) assert PROPAGATED_PARENT_ID_KEY in request_headers["x-datadog-tags"] -def test_inject_distributed_headers_nested_llmobs_spans(LLMObs): +def test_inject_distributed_headers_nested_llmobs_spans(llmobs): dummy_tracer = DummyTracer() with dummy_tracer.trace("LLMObs span", span_type=SpanTypes.LLM): with dummy_tracer.trace("LLMObs child span", span_type=SpanTypes.LLM): with dummy_tracer.trace("Last LLMObs child span", span_type=SpanTypes.LLM) as last_llmobs_span: - request_headers = LLMObs.inject_distributed_headers({}, span=last_llmobs_span) + request_headers = llmobs.inject_distributed_headers({}, span=last_llmobs_span) assert PROPAGATED_PARENT_ID_KEY in request_headers["x-datadog-tags"] -def test_activate_distributed_headers_propagate_correct_llmobs_parent_id_simple(run_python_code_in_subprocess, LLMObs): +def test_activate_distributed_headers_propagate_correct_llmobs_parent_id_simple(run_python_code_in_subprocess, llmobs): """Test that the correct LLMObs parent ID is propagated in the headers in a simple distributed scenario. Service A (subprocess) has a root LLMObs span and a non-LLMObs child span. Service B (outside subprocess) has a LLMObs span. @@ -218,13 +218,13 @@ def test_activate_distributed_headers_propagate_correct_llmobs_parent_id_simple( assert status == 0, (stdout, stderr) headers = json.loads(stdout.decode()) - LLMObs.activate_distributed_headers(headers) - with LLMObs.workflow("LLMObs span") as span: + llmobs.activate_distributed_headers(headers) + with llmobs.workflow("LLMObs span") as span: assert str(span.parent_id) == headers["x-datadog-parent-id"] assert _get_llmobs_parent_id(span) == headers["_DD_LLMOBS_SPAN_ID"] -def test_activate_distributed_headers_propagate_llmobs_parent_id_complex(run_python_code_in_subprocess, LLMObs): +def test_activate_distributed_headers_propagate_llmobs_parent_id_complex(run_python_code_in_subprocess, llmobs): """Test that the correct LLMObs parent ID is propagated in the headers in a more complex trace. Service A (subprocess) has a root LLMObs span and a non-LLMObs child span. Service B (outside subprocess) has a non-LLMObs local root span and a LLMObs child span. @@ -253,16 +253,16 @@ def test_activate_distributed_headers_propagate_llmobs_parent_id_complex(run_pyt assert status == 0, (stdout, stderr) headers = json.loads(stdout.decode()) - LLMObs.activate_distributed_headers(headers) + llmobs.activate_distributed_headers(headers) dummy_tracer = DummyTracer() with dummy_tracer.trace("Non-LLMObs span") as span: - with LLMObs.llm(model_name="llm_model", name="LLMObs span") as llm_span: + with llmobs.llm(model_name="llm_model", name="LLMObs span") as llm_span: assert str(span.parent_id) == headers["x-datadog-parent-id"] assert _get_llmobs_parent_id(span) == headers["_DD_LLMOBS_SPAN_ID"] assert _get_llmobs_parent_id(llm_span) == headers["_DD_LLMOBS_SPAN_ID"] -def test_activate_distributed_headers_does_not_propagate_if_no_llmobs_spans(run_python_code_in_subprocess, LLMObs): +def test_activate_distributed_headers_does_not_propagate_if_no_llmobs_spans(run_python_code_in_subprocess, llmobs): """Test that the correct LLMObs parent ID (None) is extracted from the headers in a simple distributed scenario. Service A (subprocess) has spans, but none are LLMObs spans. Service B (outside subprocess) has a LLMObs span. @@ -287,10 +287,9 @@ def test_activate_distributed_headers_does_not_propagate_if_no_llmobs_spans(run_ env["DD_TRACE_ENABLED"] = "0" stdout, stderr, status, _ = run_python_code_in_subprocess(code=code, env=env) assert status == 0, (stdout, stderr) - assert stderr == b"", (stdout, stderr) headers = json.loads(stdout.decode()) - LLMObs.activate_distributed_headers(headers) - with LLMObs.task("LLMObs span") as span: + llmobs.activate_distributed_headers(headers) + with llmobs.task("LLMObs span") as span: assert str(span.parent_id) == headers["x-datadog-parent-id"] assert _get_llmobs_parent_id(span) == "undefined"