From e04c8acf72186e63998cb4c97df0d3708aaeb15d Mon Sep 17 00:00:00 2001
From: ncybul <124532568+ncybul@users.noreply.github.com>
Date: Tue, 10 Dec 2024 14:19:31 -0500
Subject: [PATCH] feat(langchain): [MLOB-1972] update langchain to handle vertex and gemini llm calls (#11642)

This PR updates the LangChain integration to handle LLM calls instrumented by the Vertex AI and Gemini integrations. It checks for their respective provider names (`vertexai` and `google_palm`) and, when either is detected, instruments the wrapper LangChain call as a workflow span.

Importantly, because of the way LangChain invokes chat generations for Vertex AI and Gemini, our integrations will not capture these inner LLM calls: in both cases, the prediction client is called directly with the input rather than through the `chat.send_message` method that we have instrumented. For chat generations we therefore capture only the LangChain call and leave it as an LLM span.
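As a rough sketch, the decision described above boils down to the following (this helper and its return values are illustrative only, not the integration's actual API; the real logic lives in `_llmobs_set_tags`, shown in the diff below):

```
VERTEXAI_PROVIDER_NAME = "vertexai"   # provider name LangChain reports for Vertex AI
GEMINI_PROVIDER_NAME = "google_palm"  # provider name LangChain reports for Gemini


def decide_span_kind(model_provider: str, operation: str) -> str:
    """Hypothetical helper illustrating the span-kind decision described above."""
    # The inner Vertex AI / Gemini integration only traces the llm interface;
    # chat generations call the prediction client directly and bypass the
    # instrumented chat.send_message method.
    inner_call_is_traced = (
        model_provider.startswith((VERTEXAI_PROVIDER_NAME, GEMINI_PROVIDER_NAME))
        and operation == "llm"
    )
    # If the inner integration captures the LLM call, the wrapper LangChain
    # span becomes a workflow span; otherwise it stays an LLM span.
    return "workflow" if inner_call_is_traced else "llm"


assert decide_span_kind("vertexai", "llm") == "workflow"
assert decide_span_kind("google_palm", "llm") == "workflow"
assert decide_span_kind("vertexai", "chat") == "llm"
```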
## Testing

While we work on a more stable way to test our LangChain integration, I opted to verify these changes manually by submitting traces to staging. Below is the code I ran and the resulting trace that appeared in the product. LLM calls are expected to produce a LangChain workflow span with a Gemini/Vertex AI child LLM span, whereas chat calls produce only the LangChain LLM span (see above for why).

### Gemini LLM call

```
from langchain_google_genai import GoogleGenerativeAI

llm = GoogleGenerativeAI(model="gemini-pro")
print(
    llm.invoke(
        "What are some tips for improving sleep quality?"
    )
)
```

![image](https://github.com/user-attachments/assets/5d68ad43-36c1-413a-822a-eac37e650c91)

### Gemini Chat call

```
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-pro")
resp = llm.invoke("Tell me a joke.")
print(resp)
```

![image](https://github.com/user-attachments/assets/57b9dadb-2c2b-40d5-905e-3fe261132262)

### Vertex AI LLM call

```
from langchain_google_vertexai import VertexAI

model = VertexAI(model_name="gemini-pro")
message = "What is the optimal temperature for sleeping?"
model.invoke(message)
```

![image](https://github.com/user-attachments/assets/74d84f58-ff71-4f60-948f-4a8494b60a17)

### Vertex AI Chat call

```
from langchain_google_vertexai import ChatVertexAI

llm = ChatVertexAI(
    model="gemini-1.5-flash-001",
    temperature=0,
    max_tokens=None,
    max_retries=6,
    stop=None,
)
messages = [
    (
        "system",
        "You are a helpful assistant that translates English to French. Translate the user sentence.",
    ),
    ("human", "I love programming."),
]
ai_msg = llm.invoke(messages)
print(ai_msg)
```

![image](https://github.com/user-attachments/assets/9bb398a3-4803-4189-af11-8afe7a6add1e)

## Checklist

- [x] PR author has checked that all the criteria below are met
  - The PR description includes an overview of the change
  - The PR description articulates the motivation for the change
  - The change includes tests OR the PR description describes a testing strategy
  - The PR description notes risks associated with the change, if any
  - Newly-added code is easy to change
  - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
  - The change includes or references documentation updates if necessary
  - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist

- [x] Reviewer has checked that all the criteria below are met
  - Title is accurate
  - All changes are related to the pull request's stated goal
  - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes
  - Testing strategy adequately addresses listed risks
  - Newly-added code is easy to change
  - Release note makes sense to a user of the library
  - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment
  - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---
 ddtrace/llmobs/_integrations/langchain.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/ddtrace/llmobs/_integrations/langchain.py b/ddtrace/llmobs/_integrations/langchain.py
index c2304289c2c..2128458253d 100644
--- a/ddtrace/llmobs/_integrations/langchain.py
+++ b/ddtrace/llmobs/_integrations/langchain.py
@@ -44,6 +44,8 @@
 ANTHROPIC_PROVIDER_NAME = "anthropic"
 BEDROCK_PROVIDER_NAME = "amazon_bedrock"
 OPENAI_PROVIDER_NAME = "openai"
+VERTEXAI_PROVIDER_NAME = "vertexai"
+GEMINI_PROVIDER_NAME = "google_palm"
 
 ROLE_MAPPING = {
     "human": "user",
@@ -81,6 +83,12 @@ def _llmobs_set_tags(
         if model_provider:
             if model_provider.startswith(BEDROCK_PROVIDER_NAME):
                 llmobs_integration = "bedrock"
+            # only the llm interface for Vertex AI will get instrumented
+            elif model_provider.startswith(VERTEXAI_PROVIDER_NAME) and operation == "llm":
+                llmobs_integration = "vertexai"
+            # only the llm interface for Gemini will get instrumented
+            elif model_provider.startswith(GEMINI_PROVIDER_NAME) and operation == "llm":
+                llmobs_integration = "google_generativeai"
             elif model_provider.startswith(OPENAI_PROVIDER_NAME):
                 llmobs_integration = "openai"
             elif operation == "chat" and model_provider.startswith(ANTHROPIC_PROVIDER_NAME):