Skip to content

Commit

Permalink
anthropic: Add support for multi content part and images content in h…
Browse files Browse the repository at this point in the history
…uman messages. (tmc#1141)

* anthropic: Add support for multi content part  in human messages.

* examples: Add Anthropic Vision example

* fix: move base64 encoding in the lib
  • Loading branch information
Neofox authored and hisunwei committed Feb 18, 2025
1 parent 8062596 commit f5632fc
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 6 deletions.
34 changes: 34 additions & 0 deletions examples/anthropic-vision-example/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Anthropic Vision Example

Hello there! 👋 This example demonstrates how to use the Anthropic Claude 3.5 Sonnet model for image analysis using Go and the LangChain Go library. Let's break down what this exciting code does!

## What This Example Does

1. **Sets Up Anthropic**: The code initializes an Anthropic client to interact with the Claude 3.5 Sonnet model.

2. **Loads an Image**: An image file (`image.png`) is embedded into the binary using Go's `embed` package. This image will be analyzed by the AI model.

3. **Sends a Request**: The code constructs a request to the Claude 3 model, including:
- The image data (in PNG format), passed as raw bytes; the library base64-encodes it before sending
- A text prompt asking to identify the string on a box in the image

4. **Processes the Response**: After sending the request, the code handles the response from the AI model, extracting the generated content and some metadata about token usage.

5. **Outputs Results**: Finally, it prints out the AI's interpretation of what string is on the box in the image.

## Key Features

- **Multimodal AI**: This example showcases how to work with both image and text inputs in a single AI request.
- **Error Handling**: Includes basic error checking to ensure the process runs smoothly.
- **Token Usage Tracking**: Logs the number of input and output tokens used, which can be helpful for monitoring usage and costs.

## Running the Example

To run this example, you'll need:

1. An Anthropic API key set in your environment variables
2. The required Go dependencies installed

Once everything is set up, simply run the Go file, and it should output the AI's interpretation of the text on the box in the image!

Happy coding, and enjoy exploring the fascinating world of multimodal AI with Claude 3! 🚀🖼️🤖
59 changes: 59 additions & 0 deletions examples/anthropic-vision-example/anthropic_vision_example.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package main

import (
"context"
_ "embed"
"fmt"
"log"

"github.com/tmc/langchaingo/llms"
"github.com/tmc/langchaingo/llms/anthropic"
)

// image holds the raw bytes of image.png, which the go:embed directive below
// compiles into the binary. It is sent to the model as the picture to analyze.
//
//go:embed image.png
var image []byte

// main asks the configured Anthropic model to read the text shown in the
// embedded image, then logs the reported token usage and prints the answer.
func main() {
	client, err := anthropic.New(anthropic.WithModel("claude-3-5-sonnet-20240620"))
	if err != nil {
		log.Fatal(err)
	}

	// A single human message carrying two parts: the picture and the question.
	// The MIME type must describe the bytes actually supplied; image formats
	// such as image/png, image/jpeg, image/gif and image/webp can be used.
	// For more details, see https://docs.anthropic.com/claude/reference/messages_post
	prompt := llms.MessageContent{
		Role: llms.ChatMessageTypeHuman,
		Parts: []llms.ContentPart{
			llms.BinaryPart("image/png", image),
			llms.TextPart("Please tell me the string on the box."),
		},
	}

	response, err := client.GenerateContent(
		context.Background(),
		[]llms.MessageContent{prompt},
		llms.WithMaxTokens(1000),
		llms.WithTemperature(0.1),
		llms.WithTopP(1.0),
		llms.WithTopK(100),
	)
	if err != nil {
		log.Fatal(err)
	}
	if len(response.Choices) == 0 {
		log.Fatal("empty response from model")
	}

	// Only the first choice is reported.
	answer := response.Choices[0]
	log.Printf(
		"input_tokens: %d, output_tokens: %d",
		answer.GenerationInfo["InputTokens"],
		answer.GenerationInfo["OutputTokens"],
	)
	fmt.Println(answer.Content)
	// Output:
	// The string on the box in the image is "LGTM".
}
13 changes: 13 additions & 0 deletions examples/anthropic-vision-example/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module github.com/tmc/langchaingo/examples/anthropic-vision-example

go 1.22.0

toolchain go1.22.1

require github.com/tmc/langchaingo v0.1.13-pre.1

require (
github.com/dlclark/regexp2 v1.10.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/pkoukk/tiktoken-go v0.1.6 // indirect
)
22 changes: 22 additions & 0 deletions examples/anthropic-vision-example/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0=
github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/pkoukk/tiktoken-go v0.1.6 h1:JF0TlJzhTbrI30wCvFuiw6FzP2+/bR+FIxUdgEAcUsw=
github.com/pkoukk/tiktoken-go v0.1.6/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/tmc/langchaingo v0.1.13-pre.1 h1:r+ma9kl0NuFJGtIrnMPFjEn4RhXktwSI31fIpgiiMm4=
github.com/tmc/langchaingo v0.1.13-pre.1/go.mod h1:vpQ5NOIhpzxDfTZK9B6tf2GM/MoaHewPWM5KXXGh7hg=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
Binary file added examples/anthropic-vision-example/image.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
37 changes: 31 additions & 6 deletions llms/anthropic/anthropicllm.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package anthropic

import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
Expand Down Expand Up @@ -266,13 +267,37 @@ func handleSystemMessage(msg llms.MessageContent) (string, error) {
}

func handleHumanMessage(msg llms.MessageContent) (anthropicclient.ChatMessage, error) {
if textContent, ok := msg.Parts[0].(llms.TextContent); ok {
return anthropicclient.ChatMessage{
Role: RoleUser,
Content: textContent.Text,
}, nil
var contents []anthropicclient.Content

for _, part := range msg.Parts {
switch p := part.(type) {
case llms.TextContent:
contents = append(contents, &anthropicclient.TextContent{
Type: "text",
Text: p.Text,
})
case llms.BinaryContent:
contents = append(contents, &anthropicclient.ImageContent{
Type: "image",
Source: anthropicclient.ImageSource{
Type: "base64",
MediaType: p.MIMEType,
Data: base64.StdEncoding.EncodeToString(p.Data),
},
})
default:
return anthropicclient.ChatMessage{}, fmt.Errorf("anthropic: unsupported human message part type: %T", part)
}
}

if len(contents) == 0 {
return anthropicclient.ChatMessage{}, fmt.Errorf("anthropic: no valid content in human message")
}
return anthropicclient.ChatMessage{}, fmt.Errorf("anthropic: %w for human message", ErrInvalidContentType)

return anthropicclient.ChatMessage{
Role: RoleUser,
Content: contents,
}, nil
}

func handleAIMessage(msg llms.MessageContent) (anthropicclient.ChatMessage, error) {
Expand Down
15 changes: 15 additions & 0 deletions llms/anthropic/internal/anthropicclient/messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,21 @@ func (tc TextContent) GetType() string {
return tc.Type
}

// ImageSource carries the payload of an image content block: how the data is
// encoded (Type), its media type (MediaType), and the encoded data itself.
type ImageSource struct {
	Type      string `json:"type"`
	MediaType string `json:"media_type"`
	Data      string `json:"data"`
}

// ImageContent is a message content block that holds an image. Type is the
// block's type discriminator and Source describes the image payload.
type ImageContent struct {
	Type   string      `json:"type"`
	Source ImageSource `json:"source"`
}

// GetType returns the value of the block's Type field.
func (ic ImageContent) GetType() string {
	return ic.Type
}

type ToolUseContent struct {
Type string `json:"type"`
ID string `json:"id"`
Expand Down

0 comments on commit f5632fc

Please sign in to comment.