Skip to content

Commit

Permalink
add crawler tool
Browse files Browse the repository at this point in the history
  • Loading branch information
adrianliechti committed Sep 21, 2024
1 parent 8e41a58 commit d6e39be
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 29 deletions.
38 changes: 27 additions & 11 deletions config/config_tool.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ import (
"errors"
"strings"

"github.com/adrianliechti/llama/pkg/extractor"
"github.com/adrianliechti/llama/pkg/provider"
"github.com/adrianliechti/llama/pkg/tool"
"github.com/adrianliechti/llama/pkg/tool/bing"
"github.com/adrianliechti/llama/pkg/tool/crawler"
"github.com/adrianliechti/llama/pkg/tool/custom"
"github.com/adrianliechti/llama/pkg/tool/draw"
"github.com/adrianliechti/llama/pkg/tool/duckduckgo"
Expand Down Expand Up @@ -44,7 +46,8 @@ type toolConfig struct {
}

// toolContext bundles the shared providers that registerTools resolves and
// hands to createTool, which forwards it to the individual tool constructors.
type toolContext struct {
Renderer provider.Renderer
Renderer provider.Renderer
// Extractor is read by crawlerTool and passed to crawler.New.
Extractor extractor.Provider
}

func (cfg *Config) registerTools(f *configFile) error {
Expand All @@ -59,6 +62,10 @@ func (cfg *Config) registerTools(f *configFile) error {
}
}

if e, err := cfg.Extractor(""); err == nil {
context.Extractor = e
}

tool, err := createTool(t, context)

if err != nil {
Expand All @@ -78,34 +85,43 @@ func (cfg *Config) registerTools(f *configFile) error {
// createTool selects a tool constructor by cfg.Type, compared
// case-insensitively via strings.ToLower, and returns that constructor's
// result. An unrecognized type yields an "invalid tool type" error that
// includes the original cfg.Type value.
func createTool(cfg toolConfig, context toolContext) (tool.Tool, error) {
switch strings.ToLower(cfg.Type) {
case "bing":
return bingTool(cfg)
return bingTool(cfg, context)

case "crawler":
return crawlerTool(cfg, context)

case "draw":
return drawTool(cfg, context)

case "duckduckgo":
return duckduckgoTool(cfg)
return duckduckgoTool(cfg, context)

case "tavily":
return tavilyTool(cfg)
return tavilyTool(cfg, context)

case "searxng":
return searxngTool(cfg)
return searxngTool(cfg, context)

case "custom":
return customTool(cfg)
return customTool(cfg, context)

default:
// Report the type exactly as configured, not the lower-cased form.
return nil, errors.New("invalid tool type: " + cfg.Type)
}
}

// bingTool builds the Bing tool from cfg.Token. The options slice is left
// empty and the context argument is not read in this body.
func bingTool(cfg toolConfig) (tool.Tool, error) {
func bingTool(cfg toolConfig, context toolContext) (tool.Tool, error) {
var options []bing.Option

return bing.New(cfg.Token, options...)
}

// crawlerTool builds the crawler tool on top of the extractor carried in
// context. Nothing is read from cfg, so no crawler options are supplied;
// crawler.New itself rejects a nil extractor with an error.
func crawlerTool(cfg toolConfig, context toolContext) (tool.Tool, error) {
	return crawler.New(context.Extractor)
}

func drawTool(cfg toolConfig, context toolContext) (tool.Tool, error) {
var options []draw.Option

Expand All @@ -116,25 +132,25 @@ func drawTool(cfg toolConfig, context toolContext) (tool.Tool, error) {
return draw.New(options...)
}

// duckduckgoTool builds the DuckDuckGo tool. The options slice is left empty
// and neither cfg nor context is read in this body.
func duckduckgoTool(cfg toolConfig) (tool.Tool, error) {
func duckduckgoTool(cfg toolConfig, context toolContext) (tool.Tool, error) {
var options []duckduckgo.Option

return duckduckgo.New(options...)
}

// searxngTool builds the SearXNG tool from cfg.URL. The options slice is left
// empty and the context argument is not read in this body.
func searxngTool(cfg toolConfig) (tool.Tool, error) {
func searxngTool(cfg toolConfig, context toolContext) (tool.Tool, error) {
var options []searxng.Option

return searxng.New(cfg.URL, options...)
}

// tavilyTool builds the Tavily tool from cfg.Token. The options slice is left
// empty and the context argument is not read in this body.
func tavilyTool(cfg toolConfig) (tool.Tool, error) {
func tavilyTool(cfg toolConfig, context toolContext) (tool.Tool, error) {
var options []tavily.Option

return tavily.New(cfg.Token, options...)
}

func customTool(cfg toolConfig) (tool.Tool, error) {
func customTool(cfg toolConfig, context toolContext) (tool.Tool, error) {
var options []custom.Option

return custom.New(cfg.URL, options...)
Expand Down
24 changes: 6 additions & 18 deletions pkg/extractor/jina/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"errors"
"io"
"net/http"
"net/url"

"github.com/adrianliechti/llama/pkg/extractor"
)
Expand Down Expand Up @@ -44,19 +43,16 @@ func (c *Client) Extract(ctx context.Context, input extractor.File, options *ext
options = new(extractor.ExtractOptions)
}

if !isSupported(input) {
if input.URL == "" {
return nil, extractor.ErrUnsupported
}

// body := map[string]any{
// "url": input.URL,
// }

// req, _ := http.NewRequestWithContext(ctx, "POST", c.url, jsonReader(body))
// req.Header.Set("Content-Type", "application/json")
body := map[string]any{
"url": input.URL,
}

url, _ := url.JoinPath(c.url, "/"+input.URL)
req, _ := http.NewRequestWithContext(ctx, "GET", url, nil)
req, _ := http.NewRequestWithContext(ctx, "POST", c.url, jsonReader(body))
req.Header.Set("Content-Type", "application/json")
req.Header.Set("X-Return-Format", "markdown")

if c.token != "" {
Expand Down Expand Up @@ -86,14 +82,6 @@ func (c *Client) Extract(ctx context.Context, input extractor.File, options *ext
}, nil
}

// isSupported reports whether input carries a non-empty URL.
func isSupported(input extractor.File) bool {
	return input.URL != ""
}

func convertError(resp *http.Response) error {
data, _ := io.ReadAll(resp.Body)

Expand Down
81 changes: 81 additions & 0 deletions pkg/tool/crawler/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package crawler

import (
"context"
"errors"
"net/http"

"github.com/adrianliechti/llama/pkg/extractor"
"github.com/adrianliechti/llama/pkg/tool"
)

// Compile-time assertion that *Tool implements tool.Tool.
var _ tool.Tool = (*Tool)(nil)

// Tool exposes an extractor.Provider as a "crawler" tool that returns the
// content extracted for a given URL.
type Tool struct {
	// client defaults to http.DefaultClient in New and can be replaced with
	// WithClient. No method in this file reads it.
	client *http.Client

	// extractor performs the actual extraction; New refuses to build a Tool
	// without one.
	extractor extractor.Provider
}

// New builds a crawler Tool backed by the given extractor. The tool starts
// with http.DefaultClient and then has every option applied in order. It
// returns an error if no extractor is set once the options have run.
func New(extractor extractor.Provider, options ...Option) (*Tool, error) {
	t := new(Tool)
	t.client = http.DefaultClient
	t.extractor = extractor

	for i := range options {
		options[i](t)
	}

	// Validate after the options so the check sees the final state.
	if t.extractor != nil {
		return t, nil
	}

	return nil, errors.New("missing extractor provider")
}

// Name returns the fixed identifier of this tool.
func (t *Tool) Name() string {
	const name = "crawler"

	return name
}

// Description returns the fixed human-readable summary of what the tool does.
func (t *Tool) Description() string {
	const description = "return the content of a website as markdown"

	return description
}

// Parameters returns the JSON-schema-style description of the tool's input:
// an object with a single required string property "url".
func (t *Tool) Parameters() any {
	return map[string]any{
		"type": "object",

		"properties": map[string]any{
			"url": map[string]any{
				"type":        "string",
				"description": "the URL of the website to crawl starting with http:// or https://",
			},
		},

		"required": []string{"url"},
	}
}

// Execute runs the crawler for the "url" entry of parameters and returns the
// extracted document content.
//
// It returns an error when "url" is absent, is not a string, or is empty, when
// the extractor fails, or when the extractor reports success without a
// document.
func (t *Tool) Execute(ctx context.Context, parameters map[string]any) (any, error) {
	url, ok := parameters["url"].(string)

	// Reject an empty URL here instead of handing it to the extractor.
	if !ok || url == "" {
		return nil, errors.New("missing url parameter")
	}

	input := extractor.File{
		URL: url,
	}

	options := &extractor.ExtractOptions{}

	document, err := t.extractor.Extract(ctx, input, options)

	if err != nil {
		return nil, err
	}

	// Guard against a provider returning (nil, nil); dereferencing would panic.
	if document == nil {
		return nil, errors.New("extractor returned no document")
	}

	return document.Content, nil
}
13 changes: 13 additions & 0 deletions pkg/tool/crawler/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package crawler

import (
"net/http"
)

// Option customizes a Tool; New calls each supplied Option on the tool it is
// constructing.
type Option func(*Tool)

// WithClient returns an Option that replaces the Tool's HTTP client with
// client.
func WithClient(client *http.Client) Option {
	apply := func(target *Tool) {
		target.client = client
	}

	return apply
}

0 comments on commit d6e39be

Please sign in to comment.