diff --git a/ai/cohere/v0/README.mdx b/ai/cohere/v0/README.mdx index dd11b0c2..d09f7a6e 100644 --- a/ai/cohere/v0/README.mdx +++ b/ai/cohere/v0/README.mdx @@ -115,6 +115,7 @@ Sort text inputs by semantic relevance to a specified query. | Output | ID | Type | Description | | :--- | :--- | :--- | :--- | | Reranked documents | `ranking` | array[string] | Reranked documents | +| Reranked documents relevance (optional) | `relevance` | array[number] | The relevance scores of the reranked documents | | Usage (optional) | `usage` | object | Search Usage on the Cohere Platform Rerank Models | diff --git a/ai/huggingface/v0/README.mdx b/ai/huggingface/v0/README.mdx index a08ca404..2f8e7e1c 100644 --- a/ai/huggingface/v0/README.mdx +++ b/ai/huggingface/v0/README.mdx @@ -46,8 +46,8 @@ The component configuration is defined and maintained [here](https://github.com/ | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | -| API Key (required) | `api-key` | string | Fill in your Hugging face API token. To find your token, visit https://huggingface.co/settings/tokens. | -| Base URL (required) | `base-url` | string | Hostname for the endpoint. To use Inference API set to https://api-inference.huggingface.co, for Inference Endpoint set to your custom endpoint. | +| API Key (required) | `api-key` | string | Fill in your Hugging face API token. To find your token, visit here | +| Base URL (required) | `base-url` | string | Hostname for the endpoint. To use Inference API set to here, for Inference Endpoint set to your custom endpoint. | | Is Custom Endpoint (required) | `is-custom-endpoint` | boolean | Fill true if you are using a custom Inference Endpoint and not the Inference API. 
| diff --git a/ai/huggingface/v0/config/setup.json b/ai/huggingface/v0/config/setup.json index a35adcc9..3ba17c38 100644 --- a/ai/huggingface/v0/config/setup.json +++ b/ai/huggingface/v0/config/setup.json @@ -3,7 +3,7 @@ "additionalProperties": true, "properties": { "api-key": { - "description": "Fill in your Hugging face API token. To find your token, visit https://huggingface.co/settings/tokens.", + "description": "Fill in your Hugging face API token. To find your token, visit here", "instillUpstreamTypes": [ "reference" ], @@ -17,7 +17,7 @@ }, "base-url": { "default": "https://api-inference.huggingface.co", - "description": "Hostname for the endpoint. To use Inference API set to https://api-inference.huggingface.co, for Inference Endpoint set to your custom endpoint.", + "description": "Hostname for the endpoint. To use Inference API set to here, for Inference Endpoint set to your custom endpoint.", "instillUpstreamTypes": [ "value" ], diff --git a/ai/mistralai/v0/README.mdx b/ai/mistralai/v0/README.mdx index d653d449..d110685e 100644 --- a/ai/mistralai/v0/README.mdx +++ b/ai/mistralai/v0/README.mdx @@ -1,11 +1,11 @@ --- -title: "Mistral" +title: "Mistral AI" lang: "en-US" draft: false -description: "Learn about how to set up a VDP Mistral component https://github.com/instill-ai/instill-core" +description: "Learn about how to set up a VDP Mistral AI component https://github.com/instill-ai/instill-core" --- -The Mistral component is an AI component that allows users to connect the AI models served on the Mistral Platform. +The Mistral AI component is an AI component that allows users to connect the AI models served on the Mistral AI Platform. It can carry out the following tasks: - [Text Generation Chat](#text-generation-chat) @@ -21,7 +21,7 @@ It can carry out the following tasks: ## Configuration -The component configuration is defined and maintained [here](https://github.com/instill-ai/component/blob/main/ai/mistral/v0/config/definition.json). 
+The component configuration is defined and maintained [here](https://github.com/instill-ai/component/blob/main/ai/mistralai/v0/config/definition.json). @@ -31,7 +31,7 @@ The component configuration is defined and maintained [here](https://github.com/ | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | -| API Key (required) | `api-key` | string | Fill in your Mistral API key. To find your keys, visit the Mistral platform page. | +| API Key (required) | `api-key` | string | Fill in your Mistral API key. To find your keys, visit the Mistral AI platform page. | @@ -78,7 +78,6 @@ Turn text into a vector of numbers that capture its meaning, unlocking use cases | Input | ID | Type | Description | | :--- | :--- | :--- | :--- | | Task ID (required) | `task` | string | `TASK_TEXT_EMBEDDINGS` | -| Embedding Type (required) | `embedding-type` | string | Specifies the return type of embedding. | | Model Name (required) | `model-name` | string | The Mistral embed model to be used | | Text (required) | `text` | string | The text | diff --git a/ai/ollama/v0/README.mdx b/ai/ollama/v0/README.mdx new file mode 100644 index 00000000..0546542a --- /dev/null +++ b/ai/ollama/v0/README.mdx @@ -0,0 +1,93 @@ +--- +title: "Ollama" +lang: "en-US" +draft: false +description: "Learn about how to set up a VDP Ollama component https://github.com/instill-ai/instill-core" +--- + +The Ollama component is an AI component that allows users to connect the AI models served with the Ollama library. +It can carry out the following tasks: + +- [Text Generation Chat](#text-generation-chat) +- [Text Embeddings](#text-embeddings) + + + +## Release Stage + +`Alpha` + + + +## Configuration + +The component configuration is defined and maintained [here](https://github.com/instill-ai/component/blob/main/ai/ollama/v0/config/definition.json). 
+ + + + +## Setup + + +| Field | Field ID | Type | Note | +| :--- | :--- | :--- | :--- | +| Endpoint (required) | `endpoint` | string | Fill in your Ollama hosting endpoint. ### WARNING ###: As of 2024-07-26, the Ollama component does not support authentication methods. To prevent unauthorized access to your Ollama serving resources, please implement additional security measures such as IP whitelisting. | +| Model Auto-Pull (required) | `auto-pull` | boolean | Automatically pull the requested models from the Ollama server if the model is not found in the local cache. | + + + + +## Supported Tasks + +### Text Generation Chat + +Provide text outputs in response to text/image inputs. + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_TEXT_GENERATION_CHAT` | +| Model Name (required) | `model` | string | The OSS model to be used, check https://ollama.com/library for list of models available | +| Prompt (required) | `prompt` | string | The prompt text | +| System message | `system-message` | string | The system message helps set the behavior of the assistant. For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. By default, the model’s behavior is set using a generic message as "You are a helpful assistant." | +| Prompt Images | `prompt-images` | array[string] | The prompt images | +| Chat history | `chat-history` | array[object] | Incorporate external chat history, specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: \{"role": "The message role, i.e. 'system', 'user' or 'assistant'", "content": "message content"\}. 
| +| Seed | `seed` | integer | The seed | +| Temperature | `temperature` | number | The temperature for sampling | +| Top K | `top-k` | integer | Top k for sampling | +| Max new tokens | `max-new-tokens` | integer | The maximum number of tokens for model to generate | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Text | `text` | string | Model Output | + + + + + + +### Text Embeddings + +Turn text into a vector of numbers that capture its meaning, unlocking use cases like semantic search. + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_TEXT_EMBEDDINGS` | +| Model Name (required) | `model` | string | The OSS model to be used, check https://ollama.com/library for list of models available | +| Text (required) | `text` | string | The text | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Embedding | `embedding` | array[number] | Embedding of the input text | + + + + + + + diff --git a/ai/ollama/v0/assets/ollama.svg b/ai/ollama/v0/assets/ollama.svg new file mode 100644 index 00000000..d63aeb5e --- /dev/null +++ b/ai/ollama/v0/assets/ollama.svg @@ -0,0 +1,7 @@ + + + + + + + diff --git a/ai/ollama/v0/client.go b/ai/ollama/v0/client.go new file mode 100644 index 00000000..c4f9413e --- /dev/null +++ b/ai/ollama/v0/client.go @@ -0,0 +1,176 @@ +package ollama + +import ( + "fmt" + "slices" + + "github.com/instill-ai/component/internal/util/httpclient" + "go.uber.org/zap" +) + +// reference: https://github.com/ollama/ollama/blob/main/docs/api.md +// Ollama v0.2.5 on 2024-07-17 + +type errBody struct { + // Error is the message Ollama returns on failure. The API sends it as a + // plain string, e.g. {"error": "model not found"}, not a nested object. + Error string `json:"error"` +} + +func (e errBody) Message() string { + return e.Error +} + +type OllamaClient struct { + httpClient *httpclient.Client + autoPull bool +} + +func NewClient(endpoint string, autoPull bool, logger *zap.Logger) *OllamaClient { + c := httpclient.New("Ollama", endpoint, 
httpclient.WithLogger(logger), + httpclient.WithEndUserError(new(errBody))) + return &OllamaClient{httpClient: c, autoPull: autoPull} +} + +type OllamaModelInfo struct { + Name string `json:"name"` + ModifiedAt string `json:"modified_at"` + Size int `json:"size"` + Dijest string `json:"digest"` + Details struct { + Format string `json:"format"` + Family string `json:"family"` + Families []string `json:"families"` + ParameterSize string `json:"parameter_size"` + QuantizationLevel string `json:"quantization_level"` + } `json:"details"` +} + +type ListLocalModelsRequest struct { +} + +type ListLocalModelsResponse struct { + Models []OllamaModelInfo `json:"models"` +} + +func (c *OllamaClient) CheckModelAvailability(modelName string) bool { + request := &ListLocalModelsRequest{} + response := &ListLocalModelsResponse{} + req := c.httpClient.R().SetResult(&response).SetBody(request) + if _, err := req.Get("/api/tags"); err != nil { + return false + } + localModels := []string{} + for _, m := range response.Models { + localModels = append(localModels, m.Name) + } + return slices.Contains(localModels, modelName) +} + +type PullModelRequest struct { + Name string `json:"name"` + Stream bool `json:"stream"` +} + +type PullModelResponse struct { +} + +func (c *OllamaClient) Pull(modelName string) error { + request := &PullModelRequest{ + Name: modelName, + Stream: false, + } + response := &PullModelResponse{} + req := c.httpClient.R().SetResult(&response).SetBody(request) + if _, err := req.Post("/api/pull"); err != nil { + return err + } + return nil + +} + +type OllamaChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` + Images []string `json:"images,omitempty"` +} + +type OllamaOptions struct { + Temperature float32 `json:"temperature,omitempty"` + TopK int `json:"top_k,omitempty"` + Seed int `json:"seed,omitempty"` +} + +type ChatRequest struct { + Model string `json:"model"` + Messages []OllamaChatMessage `json:"messages"` + Stream bool 
`json:"stream"` + Options OllamaOptions `json:"options"` +} + +type ChatResponse struct { + Model string `json:"model"` + CreatedAt string `json:"created_at"` + Message OllamaChatMessage `json:"message"` + Done bool `json:"done"` + DoneReason string `json:"done_reason"` + TotalDuration int `json:"total_duration"` + LoadDuration int `json:"load_duration"` + PromptEvalCount int `json:"prompt_eval_count"` + PromptEvalDuration int `json:"prompt_eval_duration"` + EvalCount int `json:"eval_count"` + EvalDuration int `json:"eval_duration"` +} + +func (c *OllamaClient) Chat(request ChatRequest) (ChatResponse, error) { + response := ChatResponse{} + isAvailable := c.CheckModelAvailability(request.Model) + + if !isAvailable && !c.autoPull { + return response, fmt.Errorf("model %s is not available", request.Model) + } + if !isAvailable { + err := c.Pull(request.Model) + if err != nil { + return response, fmt.Errorf("error when auto pulling model %w", err) + } + } + req := c.httpClient.R().SetResult(&response).SetBody(request) + if _, err := req.Post("/api/chat"); err != nil { + return response, fmt.Errorf("error when sending chat request %w", err) + } + return response, nil +} + +type EmbedRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` +} + +type EmbedResponse struct { + Embedding []float32 `json:"embedding"` +} + +func (c *OllamaClient) Embed(request EmbedRequest) (EmbedResponse, error) { + response := EmbedResponse{} + isAvailable := c.CheckModelAvailability(request.Model) + + if !isAvailable && !c.autoPull { + return response, fmt.Errorf("model %s is not available", request.Model) + } + if !isAvailable { + err := c.Pull(request.Model) + if err != nil { + return response, fmt.Errorf("error when auto pulling model %w", err) + } + } + req := c.httpClient.R().SetResult(&response).SetBody(request) + if _, err := req.Post("/api/embeddings"); err != nil { + return response, fmt.Errorf("error when sending embeddings request %w", err) + } + return 
response, nil +} + +func (c *OllamaClient) IsAutoPull() bool { + return c.autoPull +} diff --git a/ai/ollama/v0/component_test.go b/ai/ollama/v0/component_test.go new file mode 100644 index 00000000..e8357cd5 --- /dev/null +++ b/ai/ollama/v0/component_test.go @@ -0,0 +1,151 @@ +package ollama + +import ( + "context" + "encoding/json" + "fmt" + "testing" + + qt "github.com/frankban/quicktest" + "github.com/gojuno/minimock/v3" + "github.com/instill-ai/component/base" + "go.uber.org/zap" + "google.golang.org/protobuf/types/known/structpb" +) + +func TestComponent_Tasks(t *testing.T) { + mc := minimock.NewController(t) + c := qt.New(t) + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + ctx := context.Background() + + OllamaClientMock := NewOllamaClientInterfaceMock(mc) + OllamaClientMock.ChatMock. + When(ChatRequest{ + Model: "moondream", + Options: OllamaOptions{Seed: 0, Temperature: 0, TopK: 0}, + Messages: []OllamaChatMessage{{Role: "user", Content: "Tell me a joke", Images: []string{}}}, + }). + Then(ChatResponse{ + Model: "moondream", + CreatedAt: "2024-07-19T10:54:31.448690295Z", + Message: OllamaChatMessage{Role: "assistant", Content: "\nWhy did the tomato turn red?\nAnswer: Because it saw the salad dressing"}, + Done: true, + DoneReason: "stop", + TotalDuration: 3393091575, + LoadDuration: 3125721807, + PromptEvalCount: 10, + PromptEvalDuration: 34202000, + EvalCount: 18, + EvalDuration: 141520000, + }, nil) + OllamaClientMock.ChatMock. + When(ChatRequest{ + Model: "gemini", + Options: OllamaOptions{Seed: 0, Temperature: 0, TopK: 0}, + Messages: []OllamaChatMessage{{Role: "user", Content: "Tell me a joke", Images: []string{}}}, + }). + Then(ChatResponse{}, fmt.Errorf("error when sending chat request %s", `model "gemini" not found, try pulling it first`)) + OllamaClientMock.EmbedMock. 
+ When(EmbedRequest{ + Model: "snowflake-arctic-embed:22m", + Prompt: "The United Kingdom, made up of England, Scotland, Wales and Northern Ireland, is an island nation in northwestern Europe.", + }). + Then(EmbedResponse{Embedding: []float32{0.1, 0.2, 0.3, 0.4, 0.5}}, nil) + OllamaClientMock.EmbedMock. + When(EmbedRequest{ + Model: "snowflake-arctic-embed:23m", + Prompt: "The United Kingdom, made up of England, Scotland, Wales and Northern Ireland, is an island nation in northwestern Europe.", + }). + Then(EmbedResponse{}, fmt.Errorf("error when sending embeddings request %s", `model "snowflake-arctic-embed:23m" not found, try pulling it first`)) + + c.Run("ok - task text generation", func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "endpoint": "http://localhost:8080", + "auto-pull": true, + }) + c.Assert(err, qt.IsNil) + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TaskTextGenerationChat}, + client: OllamaClientMock, + } + e.execute = e.TaskTextGenerationChat + exec := &base.ExecutionWrapper{Execution: e} + + pbIn, err := base.ConvertToStructpb(map[string]any{"model": "moondream", "prompt": "Tell me a joke"}) + c.Assert(err, qt.IsNil) + + got, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbIn}) + c.Assert(err, qt.IsNil) + + wantJSON, err := json.Marshal(TaskTextGenerationChatOuput{Text: "\nWhy did the tomato turn red?\nAnswer: Because it saw the salad dressing"}) + c.Assert(err, qt.IsNil) + c.Check(wantJSON, qt.JSONEquals, got[0].AsMap()) + }) + + c.Run("nok - task text generation", func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "endpoint": "http://localhost:8080", + "auto-pull": true, + }) + c.Assert(err, qt.IsNil) + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TaskTextGenerationChat}, + client: OllamaClientMock, + } + e.execute = 
e.TaskTextGenerationChat + exec := &base.ExecutionWrapper{Execution: e} + + pbIn, err := base.ConvertToStructpb(map[string]any{"model": "gemini", "prompt": "Tell me a joke"}) + c.Assert(err, qt.IsNil) + + _, err = exec.Execution.Execute(ctx, []*structpb.Struct{pbIn}) + c.Assert(err, qt.ErrorMatches, `error when sending chat request model "gemini" not found, try pulling it first`) + }) + + c.Run("ok - task embedding", func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "endpoint": "http://localhost:8080", + "auto-pull": true, + }) + c.Assert(err, qt.IsNil) + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TaskTextEmbeddings}, + client: OllamaClientMock, + } + e.execute = e.TaskTextEmbeddings + exec := &base.ExecutionWrapper{Execution: e} + + pbIn, err := base.ConvertToStructpb(map[string]any{"model": "snowflake-arctic-embed:22m", "text": "The United Kingdom, made up of England, Scotland, Wales and Northern Ireland, is an island nation in northwestern Europe."}) + c.Assert(err, qt.IsNil) + + got, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbIn}) + c.Assert(err, qt.IsNil) + + wantJSON, err := json.Marshal(TaskTextEmbeddingsOutput{Embedding: []float32{0.1, 0.2, 0.3, 0.4, 0.5}}) + c.Assert(err, qt.IsNil) + c.Check(wantJSON, qt.JSONEquals, got[0].AsMap()) + }) + + c.Run("nok - task embedding", func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "endpoint": "http://localhost:8080", + "auto-pull": true, + }) + c.Assert(err, qt.IsNil) + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TaskTextEmbeddings}, + client: OllamaClientMock, + } + e.execute = e.TaskTextEmbeddings + exec := &base.ExecutionWrapper{Execution: e} + + pbIn, err := base.ConvertToStructpb(map[string]any{"model": "snowflake-arctic-embed:23m", "text": "The United Kingdom, made up of England, Scotland, Wales 
and Northern Ireland, is an island nation in northwestern Europe."}) + c.Assert(err, qt.IsNil) + + _, err = exec.Execution.Execute(ctx, []*structpb.Struct{pbIn}) + c.Assert(err, qt.ErrorMatches, `error when sending embeddings request model "snowflake-arctic-embed:23m" not found, try pulling it first`) + }) + +} diff --git a/ai/ollama/v0/config/definition.json b/ai/ollama/v0/config/definition.json new file mode 100644 index 00000000..0427c032 --- /dev/null +++ b/ai/ollama/v0/config/definition.json @@ -0,0 +1,19 @@ +{ + "availableTasks": [ + "TASK_TEXT_GENERATION_CHAT", + "TASK_TEXT_EMBEDDINGS" + ], + "documentationUrl": "https://www.instill.tech/docs/component/ai/ollama", + "icon": "assets/ollama.svg", + "id": "ollama", + "public": true, + "title": "Ollama", + "description": "Connect the AI models served with the Ollama library", + "type": "COMPONENT_TYPE_AI", + "uid": "5f6dcfc4-efd0-45a1-aae9-c9b4beb68a32", + "vendor": "Ollama", + "vendorAttributes": {}, + "version": "0.1.0", + "sourceUrl": "https://github.com/instill-ai/component/blob/main/ai/ollama/v0", + "releaseStage": "RELEASE_STAGE_ALPHA" +} diff --git a/ai/ollama/v0/config/setup.json b/ai/ollama/v0/config/setup.json new file mode 100644 index 00000000..b0fb042a --- /dev/null +++ b/ai/ollama/v0/config/setup.json @@ -0,0 +1,42 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": true, + "properties": { + "endpoint": { + "description": "Fill in your Ollama hosting endpoint. ### WARNING ###: As of 2024-07-26, the Ollama component does not support authentication methods. 
To prevent unauthorized access to your Ollama serving resources, please implement additional security measures such as IP whitelisting.", + "instillUpstreamTypes": [ + "reference" + ], + "instillAcceptFormats": [ + "string" + ], + "default": "http://localhost:11434", + "instillUIOrder": 0, + "title": "Endpoint", + "type": "string" + }, + "auto-pull": { + "description": "Automatically pull the requested models from the Ollama server if the model is not found in the local cache.", + "instillUpstreamTypes": [ + "value", + "reference" + ], + "instillAcceptFormats": [ + "boolean" + ], + "instillUIOrder": 1, + "title": "Model Auto-Pull", + "type": "boolean" + } + }, + "required": [ + "endpoint", + "auto-pull" + ], + "instillEditOnNodeFields": [ + "endpoint", + "auto-pull" + ], + "title": "Ollama Connection", + "type": "object" +} diff --git a/ai/ollama/v0/config/tasks.json b/ai/ollama/v0/config/tasks.json new file mode 100644 index 00000000..bafea4ff --- /dev/null +++ b/ai/ollama/v0/config/tasks.json @@ -0,0 +1,307 @@ +{ + "$defs": { + "multi-modal-content": { + "instillFormat": "structured/multi-modal-content", + "items": { + "properties": { + "image-url": { + "properties": { + "url": { + "description": "Either a URL of the image or the base64 encoded image data.", + "type": "string" + } + }, + "required": [ + "url" + ], + "type": "object" + }, + "text": { + "description": "The text content.", + "instillFormat": "string", + "type": "string" + }, + "type": { + "description": "The type of the content part.", + "enum": [ + "text", + "image_url" + ], + "instillFormat": "string", + "type": "string" + } + }, + "required": [ + "type" + ], + "type": "object" + }, + "type": "array" + }, + "chat-message": { + "properties": { + "content": { + "$ref": "#/$defs/multi-modal-content", + "description": "The message content", + "instillUIOrder": 1, + "title": "Content" + }, + "role": { + "description": "The message role, i.e. 
'system', 'user' or 'assistant'", + "instillFormat": "string", + "instillUIOrder": 0, + "title": "Role", + "type": "string" + } + }, + "required": [ + "role", + "content" + ], + "title": "Chat Message", + "type": "object" + } + }, + "TASK_TEXT_GENERATION_CHAT": { + "instillShortDescription": "Provide text outputs in response to text/image inputs.", + "description": "Open-source large language models (OSS LLMs) are artificial intelligence models with publicly accessible code and architecture, allowing for free use, modification, and distribution. These models can provide performance comparable to proprietary alternatives. Ollama is a tool that enables you to run and interact with OSS LLMs using limited computational resources. You can install Ollama from: https://ollama.com.", + "input": { + "description": "Input", + "instillEditOnNodeFields": [ + "prompt", + "model" + ], + "instillUIOrder": 0, + "properties": { + "chat-history": { + "description": "Incorporate external chat history, specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: {\"role\": \"The message role, i.e. 
'system', 'user' or 'assistant'\", \"content\": \"message content\"}.", + "instillAcceptFormats": [ + "structured/chat-messages" + ], + "instillShortDescription": "Incorporate external chat history, specifically previous messages within the conversation.", + "instillUIOrder": 4, + "instillUpstreamTypes": [ + "reference" + ], + "items": { + "$ref": "#/$defs/chat-message" + }, + "title": "Chat history", + "type": "array" + }, + "max-new-tokens": { + "default": 50, + "description": "The maximum number of tokens for model to generate", + "instillAcceptFormats": [ + "integer" + ], + "instillUIOrder": 6, + "instillUpstreamTypes": [ + "value", + "reference" + ], + "title": "Max new tokens", + "type": "integer" + }, + "model": { + "example": "moondream", + "description": "The OSS model to be used, check https://ollama.com/library for list of models available", + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 0, + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "title": "Model Name", + "type": "string" + }, + "prompt": { + "description": "The prompt text", + "instillAcceptFormats": [ + "string" + ], + "instillUIMultiline": true, + "instillUIOrder": 2, + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "title": "Prompt", + "type": "string" + }, + "prompt-images": { + "description": "The prompt images", + "instillAcceptFormats": [ + "array:image/*" + ], + "instillUIOrder": 3, + "instillUpstreamTypes": [ + "reference" + ], + "items": { + "type": "string" + }, + "title": "Prompt Images", + "type": "array" + }, + "seed": { + "description": "The seed", + "instillAcceptFormats": [ + "integer" + ], + "instillUIOrder": 4, + "instillUpstreamTypes": [ + "value", + "reference" + ], + "title": "Seed", + "type": "integer" + }, + "system-message": { + "default": "You are a helpful assistant.", + "description": "The system message helps set the behavior of the assistant. 
For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. By default, the model\u2019s behavior is set using a generic message as \"You are a helpful assistant.\"", + "instillAcceptFormats": [ + "string" + ], + "instillShortDescription": "The system message helps set the behavior of the assistant", + "instillUIMultiline": true, + "instillUIOrder": 2, + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "title": "System message", + "type": "string" + }, + "temperature": { + "default": 0.7, + "description": "The temperature for sampling", + "instillAcceptFormats": [ + "number" + ], + "instillUIOrder": 5, + "instillUpstreamTypes": [ + "value", + "reference" + ], + "title": "Temperature", + "type": "number" + }, + "top-k": { + "default": 10, + "description": "Top k for sampling", + "instillAcceptFormats": [ + "integer" + ], + "instillUIOrder": 5, + "instillUpstreamTypes": [ + "value", + "reference" + ], + "title": "Top K", + "type": "integer" + } + }, + "required": [ + "prompt", + "model" + ], + "title": "Input", + "type": "object" + }, + "output": { + "description": "Output", + "instillUIOrder": 0, + "properties": { + "text": { + "description": "Model Output", + "instillUIOrder": 0, + "instillFormat": "string", + "instillUIMultiline": true, + "title": "Text", + "type": "string" + } + }, + "required": [ + "text" + ], + "title": "Output", + "type": "object" + } + }, + "TASK_TEXT_EMBEDDINGS": { + "instillShortDescription": "Turn text into a vector of numbers that capture its meaning, unlocking use cases like semantic search.", + "description": "An embedding is a list of floating point numbers that captures semantic information about the text that it represents.", + "input": { + "instillUIOrder": 0, + "properties": { + "model": { + "example": "snowflake-arctic-embed:22m", + "description": "The OSS model to be used, check https://ollama.com/library for list of 
models available", + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 0, + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "title": "Model Name", + "type": "string" + }, + "text": { + "description": "The text", + "instillAcceptFormats": [ + "string" + ], + "instillUIMultiline": true, + "instillUIOrder": 1, + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "title": "Text", + "type": "string" + } + }, + "required": [ + "text", + "model" + ], + "instillEditOnNodeFields": [ + "text", + "model" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 0, + "properties": { + "embedding": { + "instillFormat": "array:number", + "items": { + "instillFormat": "number", + "type": "number" + }, + "type": "array", + "description": "Embedding of the input text", + "instillUIOrder": 0, + "title": "Embedding" + } + }, + "required": [ + "embedding" + ], + "title": "Output", + "type": "object" + } + } +} diff --git a/ai/ollama/v0/main.go b/ai/ollama/v0/main.go new file mode 100644 index 00000000..1a763768 --- /dev/null +++ b/ai/ollama/v0/main.go @@ -0,0 +1,99 @@ +//go:generate compogen readme ./config ./README.mdx +package ollama + +import ( + "context" + _ "embed" + "fmt" + "sync" + + "google.golang.org/protobuf/types/known/structpb" + + "github.com/instill-ai/component/base" +) + +const ( + TaskTextGenerationChat = "TASK_TEXT_GENERATION_CHAT" + TaskTextEmbeddings = "TASK_TEXT_EMBEDDINGS" +) + +var ( + //go:embed config/definition.json + definitionJSON []byte + //go:embed config/setup.json + setupJSON []byte + //go:embed config/tasks.json + tasksJSON []byte + + once sync.Once + comp *component +) + +type component struct { + base.Component +} + +type OllamaSetup struct { + AutoPull bool `json:"auto-pull"` + Endpoint string `json:"endpoint"` +} + +func Init(bc base.Component) *component { + once.Do(func() { + comp = &component{Component: bc} + err := comp.LoadDefinition(definitionJSON, setupJSON, 
tasksJSON, nil) + if err != nil { + panic(err) + } + }) + return comp +} + +type OllamaClientInterface interface { + Chat(ChatRequest) (ChatResponse, error) + Embed(EmbedRequest) (EmbedResponse, error) + IsAutoPull() bool +} + +type execution struct { + base.ComponentExecution + client OllamaClientInterface + execute func(*structpb.Struct) (*structpb.Struct, error) +} + +func (e *execution) Execute(_ context.Context, inputs []*structpb.Struct) ([]*structpb.Struct, error) { + outputs := make([]*structpb.Struct, len(inputs)) + + // The execution takes an array of inputs and returns an array of outputs. The execution is done sequentially. + for i, input := range inputs { + output, err := e.execute(input) + if err != nil { + return nil, err + } + + outputs[i] = output + } + + return outputs, nil +} + +func (c *component) CreateExecution(sysVars map[string]any, setup *structpb.Struct, task string) (*base.ExecutionWrapper, error) { + setupStruct := &OllamaSetup{} + if err := base.ConvertFromStructpb(setup, setupStruct); err != nil { + return nil, fmt.Errorf("error parsing setup, %w", err) + } + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: c, SystemVariables: sysVars, Task: task, Setup: setup}, + client: NewClient(setupStruct.Endpoint, setupStruct.AutoPull, c.Logger), + } + switch task { + case TaskTextGenerationChat: + e.execute = e.TaskTextGenerationChat + case TaskTextEmbeddings: + e.execute = e.TaskTextEmbeddings + default: + return nil, fmt.Errorf("unsupported task") + } + return &base.ExecutionWrapper{Execution: e}, nil +} diff --git a/ai/ollama/v0/ollama_client_interface_mock_test.go b/ai/ollama/v0/ollama_client_interface_mock_test.go new file mode 100644 index 00000000..982c73d1 --- /dev/null +++ b/ai/ollama/v0/ollama_client_interface_mock_test.go @@ -0,0 +1,861 @@ +// Code generated by http://github.com/gojuno/minimock (v3.3.13). DO NOT EDIT. 
+ +package ollama + +//go:generate minimock -i github.com/instill-ai/component/ai/ollama/v0.OllamaClientInterface -o ollama_client_interface_mock_test.go -n OllamaClientInterfaceMock -p ollama + +import ( + _ "embed" + "sync" + mm_atomic "sync/atomic" + mm_time "time" + + "github.com/gojuno/minimock/v3" +) + +// OllamaClientInterfaceMock implements OllamaClientInterface +type OllamaClientInterfaceMock struct { + t minimock.Tester + finishOnce sync.Once + + funcChat func(c1 ChatRequest) (c2 ChatResponse, err error) + inspectFuncChat func(c1 ChatRequest) + afterChatCounter uint64 + beforeChatCounter uint64 + ChatMock mOllamaClientInterfaceMockChat + + funcEmbed func(e1 EmbedRequest) (e2 EmbedResponse, err error) + inspectFuncEmbed func(e1 EmbedRequest) + afterEmbedCounter uint64 + beforeEmbedCounter uint64 + EmbedMock mOllamaClientInterfaceMockEmbed + + funcIsAutoPull func() (b1 bool) + inspectFuncIsAutoPull func() + afterIsAutoPullCounter uint64 + beforeIsAutoPullCounter uint64 + IsAutoPullMock mOllamaClientInterfaceMockIsAutoPull +} + +// NewOllamaClientInterfaceMock returns a mock for OllamaClientInterface +func NewOllamaClientInterfaceMock(t minimock.Tester) *OllamaClientInterfaceMock { + m := &OllamaClientInterfaceMock{t: t} + + if controller, ok := t.(minimock.MockController); ok { + controller.RegisterMocker(m) + } + + m.ChatMock = mOllamaClientInterfaceMockChat{mock: m} + m.ChatMock.callArgs = []*OllamaClientInterfaceMockChatParams{} + + m.EmbedMock = mOllamaClientInterfaceMockEmbed{mock: m} + m.EmbedMock.callArgs = []*OllamaClientInterfaceMockEmbedParams{} + + m.IsAutoPullMock = mOllamaClientInterfaceMockIsAutoPull{mock: m} + + t.Cleanup(m.MinimockFinish) + + return m +} + +type mOllamaClientInterfaceMockChat struct { + optional bool + mock *OllamaClientInterfaceMock + defaultExpectation *OllamaClientInterfaceMockChatExpectation + expectations []*OllamaClientInterfaceMockChatExpectation + + callArgs []*OllamaClientInterfaceMockChatParams + mutex sync.RWMutex 
+ + expectedInvocations uint64 +} + +// OllamaClientInterfaceMockChatExpectation specifies expectation struct of the OllamaClientInterface.Chat +type OllamaClientInterfaceMockChatExpectation struct { + mock *OllamaClientInterfaceMock + params *OllamaClientInterfaceMockChatParams + paramPtrs *OllamaClientInterfaceMockChatParamPtrs + results *OllamaClientInterfaceMockChatResults + Counter uint64 +} + +// OllamaClientInterfaceMockChatParams contains parameters of the OllamaClientInterface.Chat +type OllamaClientInterfaceMockChatParams struct { + c1 ChatRequest +} + +// OllamaClientInterfaceMockChatParamPtrs contains pointers to parameters of the OllamaClientInterface.Chat +type OllamaClientInterfaceMockChatParamPtrs struct { + c1 *ChatRequest +} + +// OllamaClientInterfaceMockChatResults contains results of the OllamaClientInterface.Chat +type OllamaClientInterfaceMockChatResults struct { + c2 ChatResponse + err error +} + +// Marks this method to be optional. The default behavior of any method with Return() is '1 or more', meaning +// the test will fail minimock's automatic final call check if the mocked method was not called at least once. +// Optional() makes method check to work in '0 or more' mode. +// It is NOT RECOMMENDED to use this option unless you really need it, as default behaviour helps to +// catch the problems when the expected method call is totally skipped during test run. 
+func (mmChat *mOllamaClientInterfaceMockChat) Optional() *mOllamaClientInterfaceMockChat { + mmChat.optional = true + return mmChat +} + +// Expect sets up expected params for OllamaClientInterface.Chat +func (mmChat *mOllamaClientInterfaceMockChat) Expect(c1 ChatRequest) *mOllamaClientInterfaceMockChat { + if mmChat.mock.funcChat != nil { + mmChat.mock.t.Fatalf("OllamaClientInterfaceMock.Chat mock is already set by Set") + } + + if mmChat.defaultExpectation == nil { + mmChat.defaultExpectation = &OllamaClientInterfaceMockChatExpectation{} + } + + if mmChat.defaultExpectation.paramPtrs != nil { + mmChat.mock.t.Fatalf("OllamaClientInterfaceMock.Chat mock is already set by ExpectParams functions") + } + + mmChat.defaultExpectation.params = &OllamaClientInterfaceMockChatParams{c1} + for _, e := range mmChat.expectations { + if minimock.Equal(e.params, mmChat.defaultExpectation.params) { + mmChat.mock.t.Fatalf("Expectation set by When has same params: %#v", *mmChat.defaultExpectation.params) + } + } + + return mmChat +} + +// ExpectC1Param1 sets up expected param c1 for OllamaClientInterface.Chat +func (mmChat *mOllamaClientInterfaceMockChat) ExpectC1Param1(c1 ChatRequest) *mOllamaClientInterfaceMockChat { + if mmChat.mock.funcChat != nil { + mmChat.mock.t.Fatalf("OllamaClientInterfaceMock.Chat mock is already set by Set") + } + + if mmChat.defaultExpectation == nil { + mmChat.defaultExpectation = &OllamaClientInterfaceMockChatExpectation{} + } + + if mmChat.defaultExpectation.params != nil { + mmChat.mock.t.Fatalf("OllamaClientInterfaceMock.Chat mock is already set by Expect") + } + + if mmChat.defaultExpectation.paramPtrs == nil { + mmChat.defaultExpectation.paramPtrs = &OllamaClientInterfaceMockChatParamPtrs{} + } + mmChat.defaultExpectation.paramPtrs.c1 = &c1 + + return mmChat +} + +// Inspect accepts an inspector function that has same arguments as the OllamaClientInterface.Chat +func (mmChat *mOllamaClientInterfaceMockChat) Inspect(f func(c1 ChatRequest)) 
*mOllamaClientInterfaceMockChat { + if mmChat.mock.inspectFuncChat != nil { + mmChat.mock.t.Fatalf("Inspect function is already set for OllamaClientInterfaceMock.Chat") + } + + mmChat.mock.inspectFuncChat = f + + return mmChat +} + +// Return sets up results that will be returned by OllamaClientInterface.Chat +func (mmChat *mOllamaClientInterfaceMockChat) Return(c2 ChatResponse, err error) *OllamaClientInterfaceMock { + if mmChat.mock.funcChat != nil { + mmChat.mock.t.Fatalf("OllamaClientInterfaceMock.Chat mock is already set by Set") + } + + if mmChat.defaultExpectation == nil { + mmChat.defaultExpectation = &OllamaClientInterfaceMockChatExpectation{mock: mmChat.mock} + } + mmChat.defaultExpectation.results = &OllamaClientInterfaceMockChatResults{c2, err} + return mmChat.mock +} + +// Set uses given function f to mock the OllamaClientInterface.Chat method +func (mmChat *mOllamaClientInterfaceMockChat) Set(f func(c1 ChatRequest) (c2 ChatResponse, err error)) *OllamaClientInterfaceMock { + if mmChat.defaultExpectation != nil { + mmChat.mock.t.Fatalf("Default expectation is already set for the OllamaClientInterface.Chat method") + } + + if len(mmChat.expectations) > 0 { + mmChat.mock.t.Fatalf("Some expectations are already set for the OllamaClientInterface.Chat method") + } + + mmChat.mock.funcChat = f + return mmChat.mock +} + +// When sets expectation for the OllamaClientInterface.Chat which will trigger the result defined by the following +// Then helper +func (mmChat *mOllamaClientInterfaceMockChat) When(c1 ChatRequest) *OllamaClientInterfaceMockChatExpectation { + if mmChat.mock.funcChat != nil { + mmChat.mock.t.Fatalf("OllamaClientInterfaceMock.Chat mock is already set by Set") + } + + expectation := &OllamaClientInterfaceMockChatExpectation{ + mock: mmChat.mock, + params: &OllamaClientInterfaceMockChatParams{c1}, + } + mmChat.expectations = append(mmChat.expectations, expectation) + return expectation +} + +// Then sets up OllamaClientInterface.Chat return 
parameters for the expectation previously defined by the When method +func (e *OllamaClientInterfaceMockChatExpectation) Then(c2 ChatResponse, err error) *OllamaClientInterfaceMock { + e.results = &OllamaClientInterfaceMockChatResults{c2, err} + return e.mock +} + +// Times sets number of times OllamaClientInterface.Chat should be invoked +func (mmChat *mOllamaClientInterfaceMockChat) Times(n uint64) *mOllamaClientInterfaceMockChat { + if n == 0 { + mmChat.mock.t.Fatalf("Times of OllamaClientInterfaceMock.Chat mock can not be zero") + } + mm_atomic.StoreUint64(&mmChat.expectedInvocations, n) + return mmChat +} + +func (mmChat *mOllamaClientInterfaceMockChat) invocationsDone() bool { + if len(mmChat.expectations) == 0 && mmChat.defaultExpectation == nil && mmChat.mock.funcChat == nil { + return true + } + + totalInvocations := mm_atomic.LoadUint64(&mmChat.mock.afterChatCounter) + expectedInvocations := mm_atomic.LoadUint64(&mmChat.expectedInvocations) + + return totalInvocations > 0 && (expectedInvocations == 0 || expectedInvocations == totalInvocations) +} + +// Chat implements OllamaClientInterface +func (mmChat *OllamaClientInterfaceMock) Chat(c1 ChatRequest) (c2 ChatResponse, err error) { + mm_atomic.AddUint64(&mmChat.beforeChatCounter, 1) + defer mm_atomic.AddUint64(&mmChat.afterChatCounter, 1) + + if mmChat.inspectFuncChat != nil { + mmChat.inspectFuncChat(c1) + } + + mm_params := OllamaClientInterfaceMockChatParams{c1} + + // Record call args + mmChat.ChatMock.mutex.Lock() + mmChat.ChatMock.callArgs = append(mmChat.ChatMock.callArgs, &mm_params) + mmChat.ChatMock.mutex.Unlock() + + for _, e := range mmChat.ChatMock.expectations { + if minimock.Equal(*e.params, mm_params) { + mm_atomic.AddUint64(&e.Counter, 1) + return e.results.c2, e.results.err + } + } + + if mmChat.ChatMock.defaultExpectation != nil { + mm_atomic.AddUint64(&mmChat.ChatMock.defaultExpectation.Counter, 1) + mm_want := mmChat.ChatMock.defaultExpectation.params + mm_want_ptrs := 
mmChat.ChatMock.defaultExpectation.paramPtrs + + mm_got := OllamaClientInterfaceMockChatParams{c1} + + if mm_want_ptrs != nil { + + if mm_want_ptrs.c1 != nil && !minimock.Equal(*mm_want_ptrs.c1, mm_got.c1) { + mmChat.t.Errorf("OllamaClientInterfaceMock.Chat got unexpected parameter c1, want: %#v, got: %#v%s\n", *mm_want_ptrs.c1, mm_got.c1, minimock.Diff(*mm_want_ptrs.c1, mm_got.c1)) + } + + } else if mm_want != nil && !minimock.Equal(*mm_want, mm_got) { + mmChat.t.Errorf("OllamaClientInterfaceMock.Chat got unexpected parameters, want: %#v, got: %#v%s\n", *mm_want, mm_got, minimock.Diff(*mm_want, mm_got)) + } + + mm_results := mmChat.ChatMock.defaultExpectation.results + if mm_results == nil { + mmChat.t.Fatal("No results are set for the OllamaClientInterfaceMock.Chat") + } + return (*mm_results).c2, (*mm_results).err + } + if mmChat.funcChat != nil { + return mmChat.funcChat(c1) + } + mmChat.t.Fatalf("Unexpected call to OllamaClientInterfaceMock.Chat. %v", c1) + return +} + +// ChatAfterCounter returns a count of finished OllamaClientInterfaceMock.Chat invocations +func (mmChat *OllamaClientInterfaceMock) ChatAfterCounter() uint64 { + return mm_atomic.LoadUint64(&mmChat.afterChatCounter) +} + +// ChatBeforeCounter returns a count of OllamaClientInterfaceMock.Chat invocations +func (mmChat *OllamaClientInterfaceMock) ChatBeforeCounter() uint64 { + return mm_atomic.LoadUint64(&mmChat.beforeChatCounter) +} + +// Calls returns a list of arguments used in each call to OllamaClientInterfaceMock.Chat. +// The list is in the same order as the calls were made (i.e. 
recent calls have a higher index) +func (mmChat *mOllamaClientInterfaceMockChat) Calls() []*OllamaClientInterfaceMockChatParams { + mmChat.mutex.RLock() + + argCopy := make([]*OllamaClientInterfaceMockChatParams, len(mmChat.callArgs)) + copy(argCopy, mmChat.callArgs) + + mmChat.mutex.RUnlock() + + return argCopy +} + +// MinimockChatDone returns true if the count of the Chat invocations corresponds +// the number of defined expectations +func (m *OllamaClientInterfaceMock) MinimockChatDone() bool { + if m.ChatMock.optional { + // Optional methods provide '0 or more' call count restriction. + return true + } + + for _, e := range m.ChatMock.expectations { + if mm_atomic.LoadUint64(&e.Counter) < 1 { + return false + } + } + + return m.ChatMock.invocationsDone() +} + +// MinimockChatInspect logs each unmet expectation +func (m *OllamaClientInterfaceMock) MinimockChatInspect() { + for _, e := range m.ChatMock.expectations { + if mm_atomic.LoadUint64(&e.Counter) < 1 { + m.t.Errorf("Expected call to OllamaClientInterfaceMock.Chat with params: %#v", *e.params) + } + } + + afterChatCounter := mm_atomic.LoadUint64(&m.afterChatCounter) + // if default expectation was set then invocations count should be greater than zero + if m.ChatMock.defaultExpectation != nil && afterChatCounter < 1 { + if m.ChatMock.defaultExpectation.params == nil { + m.t.Error("Expected call to OllamaClientInterfaceMock.Chat") + } else { + m.t.Errorf("Expected call to OllamaClientInterfaceMock.Chat with params: %#v", *m.ChatMock.defaultExpectation.params) + } + } + // if func was set then invocations count should be greater than zero + if m.funcChat != nil && afterChatCounter < 1 { + m.t.Error("Expected call to OllamaClientInterfaceMock.Chat") + } + + if !m.ChatMock.invocationsDone() && afterChatCounter > 0 { + m.t.Errorf("Expected %d calls to OllamaClientInterfaceMock.Chat but found %d calls", + mm_atomic.LoadUint64(&m.ChatMock.expectedInvocations), afterChatCounter) + } +} + +type 
mOllamaClientInterfaceMockEmbed struct { + optional bool + mock *OllamaClientInterfaceMock + defaultExpectation *OllamaClientInterfaceMockEmbedExpectation + expectations []*OllamaClientInterfaceMockEmbedExpectation + + callArgs []*OllamaClientInterfaceMockEmbedParams + mutex sync.RWMutex + + expectedInvocations uint64 +} + +// OllamaClientInterfaceMockEmbedExpectation specifies expectation struct of the OllamaClientInterface.Embed +type OllamaClientInterfaceMockEmbedExpectation struct { + mock *OllamaClientInterfaceMock + params *OllamaClientInterfaceMockEmbedParams + paramPtrs *OllamaClientInterfaceMockEmbedParamPtrs + results *OllamaClientInterfaceMockEmbedResults + Counter uint64 +} + +// OllamaClientInterfaceMockEmbedParams contains parameters of the OllamaClientInterface.Embed +type OllamaClientInterfaceMockEmbedParams struct { + e1 EmbedRequest +} + +// OllamaClientInterfaceMockEmbedParamPtrs contains pointers to parameters of the OllamaClientInterface.Embed +type OllamaClientInterfaceMockEmbedParamPtrs struct { + e1 *EmbedRequest +} + +// OllamaClientInterfaceMockEmbedResults contains results of the OllamaClientInterface.Embed +type OllamaClientInterfaceMockEmbedResults struct { + e2 EmbedResponse + err error +} + +// Marks this method to be optional. The default behavior of any method with Return() is '1 or more', meaning +// the test will fail minimock's automatic final call check if the mocked method was not called at least once. +// Optional() makes method check to work in '0 or more' mode. +// It is NOT RECOMMENDED to use this option unless you really need it, as default behaviour helps to +// catch the problems when the expected method call is totally skipped during test run. 
+func (mmEmbed *mOllamaClientInterfaceMockEmbed) Optional() *mOllamaClientInterfaceMockEmbed { + mmEmbed.optional = true + return mmEmbed +} + +// Expect sets up expected params for OllamaClientInterface.Embed +func (mmEmbed *mOllamaClientInterfaceMockEmbed) Expect(e1 EmbedRequest) *mOllamaClientInterfaceMockEmbed { + if mmEmbed.mock.funcEmbed != nil { + mmEmbed.mock.t.Fatalf("OllamaClientInterfaceMock.Embed mock is already set by Set") + } + + if mmEmbed.defaultExpectation == nil { + mmEmbed.defaultExpectation = &OllamaClientInterfaceMockEmbedExpectation{} + } + + if mmEmbed.defaultExpectation.paramPtrs != nil { + mmEmbed.mock.t.Fatalf("OllamaClientInterfaceMock.Embed mock is already set by ExpectParams functions") + } + + mmEmbed.defaultExpectation.params = &OllamaClientInterfaceMockEmbedParams{e1} + for _, e := range mmEmbed.expectations { + if minimock.Equal(e.params, mmEmbed.defaultExpectation.params) { + mmEmbed.mock.t.Fatalf("Expectation set by When has same params: %#v", *mmEmbed.defaultExpectation.params) + } + } + + return mmEmbed +} + +// ExpectE1Param1 sets up expected param e1 for OllamaClientInterface.Embed +func (mmEmbed *mOllamaClientInterfaceMockEmbed) ExpectE1Param1(e1 EmbedRequest) *mOllamaClientInterfaceMockEmbed { + if mmEmbed.mock.funcEmbed != nil { + mmEmbed.mock.t.Fatalf("OllamaClientInterfaceMock.Embed mock is already set by Set") + } + + if mmEmbed.defaultExpectation == nil { + mmEmbed.defaultExpectation = &OllamaClientInterfaceMockEmbedExpectation{} + } + + if mmEmbed.defaultExpectation.params != nil { + mmEmbed.mock.t.Fatalf("OllamaClientInterfaceMock.Embed mock is already set by Expect") + } + + if mmEmbed.defaultExpectation.paramPtrs == nil { + mmEmbed.defaultExpectation.paramPtrs = &OllamaClientInterfaceMockEmbedParamPtrs{} + } + mmEmbed.defaultExpectation.paramPtrs.e1 = &e1 + + return mmEmbed +} + +// Inspect accepts an inspector function that has same arguments as the OllamaClientInterface.Embed +func (mmEmbed 
*mOllamaClientInterfaceMockEmbed) Inspect(f func(e1 EmbedRequest)) *mOllamaClientInterfaceMockEmbed { + if mmEmbed.mock.inspectFuncEmbed != nil { + mmEmbed.mock.t.Fatalf("Inspect function is already set for OllamaClientInterfaceMock.Embed") + } + + mmEmbed.mock.inspectFuncEmbed = f + + return mmEmbed +} + +// Return sets up results that will be returned by OllamaClientInterface.Embed +func (mmEmbed *mOllamaClientInterfaceMockEmbed) Return(e2 EmbedResponse, err error) *OllamaClientInterfaceMock { + if mmEmbed.mock.funcEmbed != nil { + mmEmbed.mock.t.Fatalf("OllamaClientInterfaceMock.Embed mock is already set by Set") + } + + if mmEmbed.defaultExpectation == nil { + mmEmbed.defaultExpectation = &OllamaClientInterfaceMockEmbedExpectation{mock: mmEmbed.mock} + } + mmEmbed.defaultExpectation.results = &OllamaClientInterfaceMockEmbedResults{e2, err} + return mmEmbed.mock +} + +// Set uses given function f to mock the OllamaClientInterface.Embed method +func (mmEmbed *mOllamaClientInterfaceMockEmbed) Set(f func(e1 EmbedRequest) (e2 EmbedResponse, err error)) *OllamaClientInterfaceMock { + if mmEmbed.defaultExpectation != nil { + mmEmbed.mock.t.Fatalf("Default expectation is already set for the OllamaClientInterface.Embed method") + } + + if len(mmEmbed.expectations) > 0 { + mmEmbed.mock.t.Fatalf("Some expectations are already set for the OllamaClientInterface.Embed method") + } + + mmEmbed.mock.funcEmbed = f + return mmEmbed.mock +} + +// When sets expectation for the OllamaClientInterface.Embed which will trigger the result defined by the following +// Then helper +func (mmEmbed *mOllamaClientInterfaceMockEmbed) When(e1 EmbedRequest) *OllamaClientInterfaceMockEmbedExpectation { + if mmEmbed.mock.funcEmbed != nil { + mmEmbed.mock.t.Fatalf("OllamaClientInterfaceMock.Embed mock is already set by Set") + } + + expectation := &OllamaClientInterfaceMockEmbedExpectation{ + mock: mmEmbed.mock, + params: &OllamaClientInterfaceMockEmbedParams{e1}, + } + mmEmbed.expectations = 
append(mmEmbed.expectations, expectation) + return expectation +} + +// Then sets up OllamaClientInterface.Embed return parameters for the expectation previously defined by the When method +func (e *OllamaClientInterfaceMockEmbedExpectation) Then(e2 EmbedResponse, err error) *OllamaClientInterfaceMock { + e.results = &OllamaClientInterfaceMockEmbedResults{e2, err} + return e.mock +} + +// Times sets number of times OllamaClientInterface.Embed should be invoked +func (mmEmbed *mOllamaClientInterfaceMockEmbed) Times(n uint64) *mOllamaClientInterfaceMockEmbed { + if n == 0 { + mmEmbed.mock.t.Fatalf("Times of OllamaClientInterfaceMock.Embed mock can not be zero") + } + mm_atomic.StoreUint64(&mmEmbed.expectedInvocations, n) + return mmEmbed +} + +func (mmEmbed *mOllamaClientInterfaceMockEmbed) invocationsDone() bool { + if len(mmEmbed.expectations) == 0 && mmEmbed.defaultExpectation == nil && mmEmbed.mock.funcEmbed == nil { + return true + } + + totalInvocations := mm_atomic.LoadUint64(&mmEmbed.mock.afterEmbedCounter) + expectedInvocations := mm_atomic.LoadUint64(&mmEmbed.expectedInvocations) + + return totalInvocations > 0 && (expectedInvocations == 0 || expectedInvocations == totalInvocations) +} + +// Embed implements OllamaClientInterface +func (mmEmbed *OllamaClientInterfaceMock) Embed(e1 EmbedRequest) (e2 EmbedResponse, err error) { + mm_atomic.AddUint64(&mmEmbed.beforeEmbedCounter, 1) + defer mm_atomic.AddUint64(&mmEmbed.afterEmbedCounter, 1) + + if mmEmbed.inspectFuncEmbed != nil { + mmEmbed.inspectFuncEmbed(e1) + } + + mm_params := OllamaClientInterfaceMockEmbedParams{e1} + + // Record call args + mmEmbed.EmbedMock.mutex.Lock() + mmEmbed.EmbedMock.callArgs = append(mmEmbed.EmbedMock.callArgs, &mm_params) + mmEmbed.EmbedMock.mutex.Unlock() + + for _, e := range mmEmbed.EmbedMock.expectations { + if minimock.Equal(*e.params, mm_params) { + mm_atomic.AddUint64(&e.Counter, 1) + return e.results.e2, e.results.err + } + } + + if mmEmbed.EmbedMock.defaultExpectation 
!= nil { + mm_atomic.AddUint64(&mmEmbed.EmbedMock.defaultExpectation.Counter, 1) + mm_want := mmEmbed.EmbedMock.defaultExpectation.params + mm_want_ptrs := mmEmbed.EmbedMock.defaultExpectation.paramPtrs + + mm_got := OllamaClientInterfaceMockEmbedParams{e1} + + if mm_want_ptrs != nil { + + if mm_want_ptrs.e1 != nil && !minimock.Equal(*mm_want_ptrs.e1, mm_got.e1) { + mmEmbed.t.Errorf("OllamaClientInterfaceMock.Embed got unexpected parameter e1, want: %#v, got: %#v%s\n", *mm_want_ptrs.e1, mm_got.e1, minimock.Diff(*mm_want_ptrs.e1, mm_got.e1)) + } + + } else if mm_want != nil && !minimock.Equal(*mm_want, mm_got) { + mmEmbed.t.Errorf("OllamaClientInterfaceMock.Embed got unexpected parameters, want: %#v, got: %#v%s\n", *mm_want, mm_got, minimock.Diff(*mm_want, mm_got)) + } + + mm_results := mmEmbed.EmbedMock.defaultExpectation.results + if mm_results == nil { + mmEmbed.t.Fatal("No results are set for the OllamaClientInterfaceMock.Embed") + } + return (*mm_results).e2, (*mm_results).err + } + if mmEmbed.funcEmbed != nil { + return mmEmbed.funcEmbed(e1) + } + mmEmbed.t.Fatalf("Unexpected call to OllamaClientInterfaceMock.Embed. %v", e1) + return +} + +// EmbedAfterCounter returns a count of finished OllamaClientInterfaceMock.Embed invocations +func (mmEmbed *OllamaClientInterfaceMock) EmbedAfterCounter() uint64 { + return mm_atomic.LoadUint64(&mmEmbed.afterEmbedCounter) +} + +// EmbedBeforeCounter returns a count of OllamaClientInterfaceMock.Embed invocations +func (mmEmbed *OllamaClientInterfaceMock) EmbedBeforeCounter() uint64 { + return mm_atomic.LoadUint64(&mmEmbed.beforeEmbedCounter) +} + +// Calls returns a list of arguments used in each call to OllamaClientInterfaceMock.Embed. +// The list is in the same order as the calls were made (i.e. 
recent calls have a higher index) +func (mmEmbed *mOllamaClientInterfaceMockEmbed) Calls() []*OllamaClientInterfaceMockEmbedParams { + mmEmbed.mutex.RLock() + + argCopy := make([]*OllamaClientInterfaceMockEmbedParams, len(mmEmbed.callArgs)) + copy(argCopy, mmEmbed.callArgs) + + mmEmbed.mutex.RUnlock() + + return argCopy +} + +// MinimockEmbedDone returns true if the count of the Embed invocations corresponds +// the number of defined expectations +func (m *OllamaClientInterfaceMock) MinimockEmbedDone() bool { + if m.EmbedMock.optional { + // Optional methods provide '0 or more' call count restriction. + return true + } + + for _, e := range m.EmbedMock.expectations { + if mm_atomic.LoadUint64(&e.Counter) < 1 { + return false + } + } + + return m.EmbedMock.invocationsDone() +} + +// MinimockEmbedInspect logs each unmet expectation +func (m *OllamaClientInterfaceMock) MinimockEmbedInspect() { + for _, e := range m.EmbedMock.expectations { + if mm_atomic.LoadUint64(&e.Counter) < 1 { + m.t.Errorf("Expected call to OllamaClientInterfaceMock.Embed with params: %#v", *e.params) + } + } + + afterEmbedCounter := mm_atomic.LoadUint64(&m.afterEmbedCounter) + // if default expectation was set then invocations count should be greater than zero + if m.EmbedMock.defaultExpectation != nil && afterEmbedCounter < 1 { + if m.EmbedMock.defaultExpectation.params == nil { + m.t.Error("Expected call to OllamaClientInterfaceMock.Embed") + } else { + m.t.Errorf("Expected call to OllamaClientInterfaceMock.Embed with params: %#v", *m.EmbedMock.defaultExpectation.params) + } + } + // if func was set then invocations count should be greater than zero + if m.funcEmbed != nil && afterEmbedCounter < 1 { + m.t.Error("Expected call to OllamaClientInterfaceMock.Embed") + } + + if !m.EmbedMock.invocationsDone() && afterEmbedCounter > 0 { + m.t.Errorf("Expected %d calls to OllamaClientInterfaceMock.Embed but found %d calls", + mm_atomic.LoadUint64(&m.EmbedMock.expectedInvocations), afterEmbedCounter) 
+ } +} + +type mOllamaClientInterfaceMockIsAutoPull struct { + optional bool + mock *OllamaClientInterfaceMock + defaultExpectation *OllamaClientInterfaceMockIsAutoPullExpectation + expectations []*OllamaClientInterfaceMockIsAutoPullExpectation + + expectedInvocations uint64 +} + +// OllamaClientInterfaceMockIsAutoPullExpectation specifies expectation struct of the OllamaClientInterface.IsAutoPull +type OllamaClientInterfaceMockIsAutoPullExpectation struct { + mock *OllamaClientInterfaceMock + + results *OllamaClientInterfaceMockIsAutoPullResults + Counter uint64 +} + +// OllamaClientInterfaceMockIsAutoPullResults contains results of the OllamaClientInterface.IsAutoPull +type OllamaClientInterfaceMockIsAutoPullResults struct { + b1 bool +} + +// Marks this method to be optional. The default behavior of any method with Return() is '1 or more', meaning +// the test will fail minimock's automatic final call check if the mocked method was not called at least once. +// Optional() makes method check to work in '0 or more' mode. +// It is NOT RECOMMENDED to use this option unless you really need it, as default behaviour helps to +// catch the problems when the expected method call is totally skipped during test run. 
+func (mmIsAutoPull *mOllamaClientInterfaceMockIsAutoPull) Optional() *mOllamaClientInterfaceMockIsAutoPull { + mmIsAutoPull.optional = true + return mmIsAutoPull +} + +// Expect sets up expected params for OllamaClientInterface.IsAutoPull +func (mmIsAutoPull *mOllamaClientInterfaceMockIsAutoPull) Expect() *mOllamaClientInterfaceMockIsAutoPull { + if mmIsAutoPull.mock.funcIsAutoPull != nil { + mmIsAutoPull.mock.t.Fatalf("OllamaClientInterfaceMock.IsAutoPull mock is already set by Set") + } + + if mmIsAutoPull.defaultExpectation == nil { + mmIsAutoPull.defaultExpectation = &OllamaClientInterfaceMockIsAutoPullExpectation{} + } + + return mmIsAutoPull +} + +// Inspect accepts an inspector function that has same arguments as the OllamaClientInterface.IsAutoPull +func (mmIsAutoPull *mOllamaClientInterfaceMockIsAutoPull) Inspect(f func()) *mOllamaClientInterfaceMockIsAutoPull { + if mmIsAutoPull.mock.inspectFuncIsAutoPull != nil { + mmIsAutoPull.mock.t.Fatalf("Inspect function is already set for OllamaClientInterfaceMock.IsAutoPull") + } + + mmIsAutoPull.mock.inspectFuncIsAutoPull = f + + return mmIsAutoPull +} + +// Return sets up results that will be returned by OllamaClientInterface.IsAutoPull +func (mmIsAutoPull *mOllamaClientInterfaceMockIsAutoPull) Return(b1 bool) *OllamaClientInterfaceMock { + if mmIsAutoPull.mock.funcIsAutoPull != nil { + mmIsAutoPull.mock.t.Fatalf("OllamaClientInterfaceMock.IsAutoPull mock is already set by Set") + } + + if mmIsAutoPull.defaultExpectation == nil { + mmIsAutoPull.defaultExpectation = &OllamaClientInterfaceMockIsAutoPullExpectation{mock: mmIsAutoPull.mock} + } + mmIsAutoPull.defaultExpectation.results = &OllamaClientInterfaceMockIsAutoPullResults{b1} + return mmIsAutoPull.mock +} + +// Set uses given function f to mock the OllamaClientInterface.IsAutoPull method +func (mmIsAutoPull *mOllamaClientInterfaceMockIsAutoPull) Set(f func() (b1 bool)) *OllamaClientInterfaceMock { + if mmIsAutoPull.defaultExpectation != nil { + 
mmIsAutoPull.mock.t.Fatalf("Default expectation is already set for the OllamaClientInterface.IsAutoPull method") + } + + if len(mmIsAutoPull.expectations) > 0 { + mmIsAutoPull.mock.t.Fatalf("Some expectations are already set for the OllamaClientInterface.IsAutoPull method") + } + + mmIsAutoPull.mock.funcIsAutoPull = f + return mmIsAutoPull.mock +} + +// Times sets number of times OllamaClientInterface.IsAutoPull should be invoked +func (mmIsAutoPull *mOllamaClientInterfaceMockIsAutoPull) Times(n uint64) *mOllamaClientInterfaceMockIsAutoPull { + if n == 0 { + mmIsAutoPull.mock.t.Fatalf("Times of OllamaClientInterfaceMock.IsAutoPull mock can not be zero") + } + mm_atomic.StoreUint64(&mmIsAutoPull.expectedInvocations, n) + return mmIsAutoPull +} + +func (mmIsAutoPull *mOllamaClientInterfaceMockIsAutoPull) invocationsDone() bool { + if len(mmIsAutoPull.expectations) == 0 && mmIsAutoPull.defaultExpectation == nil && mmIsAutoPull.mock.funcIsAutoPull == nil { + return true + } + + totalInvocations := mm_atomic.LoadUint64(&mmIsAutoPull.mock.afterIsAutoPullCounter) + expectedInvocations := mm_atomic.LoadUint64(&mmIsAutoPull.expectedInvocations) + + return totalInvocations > 0 && (expectedInvocations == 0 || expectedInvocations == totalInvocations) +} + +// IsAutoPull implements OllamaClientInterface +func (mmIsAutoPull *OllamaClientInterfaceMock) IsAutoPull() (b1 bool) { + mm_atomic.AddUint64(&mmIsAutoPull.beforeIsAutoPullCounter, 1) + defer mm_atomic.AddUint64(&mmIsAutoPull.afterIsAutoPullCounter, 1) + + if mmIsAutoPull.inspectFuncIsAutoPull != nil { + mmIsAutoPull.inspectFuncIsAutoPull() + } + + if mmIsAutoPull.IsAutoPullMock.defaultExpectation != nil { + mm_atomic.AddUint64(&mmIsAutoPull.IsAutoPullMock.defaultExpectation.Counter, 1) + + mm_results := mmIsAutoPull.IsAutoPullMock.defaultExpectation.results + if mm_results == nil { + mmIsAutoPull.t.Fatal("No results are set for the OllamaClientInterfaceMock.IsAutoPull") + } + return (*mm_results).b1 + } + if 
mmIsAutoPull.funcIsAutoPull != nil { + return mmIsAutoPull.funcIsAutoPull() + } + mmIsAutoPull.t.Fatalf("Unexpected call to OllamaClientInterfaceMock.IsAutoPull.") + return +} + +// IsAutoPullAfterCounter returns a count of finished OllamaClientInterfaceMock.IsAutoPull invocations +func (mmIsAutoPull *OllamaClientInterfaceMock) IsAutoPullAfterCounter() uint64 { + return mm_atomic.LoadUint64(&mmIsAutoPull.afterIsAutoPullCounter) +} + +// IsAutoPullBeforeCounter returns a count of OllamaClientInterfaceMock.IsAutoPull invocations +func (mmIsAutoPull *OllamaClientInterfaceMock) IsAutoPullBeforeCounter() uint64 { + return mm_atomic.LoadUint64(&mmIsAutoPull.beforeIsAutoPullCounter) +} + +// MinimockIsAutoPullDone returns true if the count of the IsAutoPull invocations corresponds +// the number of defined expectations +func (m *OllamaClientInterfaceMock) MinimockIsAutoPullDone() bool { + if m.IsAutoPullMock.optional { + // Optional methods provide '0 or more' call count restriction. + return true + } + + for _, e := range m.IsAutoPullMock.expectations { + if mm_atomic.LoadUint64(&e.Counter) < 1 { + return false + } + } + + return m.IsAutoPullMock.invocationsDone() +} + +// MinimockIsAutoPullInspect logs each unmet expectation +func (m *OllamaClientInterfaceMock) MinimockIsAutoPullInspect() { + for _, e := range m.IsAutoPullMock.expectations { + if mm_atomic.LoadUint64(&e.Counter) < 1 { + m.t.Error("Expected call to OllamaClientInterfaceMock.IsAutoPull") + } + } + + afterIsAutoPullCounter := mm_atomic.LoadUint64(&m.afterIsAutoPullCounter) + // if default expectation was set then invocations count should be greater than zero + if m.IsAutoPullMock.defaultExpectation != nil && afterIsAutoPullCounter < 1 { + m.t.Error("Expected call to OllamaClientInterfaceMock.IsAutoPull") + } + // if func was set then invocations count should be greater than zero + if m.funcIsAutoPull != nil && afterIsAutoPullCounter < 1 { + m.t.Error("Expected call to OllamaClientInterfaceMock.IsAutoPull") 
+ } + + if !m.IsAutoPullMock.invocationsDone() && afterIsAutoPullCounter > 0 { + m.t.Errorf("Expected %d calls to OllamaClientInterfaceMock.IsAutoPull but found %d calls", + mm_atomic.LoadUint64(&m.IsAutoPullMock.expectedInvocations), afterIsAutoPullCounter) + } +} + +// MinimockFinish checks that all mocked methods have been called the expected number of times +func (m *OllamaClientInterfaceMock) MinimockFinish() { + m.finishOnce.Do(func() { + if !m.minimockDone() { + m.MinimockChatInspect() + + m.MinimockEmbedInspect() + + m.MinimockIsAutoPullInspect() + } + }) +} + +// MinimockWait waits for all mocked methods to be called the expected number of times +func (m *OllamaClientInterfaceMock) MinimockWait(timeout mm_time.Duration) { + timeoutCh := mm_time.After(timeout) + for { + if m.minimockDone() { + return + } + select { + case <-timeoutCh: + m.MinimockFinish() + return + case <-mm_time.After(10 * mm_time.Millisecond): + } + } +} + +func (m *OllamaClientInterfaceMock) minimockDone() bool { + done := true + return done && + m.MinimockChatDone() && + m.MinimockEmbedDone() && + m.MinimockIsAutoPullDone() +} diff --git a/ai/ollama/v0/tasks.go b/ai/ollama/v0/tasks.go new file mode 100644 index 00000000..79dc5b84 --- /dev/null +++ b/ai/ollama/v0/tasks.go @@ -0,0 +1,131 @@ +package ollama + +import ( + "github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +type TaskTextGenerationChatInput struct { + ChatHistory []ChatMessage `json:"chat-history"` + MaxNewTokens int `json:"max-new-tokens"` + Model string `json:"model"` + Prompt string `json:"prompt"` + PromptImages []string `json:"prompt-images"` + Seed int `json:"seed"` + SystemMsg string `json:"system-message"` + Temperature float32 `json:"temperature"` + TopK int `json:"top-k"` +} + +type ChatMessage struct { + Role string `json:"role"` + Content []MultiModalContent `json:"content"` +} + +type MultiModalContent struct { + ImageURL URL `json:"image-url"` + Text string 
`json:"text"`
+	Type     string `json:"type"`
+}
+
+// URL wraps the "url" field of an image reference inside MultiModalContent.
+type URL struct {
+	URL string `json:"url"`
+}
+
+// TaskTextGenerationChatOuput is the result of TaskTextGenerationChat: the
+// text of the model's reply. (The spelling of the type name is kept as-is so
+// existing references keep compiling.)
+type TaskTextGenerationChatOuput struct {
+	Text string `json:"text"`
+}
+
+// TaskTextGenerationChat runs one non-streaming chat request.
+//
+// It decodes in into a TaskTextGenerationChatInput and builds the message
+// list in this order: the system message (only when non-empty), every entry
+// of the chat history, and finally the prompt as a "user" message carrying
+// the prompt images. The reply text returned by e.client.Chat is wrapped in
+// a TaskTextGenerationChatOuput and converted back to a structpb.Struct.
+//
+// Errors from decoding the input or from the client call are returned
+// unchanged, with a nil result.
+//
+// NOTE(review): MaxNewTokens is decoded but never forwarded in the request
+// options - confirm whether that is intentional.
+func (e *execution) TaskTextGenerationChat(in *structpb.Struct) (*structpb.Struct, error) {
+	input := TaskTextGenerationChatInput{}
+	if err := base.ConvertFromStructpb(in, &input); err != nil {
+		return nil, err
+	}
+
+	messages := []OllamaChatMessage{}
+
+	if input.SystemMsg != "" {
+		messages = append(messages, OllamaChatMessage{
+			Role:    "system",
+			Content: input.SystemMsg,
+		})
+	}
+	for _, msg := range input.ChatHistory {
+		textContent := ""
+		imageContent := []string{}
+		for _, content := range msg.Content {
+			// Any part that is not "text" is treated as an image.
+			if content.Type == "text" {
+				textContent += content.Text
+			} else {
+				imageContent = append(imageContent, base.TrimBase64Mime(content.ImageURL.URL))
+			}
+		}
+		messages = append(messages, OllamaChatMessage{
+			Role:    msg.Role,
+			Content: textContent,
+			Images:  imageContent,
+		})
+	}
+
+	// Collect the prompt images into the local slice that is attached to the
+	// user message below. Appending to input.PromptImages instead (as the
+	// previous code did) left this slice empty, so the images were dropped.
+	images := make([]string, 0, len(input.PromptImages))
+	for _, image := range input.PromptImages {
+		images = append(images, base.TrimBase64Mime(image))
+	}
+
+	messages = append(messages, OllamaChatMessage{
+		Role:    "user",
+		Content: input.Prompt,
+		Images:  images,
+	})
+
+	request := ChatRequest{
+		Model:    input.Model,
+		Messages: messages,
+		Stream:   false,
+		Options: OllamaOptions{
+			Temperature: input.Temperature,
+			TopK:        input.TopK,
+			Seed:        input.Seed,
+		},
+	}
+
+	response, err := e.client.Chat(request)
+	if err != nil {
+		return nil, err
+	}
+
+	output := TaskTextGenerationChatOuput{
+		Text: response.Message.Content,
+	}
+	return base.ConvertToStructpb(output)
+}
+
+// TaskTextEmbeddingsInput is the input decoded by TaskTextEmbeddings: the
+// text to embed and the model to use.
+type TaskTextEmbeddingsInput struct {
+	Text  string `json:"text"`
+	Model string `json:"model"`
+}
+
+// TaskTextEmbeddingsOutput is the result of TaskTextEmbeddings.
+type TaskTextEmbeddingsOutput struct {
+	Embedding []float32 `json:"embedding"`
+}
+
+func (e *execution) TaskTextEmbeddings(in *structpb.Struct) (*structpb.Struct, error) {
+	input := TaskTextEmbeddingsInput{}
+	if err :=
base.ConvertFromStructpb(in, &input); err != nil { + return nil, err + } + + request := EmbedRequest{ + Model: input.Model, + Prompt: input.Text, + } + + response, err := e.client.Embed(request) + if err != nil { + return nil, err + } + + output := TaskTextEmbeddingsOutput(response) + return base.ConvertToStructpb(output) +} diff --git a/ai/openai/v0/README.mdx b/ai/openai/v0/README.mdx index ea13136c..1095cc9a 100644 --- a/ai/openai/v0/README.mdx +++ b/ai/openai/v0/README.mdx @@ -50,18 +50,18 @@ Provide text outputs in response to their inputs. | Input | ID | Type | Description | | :--- | :--- | :--- | :--- | | Task ID (required) | `task` | string | `TASK_TEXT_GENERATION` | -| Model (required) | `model` | string | ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) table for details on which models work with the Chat API. | +| Model (required) | `model` | string | ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API. | | Prompt (required) | `prompt` | string | The prompt text | | System message | `system-message` | string | The system message helps set the behavior of the assistant. For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. By default, the model’s behavior is using a generic message as "You are a helpful assistant." | | Image | `images` | array[string] | The images | | Chat history | `chat-history` | array[object] | Incorporate external chat history, specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format \{"role": "The message role, i.e. 'system', 'user' or 'assistant'", "content": "message content"\}. | | Temperature | `temperature` | number | What sampling temperature to use, between 0 and 2. 
Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both. | | N | `n` | integer | How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs. | -| Max Tokens | `max-tokens` | integer | The maximum number of [tokens](/tokenizer) that can be generated in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens. | +| Max Tokens | `max-tokens` | integer | The maximum number of tokens that can be generated in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. Example Python code for counting tokens. | | Response Format | `response-format` | object | An object specifying the format that the model must output. Used to enable JSON mode. | | Top P | `top-p` | number | An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both. | -| Presence Penalty | `presence-penalty` | number | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. [See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details) | -| Frequency Penalty | `frequency-penalty` | number | Number between -2.0 and 2.0. 
Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. [See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details) | +| Presence Penalty | `presence-penalty` | number | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. See more information about frequency and presence penalties. | +| Frequency Penalty | `frequency-penalty` | number | Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. See more information about frequency and presence penalties. | @@ -83,7 +83,7 @@ Turn text into numbers, unlocking use cases like search. | Input | ID | Type | Description | | :--- | :--- | :--- | :--- | | Task ID (required) | `task` | string | `TASK_TEXT_EMBEDDINGS` | -| Model (required) | `model` | string | ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them. | +| Model (required) | `model` | string | ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them. | | Text (required) | `text` | string | The text | | Dimensions | `dimensions` | integer | The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. | @@ -108,9 +108,9 @@ Turn audio into text. | Task ID (required) | `task` | string | `TASK_SPEECH_RECOGNITION` | | Model (required) | `model` | string | ID of the model to use. Only `whisper-1` is currently available. 
| | Audio (required) | `audio` | string | The audio file object (not file name) to transcribe, in one of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. | -| Prompt | `prompt` | string | An optional text to guide the model's style or continue a previous audio segment. The [prompt](/docs/guides/speech-to-text/prompting) should match the audio language. | -| Language | `language` | string | The language of the input audio. Supplying the input language in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy and latency. | -| Temperature | `temperature` | number | The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit. | +| Prompt | `prompt` | string | An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language. | +| Language | `language` | string | The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency. | +| Temperature | `temperature` | number | The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. 
| @@ -131,9 +131,9 @@ Turn text into lifelike spoken audio | Input | ID | Type | Description | | :--- | :--- | :--- | :--- | | Task ID (required) | `task` | string | `TASK_TEXT_TO_SPEECH` | -| Model (required) | `model` | string | One of the available [TTS models](/docs/models/tts): `tts-1` or `tts-1-hd` | +| Model (required) | `model` | string | One of the available TTS models: `tts-1` or `tts-1-hd` | | Text (required) | `text` | string | The text to generate audio for. The maximum length is 4096 characters. | -| Voice (required) | `voice` | string | The voice to use when generating the audio. Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are available in the [Text to speech guide](/docs/guides/text-to-speech/voice-options). | +| Voice (required) | `voice` | string | The voice to use when generating the audio. Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are available in the Text to speech guide. | | Response Format | `response-format` | string | The format to audio in. Supported formats are `mp3`, `opus`, `aac`, and `flac`. | | Speed | `speed` | number | The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is the default. | diff --git a/ai/openai/v0/config/openai.json b/ai/openai/v0/config/openai.json index 1ce35613..89715b7c 100644 --- a/ai/openai/v0/config/openai.json +++ b/ai/openai/v0/config/openai.json @@ -2,7 +2,7 @@ "components": { "schemas": { "AssistantFileObject": { - "description": "A list of [Files](/docs/api-reference/files) attached to an `assistant`.", + "description": "A list of Files attached to an `assistant`.", "properties": { "assistant_id": { "description": "The assistant ID that the file is attached to.", @@ -53,7 +53,7 @@ }, "file_ids": { "default": [], - "description": "A list of [file](/docs/api-reference/files) IDs attached to this assistant. There can be a maximum of 20 files attached to the assistant. 
Files are ordered by their creation date in ascending order.\n", + "description": "A list of file IDs attached to this assistant. There can be a maximum of 20 files attached to the assistant. Files are ordered by their creation date in ascending order.\n", "items": { "type": "string" }, @@ -77,7 +77,7 @@ "x-oaiTypeLabel": "map" }, "model": { - "description": "ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n", + "description": "ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.\n", "type": "string" }, "name": { @@ -441,7 +441,7 @@ "properties": { "detail": { "default": "auto", - "description": "Specifies the detail level of the image. Learn more in the [Vision guide](/docs/guides/vision/low-or-high-fidelity-image-understanding).", + "description": "Specifies the detail level of the image. Learn more in the Vision guide.", "enum": [ "auto", "low", @@ -805,7 +805,7 @@ "additionalProperties": false, "properties": { "file_id": { - "description": "A [File](/docs/api-reference/files) ID (with `purpose=\"assistants\"`) that the assistant should use. Useful for tools like `retrieval` and `code_interpreter` that can access files.", + "description": "A File ID (with `purpose=\"assistants\"`) that the assistant should use. Useful for tools like `retrieval` and `code_interpreter` that can access files.", "type": "string" } }, @@ -825,7 +825,7 @@ }, "file_ids": { "default": [], - "description": "A list of [file](/docs/api-reference/files) IDs attached to this assistant. There can be a maximum of 20 files attached to the assistant. Files are ordered by their creation date in ascending order.\n", + "description": "A list of file IDs attached to this assistant. There can be a maximum of 20 files attached to the assistant. 
Files are ordered by their creation date in ascending order.\n", "items": { "type": "string" }, @@ -850,7 +850,7 @@ "type": "string" } ], - "description": "ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n" + "description": "ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.\n" }, "name": { "description": "The name of the assistant. The maximum length is 256 characters.\n", @@ -973,7 +973,7 @@ "properties": { "frequency_penalty": { "default": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\n[See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details)\n", + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\nSee more information about frequency and presence penalties.\n", "maximum": 2, "minimum": -2, "nullable": true, @@ -1024,12 +1024,12 @@ "type": "boolean" }, "max_tokens": { - "description": "The maximum number of [tokens](/tokenizer) that can be generated in the chat completion.\n\nThe total length of input tokens and generated tokens is limited by the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.\n", + "description": "The maximum number of tokens that can be generated in the chat completion.\n\nThe total length of input tokens and generated tokens is limited by the model's context length. 
Example Python code for counting tokens.\n", "nullable": true, "type": "integer" }, "messages": { - "description": "A list of messages comprising the conversation so far. [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).", + "description": "A list of messages comprising the conversation so far. Example Python code.", "items": { "$ref": "#/components/schemas/ChatCompletionRequestMessage" }, @@ -1037,7 +1037,7 @@ "type": "array" }, "model": { - "description": "ID of the model to use. See the [model endpoint compatibility](/docs/models/model-endpoint-compatibility) table for details on which models work with the Chat API.", + "description": "ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.", "enum": [ "gpt-4o-mini", "gpt-4o", @@ -1077,14 +1077,14 @@ }, "presence_penalty": { "default": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\n[See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details)\n", + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\nSee more information about frequency and presence penalties.\n", "maximum": 2, "minimum": -2, "nullable": true, "type": "number" }, "response_format": { - "description": "An object specifying the format that the model must output. 
Compatible with [GPT-4 Turbo](/docs/models/gpt-4-and-gpt-4-turbo) and `gpt-3.5-turbo-1106`.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which guarantees the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.\n", + "description": "An object specifying the format that the model must output. Compatible with GPT-4 Turbo and `gpt-3.5-turbo-1106`.\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which guarantees the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.\n", "properties": { "type": { "default": "text", @@ -1129,7 +1129,7 @@ }, "stream": { "default": false, - "description": "If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message. 
[Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).\n", + "description": "If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message. Example Python code.\n", "nullable": true, "type": "boolean" }, @@ -1169,7 +1169,7 @@ "type": "number" }, "user": { - "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n", + "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.\n", "example": "user-1234", "type": "string" } @@ -1385,7 +1385,7 @@ }, "frequency_penalty": { "default": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\n[See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details)\n", + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\nSee more information about frequency and presence penalties.\n", "maximum": 2, "minimum": -2, "nullable": true, @@ -1411,7 +1411,7 @@ }, "max_tokens": { "default": 16, - "description": "The maximum number of [tokens](/tokenizer) that can be generated in the completion.\n\nThe token count of your prompt plus `max_tokens` cannot exceed the model's context length. 
[Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.\n", + "description": "The maximum number of tokens that can be generated in the completion.\n\nThe token count of your prompt plus `max_tokens` cannot exceed the model's context length. Example Python code for counting tokens.\n", "example": 16, "minimum": 0, "nullable": true, @@ -1431,7 +1431,7 @@ "type": "string" } ], - "description": "ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n", + "description": "ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.\n", "x-oaiTypeLabel": "string" }, "n": { @@ -1445,7 +1445,7 @@ }, "presence_penalty": { "default": 0, - "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\n[See more information about frequency and presence penalties.](/docs/guides/text-generation/parameter-details)\n", + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\nSee more information about frequency and presence penalties.\n", "maximum": 2, "minimum": -2, "nullable": true, @@ -1522,7 +1522,7 @@ }, "stream": { "default": false, - "description": "Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message. 
[Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).\n", + "description": "Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message. Example Python code.\n", "nullable": true, "type": "boolean" }, @@ -1552,7 +1552,7 @@ "type": "number" }, "user": { - "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n", + "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.\n", "example": "user-1234", "type": "string" } @@ -1689,7 +1689,7 @@ "type": "string" }, "input": { - "description": "Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.\n", + "description": "Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. Example Python code for counting tokens.\n", "example": "The quick brown fox jumped over the lazy dog", "oneOf": [ { @@ -1741,7 +1741,7 @@ "x-oaiExpandable": true }, "model": { - "description": "ID of the model to use. 
You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n", + "description": "ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.\n", "enum": [ "text-embedding-ada-002", "text-embedding-3-small", @@ -1752,7 +1752,7 @@ "x-oaiTypeLabel": "string" }, "user": { - "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n", + "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.\n", "example": "user-1234", "type": "string" } @@ -1819,7 +1819,7 @@ "type": "string" }, "purpose": { - "description": "The intended purpose of the uploaded file.\n\nUse \"fine-tune\" for [Fine-tuning](/docs/api-reference/fine-tuning) and \"assistants\" for [Assistants](/docs/api-reference/assistants) and [Messages](/docs/api-reference/messages). This allows us to validate the format of the uploaded file is correct for fine-tuning.\n", + "description": "The intended purpose of the uploaded file.\n\nUse \"fine-tune\" for Fine-tuning and \"assistants\" for Assistants and Messages. This allows us to validate the format of the uploaded file is correct for fine-tuning.\n", "enum": [ "fine-tune", "assistants" @@ -1906,7 +1906,7 @@ "type": "string" } ], - "description": "The name of the model to fine-tune. You can select one of the\n[supported models](/docs/guides/fine-tuning/what-models-can-be-fine-tuned).\n", + "description": "The name of the model to fine-tune. 
You can select one of the\nsupported models.\n", "example": "gpt-3.5-turbo", "x-oaiTypeLabel": "string" }, @@ -1919,12 +1919,12 @@ "type": "string" }, "training_file": { - "description": "The ID of an uploaded file that contains training data.\n\nSee [upload file](/docs/api-reference/files/upload) for how to upload a file.\n\nYour dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`.\n\nSee the [fine-tuning guide](/docs/guides/fine-tuning) for more details.\n", + "description": "The ID of an uploaded file that contains training data.\n\nSee upload file for how to upload a file.\n\nYour dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`.\n\nSee the fine-tuning guide for more details.\n", "example": "file-abc123", "type": "string" }, "validation_file": { - "description": "The ID of an uploaded file that contains validation data.\n\nIf you provide this file, the data is used to generate validation\nmetrics periodically during fine-tuning. These metrics can be viewed in\nthe fine-tuning results file.\nThe same data should not be present in both train and validation files.\n\nYour dataset must be formatted as a JSONL file. You must upload your file with the purpose `fine-tune`.\n\nSee the [fine-tuning guide](/docs/guides/fine-tuning) for more details.\n", + "description": "The ID of an uploaded file that contains validation data.\n\nIf you provide this file, the data is used to generate validation\nmetrics periodically during fine-tuning. These metrics can be viewed in\nthe fine-tuning results file.\nThe same data should not be present in both train and validation files.\n\nYour dataset must be formatted as a JSONL file. 
You must upload your file with the purpose `fine-tune`.\n\nSee the fine-tuning guide for more details.\n", "example": "file-abc123", "nullable": true, "type": "string" @@ -2004,7 +2004,7 @@ "type": "string" }, "user": { - "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n", + "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.\n", "example": "user-1234", "type": "string" } @@ -2090,7 +2090,7 @@ "type": "string" }, "user": { - "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n", + "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.\n", "example": "user-1234", "type": "string" } @@ -2158,7 +2158,7 @@ "type": "string" }, "user": { - "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).\n", + "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Learn more.\n", "example": "user-1234", "type": "string" } @@ -2179,7 +2179,7 @@ }, "file_ids": { "default": [], - "description": "A list of [File](/docs/api-reference/files) IDs that the message should use. There can be a maximum of 10 files attached to a message. Useful for tools like `retrieval` and `code_interpreter` that can access and use files.", + "description": "A list of File IDs that the message should use. There can be a maximum of 10 files attached to a message. 
Useful for tools like `retrieval` and `code_interpreter` that can access and use files.", "items": { "type": "string" }, @@ -2428,11 +2428,11 @@ "type": "string" }, "assistant_id": { - "description": "The ID of the [assistant](/docs/api-reference/assistants) to use to execute this run.", + "description": "The ID of the assistant to use to execute this run.", "type": "string" }, "instructions": { - "description": "Overrides the [instructions](/docs/api-reference/assistants/createAssistant) of the assistant. This is useful for modifying the behavior on a per-run basis.", + "description": "Overrides the instructions of the assistant. This is useful for modifying the behavior on a per-run basis.", "nullable": true, "type": "string" }, @@ -2443,7 +2443,7 @@ "x-oaiTypeLabel": "map" }, "model": { - "description": "The ID of the [Model](/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used.", + "description": "The ID of the Model to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used.", "nullable": true, "type": "string" }, @@ -2495,7 +2495,7 @@ "type": "string" } ], - "description": "One of the available [TTS models](/docs/models/tts): `tts-1` or `tts-1-hd`\n", + "description": "One of the available TTS models: `tts-1` or `tts-1-hd`\n", "x-oaiTypeLabel": "string" }, "response_format": { @@ -2517,7 +2517,7 @@ "type": "number" }, "voice": { - "description": "The voice to use when generating the audio. Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are available in the [Text to speech guide](/docs/guides/text-to-speech/voice-options).", + "description": "The voice to use when generating the audio. 
Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are available in the Text to speech guide.", "enum": [ "alloy", "echo", @@ -2540,7 +2540,7 @@ "additionalProperties": false, "properties": { "assistant_id": { - "description": "The ID of the [assistant](/docs/api-reference/assistants) to use to execute this run.", + "description": "The ID of the assistant to use to execute this run.", "type": "string" }, "instructions": { @@ -2555,7 +2555,7 @@ "x-oaiTypeLabel": "map" }, "model": { - "description": "The ID of the [Model](/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used.", + "description": "The ID of the Model to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used.", "nullable": true, "type": "string" }, @@ -2593,7 +2593,7 @@ "additionalProperties": false, "properties": { "messages": { - "description": "A list of [messages](/docs/api-reference/messages) to start the thread with.", + "description": "A list of messages to start the thread with.", "items": { "$ref": "#/components/schemas/CreateMessageRequest" }, @@ -2618,7 +2618,7 @@ "x-oaiTypeLabel": "file" }, "language": { - "description": "The language of the input audio. Supplying the input language in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy and latency.\n", + "description": "The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.\n", "type": "string" }, "model": { @@ -2631,7 +2631,7 @@ "x-oaiTypeLabel": "string" }, "prompt": { - "description": "An optional text to guide the model's style or continue a previous audio segment. 
The [prompt](/docs/guides/speech-to-text/prompting) should match the audio language.\n", + "description": "An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.\n", "type": "string" }, "response_format": { @@ -2648,7 +2648,7 @@ }, "temperature": { "default": 0, - "description": "The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit.\n", + "description": "The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n", "type": "number" } }, @@ -2695,7 +2695,7 @@ "x-oaiTypeLabel": "string" }, "prompt": { - "description": "An optional text to guide the model's style or continue a previous audio segment. The [prompt](/docs/guides/speech-to-text/prompting) should be in English.\n", + "description": "An optional text to guide the model's style or continue a previous audio segment. The prompt should be in English.\n", "type": "string" }, "response_format": { @@ -2705,7 +2705,7 @@ }, "temperature": { "default": 0, - "description": "The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit.\n", + "description": "The sampling temperature, between 0 and 1. 
Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.\n", "type": "number" } }, @@ -2727,7 +2727,7 @@ "type": "object" }, "DeleteAssistantFileResponse": { - "description": "Deletes the association between the assistant and the file, but does not delete the [File](/docs/api-reference/files) object itself.", + "description": "Deletes the association between the assistant and the file, but does not delete the File object itself.", "properties": { "deleted": { "type": "boolean" @@ -2860,7 +2860,7 @@ "description": "Represents an embedding vector returned by embedding endpoint.\n", "properties": { "embedding": { - "description": "The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the [embedding guide](/docs/guides/embeddings).\n", + "description": "The embedding vector, which is a list of floats. The length of vector depends on the model as listed in the embedding guide.\n", "items": { "type": "number" }, @@ -2970,7 +2970,7 @@ "type": "integer" }, "hyperparameters": { - "description": "The hyperparameters used for the fine-tuning job. See the [fine-tuning guide](/docs/guides/fine-tuning) for more details.", + "description": "The hyperparameters used for the fine-tuning job. See the fine-tuning guide for more details.", "properties": { "n_epochs": { "default": "auto", @@ -3015,7 +3015,7 @@ "type": "string" }, "result_files": { - "description": "The compiled results file ID(s) for the fine-tuning job. You can retrieve the results with the [Files API](/docs/api-reference/files/retrieve-contents).", + "description": "The compiled results file ID(s) for the fine-tuning job. 
You can retrieve the results with the Files API.", "items": { "example": "file-abc123", "type": "string" @@ -3040,11 +3040,11 @@ "type": "integer" }, "training_file": { - "description": "The file ID used for training. You can retrieve the training data with the [Files API](/docs/api-reference/files/retrieve-contents).", + "description": "The file ID used for training. You can retrieve the training data with the Files API.", "type": "string" }, "validation_file": { - "description": "The file ID used for validation. You can retrieve the validation results with the [Files API](/docs/api-reference/files/retrieve-contents).", + "description": "The file ID used for validation. You can retrieve the validation results with the Files API.", "nullable": true, "type": "string" } @@ -3133,7 +3133,7 @@ }, "FunctionParameters": { "additionalProperties": true, - "description": "The parameters the functions accepts, described as a JSON Schema object. See the [guide](/docs/guides/text-generation/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. \n\nOmitting `parameters` defines a function with an empty parameter list.", + "description": "The parameters the functions accepts, described as a JSON Schema object. See the guide for examples, and the JSON Schema reference for documentation about the format. 
\n\nOmitting `parameters` defines a function with an empty parameter list.", "type": "object" }, "Image": { @@ -3504,12 +3504,12 @@ ] }, "MessageContentImageFileObject": { - "description": "References an image [File](/docs/api-reference/files) in the content of a message.", + "description": "References an image File in the content of a message.", "properties": { "image_file": { "properties": { "file_id": { - "description": "The [File](/docs/api-reference/files) ID of the image in the message content.", + "description": "The File ID of the image in the message content.", "type": "string" } }, @@ -3685,7 +3685,7 @@ "type": "string" }, "message_id": { - "description": "The ID of the [message](/docs/api-reference/messages) that the [File](/docs/api-reference/files) is attached to.", + "description": "The ID of the message that the File is attached to.", "type": "string" }, "object": { @@ -3711,10 +3711,10 @@ } }, "MessageObject": { - "description": "Represents a message within a [thread](/docs/api-reference/threads).", + "description": "Represents a message within a thread.", "properties": { "assistant_id": { - "description": "If applicable, the ID of the [assistant](/docs/api-reference/assistants) that authored this message.", + "description": "If applicable, the ID of the assistant that authored this message.", "nullable": true, "type": "string" }, @@ -3739,7 +3739,7 @@ }, "file_ids": { "default": [], - "description": "A list of [file](/docs/api-reference/files) IDs that the assistant should use. Useful for tools like retrieval and code_interpreter that can access files. A maximum of 10 files can be attached to a message.", + "description": "A list of file IDs that the assistant should use. Useful for tools like retrieval and code_interpreter that can access files. 
A maximum of 10 files can be attached to a message.", "items": { "type": "string" }, @@ -3772,12 +3772,12 @@ "type": "string" }, "run_id": { - "description": "If applicable, the ID of the [run](/docs/api-reference/runs) associated with the authoring of this message.", + "description": "If applicable, the ID of the run associated with the authoring of this message.", "nullable": true, "type": "string" }, "thread_id": { - "description": "The [thread](/docs/api-reference/threads) ID that this message belongs to.", + "description": "The thread ID that this message belongs to.", "type": "string" } }, @@ -3847,7 +3847,7 @@ }, "file_ids": { "default": [], - "description": "A list of [File](/docs/api-reference/files) IDs attached to this assistant. There can be a maximum of 20 files attached to the assistant. Files are ordered by their creation date in ascending order. If a file was previously attached to the list but does not show up in the list, it will be deleted from the assistant.\n", + "description": "A list of File IDs attached to this assistant. There can be a maximum of 20 files attached to the assistant. Files are ordered by their creation date in ascending order. If a file was previously attached to the list but does not show up in the list, it will be deleted from the assistant.\n", "items": { "type": "string" }, @@ -3872,7 +3872,7 @@ "type": "string" } ], - "description": "ID of the model to use. You can use the [List models](/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](/docs/models/overview) for descriptions of them.\n" + "description": "ID of the model to use. You can use the List models API to see all of your available models, or see our Model overview for descriptions of them.\n" }, "name": { "description": "The name of the assistant. 
The maximum length is 256 characters.\n", @@ -4031,10 +4031,10 @@ "type": "object" }, "RunObject": { - "description": "Represents an execution run on a [thread](/docs/api-reference/threads).", + "description": "Represents an execution run on a thread.", "properties": { "assistant_id": { - "description": "The ID of the [assistant](/docs/api-reference/assistants) used for execution of this run.", + "description": "The ID of the assistant used for execution of this run.", "type": "string" }, "cancelled_at": { @@ -4062,7 +4062,7 @@ }, "file_ids": { "default": [], - "description": "The list of [File](/docs/api-reference/files) IDs the [assistant](/docs/api-reference/assistants) used for this run.", + "description": "The list of File IDs the assistant used for this run.", "items": { "type": "string" }, @@ -4073,7 +4073,7 @@ "type": "string" }, "instructions": { - "description": "The instructions that the [assistant](/docs/api-reference/assistants) used for this run.", + "description": "The instructions that the assistant used for this run.", "type": "string" }, "last_error": { @@ -4106,7 +4106,7 @@ "x-oaiTypeLabel": "map" }, "model": { - "description": "The model that the [assistant](/docs/api-reference/assistants) used for this run.", + "description": "The model that the assistant used for this run.", "type": "string" }, "object": { @@ -4170,12 +4170,12 @@ "type": "string" }, "thread_id": { - "description": "The ID of the [thread](/docs/api-reference/threads) that was executed on as a part of this run.", + "description": "The ID of the thread that was executed on as a part of this run.", "type": "string" }, "tools": { "default": [], - "description": "The list of tools that the [assistant](/docs/api-reference/assistants) used for this run.", + "description": "The list of tools that the assistant used for this run.", "items": { "oneOf": [ { @@ -4338,7 +4338,7 @@ "image": { "properties": { "file_id": { - "description": "The [file](/docs/api-reference/files) ID of the 
image.", + "description": "The file ID of the image.", "type": "string" } }, @@ -4398,7 +4398,7 @@ "type": "string" }, "output": { - "description": "The output of the function. This will be `null` if the outputs have not been [submitted](/docs/api-reference/runs/submitToolOutputs) yet.", + "description": "The output of the function. This will be `null` if the outputs have not been submitted yet.", "nullable": true, "type": "string" } @@ -4498,7 +4498,7 @@ "description": "Represents a step in execution of a run.\n", "properties": { "assistant_id": { - "description": "The ID of the [assistant](/docs/api-reference/assistants) associated with the run step.", + "description": "The ID of the assistant associated with the run step.", "type": "string" }, "cancelled_at": { @@ -4566,7 +4566,7 @@ "type": "string" }, "run_id": { - "description": "The ID of the [run](/docs/api-reference/runs) that this run step is a part of.", + "description": "The ID of the run that this run step is a part of.", "type": "string" }, "status": { @@ -4594,7 +4594,7 @@ "x-oaiExpandable": true }, "thread_id": { - "description": "The ID of the [thread](/docs/api-reference/threads) that was run.", + "description": "The ID of the thread that was run.", "type": "string" }, "type": { @@ -4657,7 +4657,7 @@ "type": "object" }, "id": { - "description": "The ID of the tool call. This ID must be referenced when you submit the tool outputs in using the [Submit tool outputs to run](/docs/api-reference/runs/submitToolOutputs) endpoint.", + "description": "The ID of the tool call. 
This ID must be referenced when you submit the tool outputs in using the Submit tool outputs to run endpoint.", "type": "string" }, "type": { @@ -4702,7 +4702,7 @@ "type": "object" }, "ThreadObject": { - "description": "Represents a thread that contains [messages](/docs/api-reference/messages).", + "description": "Represents a thread that contains messages.", "properties": { "created_at": { "description": "The Unix timestamp (in seconds) for when the thread was created.", @@ -4753,7 +4753,7 @@ "name": "OpenAI Support", "url": "https://help.openai.com/" }, - "description": "The OpenAI REST API. Please see https://platform.openai.com/docs/api-reference for more details.", + "description": "The OpenAI REST API. Please see here for more details.", "license": { "name": "MIT", "url": "https://github.com/openai/openai-openapi/blob/master/LICENSE" @@ -4836,7 +4836,7 @@ }, "group": "assistants", "name": "List assistants", - "returns": "A list of [assistant](/docs/api-reference/assistants/object) objects." + "returns": "A list of assistant objects." } }, "post": { @@ -4891,7 +4891,7 @@ ], "group": "assistants", "name": "Create assistant", - "returns": "An [assistant](/docs/api-reference/assistants/object) object." + "returns": "An assistant object." } } }, @@ -4981,7 +4981,7 @@ }, "group": "assistants", "name": "Retrieve assistant", - "returns": "The [assistant](/docs/api-reference/assistants/object) object matching the specified ID." + "returns": "The assistant object matching the specified ID." } }, "post": { @@ -5035,7 +5035,7 @@ }, "group": "assistants", "name": "Modify assistant", - "returns": "The modified [assistant](/docs/api-reference/assistants/object) object." + "returns": "The modified assistant object." } } }, @@ -5120,7 +5120,7 @@ }, "group": "assistants", "name": "List assistant files", - "returns": "A list of [assistant file](/docs/api-reference/assistants/file-object) objects." + "returns": "A list of assistant file objects." 
} }, "post": { @@ -5159,7 +5159,7 @@ "description": "OK" } }, - "summary": "Create an assistant file by attaching a [File](/docs/api-reference/files) to an [assistant](/docs/api-reference/assistants).", + "summary": "Create an assistant file by attaching a File to an assistant.", "tags": [ "Assistants" ], @@ -5175,7 +5175,7 @@ }, "group": "assistants", "name": "Create assistant file", - "returns": "An [assistant file](/docs/api-reference/assistants/file-object) object." + "returns": "An assistant file object." } } }, @@ -5283,7 +5283,7 @@ }, "group": "assistants", "name": "Retrieve assistant file", - "returns": "The [assistant file](/docs/api-reference/assistants/file-object) object matching the specified ID." + "returns": "The assistant file object matching the specified ID." } } }, @@ -5507,7 +5507,7 @@ "group": "chat", "name": "Create chat completion", "path": "create", - "returns": "Returns a [chat completion](/docs/api-reference/chat/object) object, or a streamed sequence of [chat completion chunk](/docs/api-reference/chat/streaming) objects if the request is streamed.\n" + "returns": "Returns a chat completion object, or a streamed sequence of chat completion chunk objects if the request is streamed.\n" } } }, @@ -5564,7 +5564,7 @@ "group": "completions", "legacy": true, "name": "Create completion", - "returns": "Returns a [completion](/docs/api-reference/completions/object) object, or a sequence of completion objects if the request is streamed.\n" + "returns": "Returns a completion object, or a sequence of completion objects if the request is streamed.\n" } } }, @@ -5608,7 +5608,7 @@ }, "group": "embeddings", "name": "Create embeddings", - "returns": "A list of [embedding](/docs/api-reference/embeddings/object) objects." + "returns": "A list of embedding objects." } } }, @@ -5653,7 +5653,7 @@ }, "group": "files", "name": "List files", - "returns": "A list of [File](/docs/api-reference/files/object) objects." + "returns": "A list of File objects." 
} }, "post": { @@ -5680,7 +5680,7 @@ "description": "OK" } }, - "summary": "Upload a file that can be used across various endpoints. The size of all the files uploaded by one organization can be up to 100 GB.\n\nThe size of individual files can be a maximum of 512 MB or 2 million tokens for Assistants. See the [Assistants Tools guide](/docs/assistants/tools) to learn more about the types of files supported. The Fine-tuning API only supports `.jsonl` files.\n\nPlease [contact us](https://help.openai.com/) if you need to increase these storage limits.\n", + "summary": "Upload a file that can be used across various endpoints. The size of all the files uploaded by one organization can be up to 100 GB.\n\nThe size of individual files can be a maximum of 512 MB or 2 million tokens for Assistants. See the Assistants Tools guide to learn more about the types of files supported. The Fine-tuning API only supports `.jsonl` files.\n\nPlease contact us if you need to increase these storage limits.\n", "tags": [ "Files" ], @@ -5695,7 +5695,7 @@ }, "group": "files", "name": "Upload file", - "returns": "The uploaded [File](/docs/api-reference/files/object) object." + "returns": "The uploaded File object." } } }, @@ -5783,7 +5783,7 @@ }, "group": "files", "name": "Retrieve file", - "returns": "The [File](/docs/api-reference/files/object) object matching the specified ID." + "returns": "The File object matching the specified ID." } } }, @@ -5882,7 +5882,7 @@ }, "group": "fine-tuning", "name": "List fine-tuning jobs", - "returns": "A list of paginated [fine-tuning job](/docs/api-reference/fine-tuning/object) objects." + "returns": "A list of paginated fine-tuning job objects." 
} }, "post": { @@ -5909,7 +5909,7 @@ "description": "OK" } }, - "summary": "Creates a fine-tuning job which begins the process of creating a new model from a given dataset.\n\nResponse includes details of the enqueued job including job status and the name of the fine-tuned models once complete.\n\n[Learn more about fine-tuning](/docs/guides/fine-tuning)\n", + "summary": "Creates a fine-tuning job which begins the process of creating a new model from a given dataset.\n\nResponse includes details of the enqueued job including job status and the name of the fine-tuned models once complete.\n\nLearn more about fine-tuning\n", "tags": [ "Fine-tuning" ], @@ -5945,7 +5945,7 @@ ], "group": "fine-tuning", "name": "Create fine-tuning job", - "returns": "A [fine-tuning.job](/docs/api-reference/fine-tuning/object) object." + "returns": "A fine-tuning.job object." } } }, @@ -5976,7 +5976,7 @@ "description": "OK" } }, - "summary": "Get info about a fine-tuning job.\n\n[Learn more about fine-tuning](/docs/guides/fine-tuning)\n", + "summary": "Get info about a fine-tuning job.\n\nLearn more about fine-tuning\n", "tags": [ "Fine-tuning" ], @@ -5991,7 +5991,7 @@ }, "group": "fine-tuning", "name": "Retrieve fine-tuning job", - "returns": "The [fine-tuning](/docs/api-reference/fine-tuning/object) object with the given ID." + "returns": "The fine-tuning object with the given ID." } } }, @@ -6037,7 +6037,7 @@ }, "group": "fine-tuning", "name": "Cancel fine-tuning", - "returns": "The cancelled [fine-tuning](/docs/api-reference/fine-tuning/object) object." + "returns": "The cancelled fine-tuning object." } } }, @@ -6146,7 +6146,7 @@ }, "group": "images", "name": "Create image edit", - "returns": "Returns a list of [image](/docs/api-reference/images/object) objects." + "returns": "Returns a list of image objects." } } }, @@ -6190,7 +6190,7 @@ }, "group": "images", "name": "Create image", - "returns": "Returns a list of [image](/docs/api-reference/images/object) objects." 
+ "returns": "Returns a list of image objects." } } }, @@ -6234,7 +6234,7 @@ }, "group": "images", "name": "Create image variation", - "returns": "Returns a list of [image](/docs/api-reference/images/object) objects." + "returns": "Returns a list of image objects." } } }, @@ -6268,7 +6268,7 @@ }, "group": "models", "name": "List models", - "returns": "A list of [model](/docs/api-reference/models/object) objects." + "returns": "A list of model objects." } } }, @@ -6358,7 +6358,7 @@ }, "group": "models", "name": "Retrieve model", - "returns": "The [model](/docs/api-reference/models/object) object matching the specified ID." + "returns": "The model object matching the specified ID." } } }, @@ -6402,7 +6402,7 @@ }, "group": "moderations", "name": "Create moderation", - "returns": "A [moderation](/docs/api-reference/moderations/object) object." + "returns": "A moderation object." } } }, @@ -6458,7 +6458,7 @@ ], "group": "threads", "name": "Create thread", - "returns": "A [thread](/docs/api-reference/threads) object." + "returns": "A thread object." } } }, @@ -6503,7 +6503,7 @@ }, "group": "threads", "name": "Create thread and run", - "returns": "A [run](/docs/api-reference/runs/object) object." + "returns": "A run object." } } }, @@ -6593,7 +6593,7 @@ }, "group": "threads", "name": "Retrieve thread", - "returns": "The [thread](/docs/api-reference/threads/object) object matching the specified ID." + "returns": "The thread object matching the specified ID." } }, "post": { @@ -6647,7 +6647,7 @@ }, "group": "threads", "name": "Modify thread", - "returns": "The modified [thread](/docs/api-reference/threads/object) object matching the specified ID." + "returns": "The modified thread object matching the specified ID." 
} } }, @@ -6656,7 +6656,7 @@ "operationId": "listMessages", "parameters": [ { - "description": "The ID of the [thread](/docs/api-reference/threads) the messages belong to.", + "description": "The ID of the thread the messages belong to.", "in": "path", "name": "thread_id", "required": true, @@ -6732,14 +6732,14 @@ }, "group": "threads", "name": "List messages", - "returns": "A list of [message](/docs/api-reference/messages) objects." + "returns": "A list of message objects." } }, "post": { "operationId": "createMessage", "parameters": [ { - "description": "The ID of the [thread](/docs/api-reference/threads) to create a message for.", + "description": "The ID of the thread to create a message for.", "in": "path", "name": "thread_id", "required": true, @@ -6786,7 +6786,7 @@ }, "group": "threads", "name": "Create message", - "returns": "A [message](/docs/api-reference/messages/object) object." + "returns": "A message object." } } }, @@ -6795,7 +6795,7 @@ "operationId": "getMessage", "parameters": [ { - "description": "The ID of the [thread](/docs/api-reference/threads) to which this message belongs.", + "description": "The ID of the thread to which this message belongs.", "in": "path", "name": "thread_id", "required": true, @@ -6841,7 +6841,7 @@ }, "group": "threads", "name": "Retrieve message", - "returns": "The [message](/docs/api-reference/threads/messages/object) object matching the specified ID." + "returns": "The message object matching the specified ID." } }, "post": { @@ -6904,7 +6904,7 @@ }, "group": "threads", "name": "Modify message", - "returns": "The modified [message](/docs/api-reference/threads/messages/object) object." + "returns": "The modified message object." } } }, @@ -6998,7 +6998,7 @@ }, "group": "threads", "name": "List message files", - "returns": "A list of [message file](/docs/api-reference/messages/file-object) objects." + "returns": "A list of message file objects." 
} } }, @@ -7065,7 +7065,7 @@ }, "group": "threads", "name": "Retrieve message file", - "returns": "The [message file](/docs/api-reference/messages/file-object) object." + "returns": "The message file object." } } }, @@ -7150,7 +7150,7 @@ }, "group": "threads", "name": "List runs", - "returns": "A list of [run](/docs/api-reference/runs/object) objects." + "returns": "A list of run objects." } }, "post": { @@ -7204,7 +7204,7 @@ }, "group": "threads", "name": "Create run", - "returns": "A [run](/docs/api-reference/runs/object) object." + "returns": "A run object." } } }, @@ -7213,7 +7213,7 @@ "operationId": "getRun", "parameters": [ { - "description": "The ID of the [thread](/docs/api-reference/threads) that was run.", + "description": "The ID of the thread that was run.", "in": "path", "name": "thread_id", "required": true, @@ -7259,14 +7259,14 @@ }, "group": "threads", "name": "Retrieve run", - "returns": "The [run](/docs/api-reference/runs/object) object matching the specified ID." + "returns": "The run object matching the specified ID." } }, "post": { "operationId": "modifyRun", "parameters": [ { - "description": "The ID of the [thread](/docs/api-reference/threads) that was run.", + "description": "The ID of the thread that was run.", "in": "path", "name": "thread_id", "required": true, @@ -7322,7 +7322,7 @@ }, "group": "threads", "name": "Modify run", - "returns": "The modified [run](/docs/api-reference/runs/object) object matching the specified ID." + "returns": "The modified run object matching the specified ID." } } }, @@ -7377,7 +7377,7 @@ }, "group": "threads", "name": "Cancel a run", - "returns": "The modified [run](/docs/api-reference/runs/object) object matching the specified ID." + "returns": "The modified run object matching the specified ID." } } }, @@ -7471,7 +7471,7 @@ }, "group": "threads", "name": "List run steps", - "returns": "A list of [run step](/docs/api-reference/runs/step-object) objects." + "returns": "A list of run step objects." 
} } }, @@ -7535,7 +7535,7 @@ }, "group": "threads", "name": "Retrieve run step", - "returns": "The [run step](/docs/api-reference/runs/step-object) object matching the specified ID." + "returns": "The run step object matching the specified ID." } } }, @@ -7544,7 +7544,7 @@ "operationId": "submitToolOuputsToRun", "parameters": [ { - "description": "The ID of the [thread](/docs/api-reference/threads) to which this run belongs.", + "description": "The ID of the thread to which this run belongs.", "in": "path", "name": "thread_id", "required": true, @@ -7600,7 +7600,7 @@ }, "group": "threads", "name": "Submit tool outputs to run", - "returns": "The modified [run](/docs/api-reference/runs/object) object matching the specified ID." + "returns": "The modified run object matching the specified ID." } } } @@ -7660,7 +7660,7 @@ "x-oaiMeta": { "groups": [ { - "description": "Learn how to turn audio into text or text into audio.\n\nRelated guide: [Speech to text](/docs/guides/speech-to-text)\n", + "description": "Learn how to turn audio into text or text into audio.\n\nRelated guide: Speech to text\n", "id": "audio", "sections": [ { @@ -7682,7 +7682,7 @@ "title": "Audio" }, { - "description": "Given a list of messages comprising a conversation, the model will return a response.\n\nRelated guide: [Chat Completions](/docs/guides/text-generation)\n", + "description": "Given a list of messages comprising a conversation, the model will return a response.\n\nRelated guide: Chat Completions\n", "id": "chat", "sections": [ { @@ -7704,7 +7704,7 @@ "title": "Chat" }, { - "description": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.\n\nRelated guide: [Embeddings](/docs/guides/embeddings)\n", + "description": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.\n\nRelated guide: Embeddings\n", "id": "embeddings", "sections": [ { @@ -7721,7 +7721,7 @@ 
"title": "Embeddings" }, { - "description": "Manage fine-tuning jobs to tailor a model to your specific training data.\n\nRelated guide: [Fine-tune models](/docs/guides/fine-tuning)\n", + "description": "Manage fine-tuning jobs to tailor a model to your specific training data.\n\nRelated guide: Fine-tune models\n", "id": "fine-tuning", "sections": [ { @@ -7763,7 +7763,7 @@ "title": "Fine-tuning" }, { - "description": "Files are used to upload documents that can be used with features like [Assistants](/docs/api-reference/assistants) and [Fine-tuning](/docs/api-reference/fine-tuning).\n", + "description": "Files are used to upload documents that can be used with features like Assistants and Fine-tuning.\n", "id": "files", "sections": [ { @@ -7800,7 +7800,7 @@ "title": "Files" }, { - "description": "Given a prompt and/or an input image, the model will generate a new image.\n\nRelated guide: [Image generation](/docs/guides/images)\n", + "description": "Given a prompt and/or an input image, the model will generate a new image.\n\nRelated guide: Image generation\n", "id": "images", "sections": [ { @@ -7827,7 +7827,7 @@ "title": "Images" }, { - "description": "List and describe the various models available in the API. You can refer to the [Models](/docs/models) documentation to understand what models are available and the differences between them.\n", + "description": "List and describe the various models available in the API. 
You can refer to the Models documentation to understand what models are available and the differences between them.\n", "id": "models", "sections": [ { @@ -7854,7 +7854,7 @@ "title": "Models" }, { - "description": "Given a input text, outputs if the model classifies it as violating OpenAI's content policy.\n\nRelated guide: [Moderations](/docs/guides/moderation)\n", + "description": "Given a input text, outputs if the model classifies it as violating OpenAI's content policy.\n\nRelated guide: Moderations\n", "id": "moderations", "sections": [ { @@ -7872,7 +7872,7 @@ }, { "beta": true, - "description": "Build assistants that can call models and use tools to perform tasks.\n\n[Get started with the Assistants API](/docs/assistants)\n", + "description": "Build assistants that can call models and use tools to perform tasks.\n\nGet started with the Assistants API\n", "id": "assistants", "sections": [ { @@ -7935,7 +7935,7 @@ }, { "beta": true, - "description": "Create threads that assistants can interact with.\n\nRelated guide: [Assistants](/docs/assistants/overview)\n", + "description": "Create threads that assistants can interact with.\n\nRelated guide: Assistants\n", "id": "threads", "sections": [ { @@ -7968,7 +7968,7 @@ }, { "beta": true, - "description": "Create messages within threads\n\nRelated guide: [Assistants](/docs/assistants/overview)\n", + "description": "Create messages within threads\n\nRelated guide: Assistants\n", "id": "messages", "sections": [ { @@ -8016,7 +8016,7 @@ }, { "beta": true, - "description": "Represents an execution run on a thread.\n\nRelated guide: [Assistants](/docs/assistants/overview)\n", + "description": "Represents an execution run on a thread.\n\nRelated guide: Assistants\n", "id": "runs", "sections": [ { @@ -8078,7 +8078,7 @@ "title": "Runs" }, { - "description": "Given a prompt, the model will return one or more predicted completions along with the probabilities of alternative tokens at each position. 
Most developer should use our [Chat Completions API](/docs/guides/text-generation/text-generation-models) to leverage our best and newest models. Most models that support the legacy Completions endpoint [will be shut off on January 4th, 2024](/docs/deprecations/2023-07-06-gpt-and-embeddings).\n", + "description": "Given a prompt, the model will return one or more predicted completions along with the probabilities of alternative tokens at each position. Most developer should use our Chat Completions API to leverage our best and newest models. Most models that support the legacy Completions endpoint [will be shut off on January 4th, 2024](/docs/deprecations/2023-07-06-gpt-and-embeddings).\n", "id": "completions", "legacy": true, "sections": [ diff --git a/ai/stabilityai/v0/README.mdx b/ai/stabilityai/v0/README.mdx index a4b07c8a..9fc5940a 100644 --- a/ai/stabilityai/v0/README.mdx +++ b/ai/stabilityai/v0/README.mdx @@ -31,7 +31,7 @@ The component configuration is defined and maintained [here](https://github.com/ | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | -| API Key (required) | `api-key` | string | Fill in your Stability AI API key. To find your keys, visit - https://platform.stability.ai/account/keys | +| API Key (required) | `api-key` | string | Fill in your Stability AI API key. To find your keys, visit here | diff --git a/ai/stabilityai/v0/config/setup.json b/ai/stabilityai/v0/config/setup.json index cdb94f4b..7796c500 100644 --- a/ai/stabilityai/v0/config/setup.json +++ b/ai/stabilityai/v0/config/setup.json @@ -3,7 +3,7 @@ "additionalProperties": false, "properties": { "api-key": { - "description": "Fill in your Stability AI API key. To find your keys, visit - https://platform.stability.ai/account/keys", + "description": "Fill in your Stability AI API key. 
To find your keys, visit here", "instillUpstreamTypes": [ "reference" ], diff --git a/ai/stabilityai/v0/config/stabilityai.json b/ai/stabilityai/v0/config/stabilityai.json index 7eabc5a2..b4540805 100644 --- a/ai/stabilityai/v0/config/stabilityai.json +++ b/ai/stabilityai/v0/config/stabilityai.json @@ -1044,7 +1044,7 @@ } }, "info": { - "description": "Welcome to the official Stability AI REST API!\n\n#### Authentication\n\nYou will need your [Stability API key](https://platform.stability.ai/account/keys) in order to make requests to this API.\nMake sure you never share your API key with anyone, and you never commit it to a public repository. Include this key in \nthe `Authorization` header of your requests.\n\n#### Rate limiting\n\nThis API is rate-limited to 150 requests every 10 seconds. If you exceed this limit, you will receive a `429` response.\nIf you find this limit too restrictive, please reach out to us via email at [platform@stability.ai](mailto:platform@stability.ai).\n\n#### Support\n\nCheck our [Status Page](https://stabilityai.instatus.com/) to view the current health of our REST/gRPC APIs.\n\nIf you run into issues, please reach out to us:\n - [Support Form](https://platform.stability.ai/support)\n - [platform@stability.ai](mailto:platform@stability.ai) \n - [Discord](https://discord.com/channels/1002292111942635562/1042896447311454361)\n", + "description": "Welcome to the official Stability AI REST API!\n\n#### Authentication\n\nYou will need your Stability API key in order to make requests to this API.\nMake sure you never share your API key with anyone, and you never commit it to a public repository. Include this key in \nthe `Authorization` header of your requests.\n\n#### Rate limiting\n\nThis API is rate-limited to 150 requests every 10 seconds. 
If you exceed this limit, you will receive a `429` response.\nIf you find this limit too restrictive, please reach out to us via email at [platform@stability.ai](mailto:platform@stability.ai).\n\n#### Support\n\nCheck our Status Page to view the current health of our REST/gRPC APIs.\n\nIf you run into issues, please reach out to us:\n - Support Form\n - [platform@stability.ai](mailto:platform@stability.ai) \n - Discord\n", "termsOfService": "https://platform.stability.ai/docs/terms-of-service", "title": "Stability.ai REST API", "version": "v1", diff --git a/application/github/v0/README.mdx b/application/github/v0/README.mdx index 9416c6b0..a07f9ae8 100644 --- a/application/github/v0/README.mdx +++ b/application/github/v0/README.mdx @@ -38,7 +38,7 @@ The component configuration is defined and maintained [here](https://github.com/ | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | -| Token | `token` | string | Fill in your GitHub access token for advanced usages. For more information about how to create tokens, please refer to the https://github.com/settings/tokens. | +| Token | `token` | string | Fill in your GitHub access token for advanced usages. For more information about how to create tokens, please refer to the github settings. | @@ -293,10 +293,10 @@ Get an issue. | Owner (required) | `owner` | string | Owner of the repository | | Repository (required) | `repository` | string | Repository name | | Webhook URL (required) | `hook-url` | string | URL to send the payload to | -| Events (required) | `events` | array[string] | Events to trigger the webhook. Please see https://docs.github.com/en/webhooks/webhook-events-and-payloads for more information | +| Events (required) | `events` | array[string] | Events to trigger the webhook. Please see the github document for more information | | Active | `active` | boolean | Whether the webhook is active. 
Default is false | | Content Type | `content-type` | string | Content type of the webhook, can be one of: json, form. Default is json | -| Hook Secret | `hook-secret` | string | If provided, the secret will be used as the key to generate the HMAC hex digest value for delivery signature headers. (see https://docs.github.com/en/webhooks/webhook-events-and-payloads#delivery-headers) | +| Hook Secret | `hook-secret` | string | If provided, the secret will be used as the key to generate the HMAC hex digest value for delivery signature headers. (see the document) | diff --git a/application/github/v0/config/setup.json b/application/github/v0/config/setup.json index 12be9fd1..e4ec94d8 100644 --- a/application/github/v0/config/setup.json +++ b/application/github/v0/config/setup.json @@ -3,7 +3,7 @@ "additionalProperties": false, "properties": { "token": { - "description": "Fill in your GitHub access token for advanced usages. For more information about how to create tokens, please refer to the https://github.com/settings/tokens.", + "description": "Fill in your GitHub access token for advanced usages. For more information about how to create tokens, please refer to the github settings.", "instillUpstreamTypes": [ "reference" ], diff --git a/application/github/v0/config/tasks.json b/application/github/v0/config/tasks.json index 6efb9894..4ab9d191 100644 --- a/application/github/v0/config/tasks.json +++ b/application/github/v0/config/tasks.json @@ -1295,7 +1295,7 @@ }, "events": { "title": "Events", - "description": "Events to trigger the webhook. Please see https://docs.github.com/en/webhooks/webhook-events-and-payloads for more information", + "description": "Events to trigger the webhook. 
Please see the github document for more information", "instillFormat": "array:string", "instillAcceptFormats": [ "array" @@ -1352,7 +1352,7 @@ }, "hook-secret": { "title": "Hook Secret", - "description": "If provided, the secret will be used as the key to generate the HMAC hex digest value for delivery signature headers. (see https://docs.github.com/en/webhooks/webhook-events-and-payloads#delivery-headers)", + "description": "If provided, the secret will be used as the key to generate the HMAC hex digest value for delivery signature headers. (see the document)", "instillSecret": true, "instillFormat": "string", "instillAcceptFormats": [ diff --git a/application/github/v0/main.go b/application/github/v0/main.go index ab8aebfd..95e0fc40 100644 --- a/application/github/v0/main.go +++ b/application/github/v0/main.go @@ -4,7 +4,6 @@ package github import ( "context" _ "embed" - "encoding/json" "fmt" "sync" @@ -97,67 +96,11 @@ func (c *component) CreateExecution(sysVars map[string]any, setup *structpb.Stru return &base.ExecutionWrapper{Execution: e}, nil } -func (e *execution) fillInDefaultValues(input *structpb.Struct) (*structpb.Struct, error) { - task := e.Task - taskSpec, ok := e.Component.GetTaskInputSchemas()[task] - if !ok { - return nil, errmsg.AddMessage( - fmt.Errorf("task %s not found", task), - fmt.Sprintf("Task %s not found", task), - ) - } - var taskSpecMap map[string]interface{} - err := json.Unmarshal([]byte(taskSpec), &taskSpecMap) - if err != nil { - return nil, errmsg.AddMessage( - err, - "Failed to unmarshal input", - ) - } - inputMap := taskSpecMap["properties"].(map[string]interface{}) - for key, value := range inputMap { - valueMap, ok := value.(map[string]interface{}) - if !ok { - continue - } - if _, ok := valueMap["default"]; !ok { - continue - } - if _, ok := input.GetFields()[key]; ok { - continue - } - defaultValue := valueMap["default"] - typeValue := valueMap["type"] - switch typeValue { - case "string": - input.GetFields()[key] = 
&structpb.Value{ - Kind: &structpb.Value_StringValue{ - StringValue: fmt.Sprintf("%v", defaultValue), - }, - } - case "integer", "number": - input.GetFields()[key] = &structpb.Value{ - Kind: &structpb.Value_NumberValue{ - NumberValue: defaultValue.(float64), - }, - } - case "boolean": - input.GetFields()[key] = &structpb.Value{ - Kind: &structpb.Value_BoolValue{ - BoolValue: defaultValue.(bool), - }, - } - } - } - return input, nil -} - func (e *execution) Execute(ctx context.Context, inputs []*structpb.Struct) ([]*structpb.Struct, error) { outputs := make([]*structpb.Struct, len(inputs)) for i, input := range inputs { - input, err := e.fillInDefaultValues(input) - if err != nil { + if _, err := e.FillInDefaultValues(input); err != nil { return nil, err } output, err := e.execute(ctx, input) diff --git a/application/googlesearch/v0/README.mdx b/application/googlesearch/v0/README.mdx index 83b4785f..64713c4a 100644 --- a/application/googlesearch/v0/README.mdx +++ b/application/googlesearch/v0/README.mdx @@ -30,8 +30,8 @@ The component configuration is defined and maintained [here](https://github.com/ | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | -| API Key (required) | `api-key` | string | API Key for the Google Custom Search API. You can create one here: https://developers.google.com/custom-search/v1/overview#api-key | -| Search Engine ID (required) | `cse-id` | string | ID of the Search Engine to use. Before using the Custom Search JSON API you will first need to create and configure your Programmable Search Engine. If you have not already created a Programmable Search Engine, you can start by visiting the Programmable Search Engine control panel https://programmablesearchengine.google.com/controlpanel/all. You can find this in the URL of your Search Engine. 
For example, if the URL of your search engine is https://cse.google.com/cse.js?cx=012345678910, the ID value is: 012345678910 | +| API Key (required) | `api-key` | string | API Key for the Google Custom Search API. You can create one here | +| Search Engine ID (required) | `cse-id` | string | ID of the Search Engine to use. Before using the Custom Search JSON API you will first need to create and configure your Programmable Search Engine. If you have not already created a Programmable Search Engine, you can start by visiting the Programmable Search Engine control panel.
You can find this in the URL of your Search Engine. For example, if the URL of your search engine is https://cse.google.com/cse.js?cx=012345678910, the ID value is: 012345678910 | diff --git a/application/googlesearch/v0/config/setup.json b/application/googlesearch/v0/config/setup.json index 7cc0b0df..8c9d5072 100644 --- a/application/googlesearch/v0/config/setup.json +++ b/application/googlesearch/v0/config/setup.json @@ -3,7 +3,7 @@ "additionalProperties": false, "properties": { "api-key": { - "description": "API Key for the Google Custom Search API. You can create one here: https://developers.google.com/custom-search/v1/overview#api-key", + "description": "API Key for the Google Custom Search API. You can create one here", "instillUpstreamTypes": [ "reference" ], @@ -13,7 +13,7 @@ "type": "string" }, "cse-id": { - "description": "ID of the Search Engine to use. Before using the Custom Search JSON API you will first need to create and configure your Programmable Search Engine. If you have not already created a Programmable Search Engine, you can start by visiting the Programmable Search Engine control panel https://programmablesearchengine.google.com/controlpanel/all. You can find this in the URL of your Search Engine. For example, if the URL of your search engine is https://cse.google.com/cse.js?cx=012345678910, the ID value is: 012345678910", + "description": "ID of the Search Engine to use. Before using the Custom Search JSON API you will first need to create and configure your Programmable Search Engine. If you have not already created a Programmable Search Engine, you can start by visiting the Programmable Search Engine control panel.
You can find this in the URL of your Search Engine. For example, if the URL of your search engine is https://cse.google.com/cse.js?cx=012345678910, the ID value is: 012345678910", "instillUpstreamTypes": [ "value" ], diff --git a/application/hubspot/v0/README.mdx b/application/hubspot/v0/README.mdx new file mode 100644 index 00000000..31a6f197 --- /dev/null +++ b/application/hubspot/v0/README.mdx @@ -0,0 +1,368 @@ +--- +title: "HubSpot" +lang: "en-US" +draft: false +description: "Learn about how to set up a VDP HubSpot component https://github.com/instill-ai/instill-core" +--- + +The HubSpot component is an application component that allows users to use HubSpot application to do various tasks. +It can carry out the following tasks: + +- [Get Contact](#get-contact) +- [Create Contact](#create-contact) +- [Get Deal](#get-deal) +- [Create Deal](#create-deal) +- [Get Company](#get-company) +- [Create Company](#create-company) +- [Get Ticket](#get-ticket) +- [Create Ticket](#create-ticket) +- [Get Thread](#get-thread) +- [Insert Message](#insert-message) +- [Retrieve Association](#retrieve-association) + + + +## Release Stage + +`Alpha` + + + +## Configuration + +The component configuration is defined and maintained [here](https://github.com/instill-ai/component/blob/main/application/hubspot/v0/config/definition.json). + + + + +## Setup + + +| Field | Field ID | Type | Note | +| :--- | :--- | :--- | :--- | +| Token (required) | `token` | string | Fill in your HubSpot private app access token. Go here for more information: https://developers.hubspot.com/docs/api/private-apps | + + + + +## Supported Tasks + +### Get Contact + +Get contact information using contact ID or Email + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_GET_CONTACT` | +| Contact ID or Email (required) | `contact-id-or-email` | string | Input contact ID or email. 
If the input has @, it will search the contact using email | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Owner ID (optional) | `owner-id` | string | The user who is assigned to the object | +| Email Address (optional) | `email` | string | Email address | +| First Name (optional) | `first-name` | string | First name | +| Last Name (optional) | `last-name` | string | Last name | +| Phone Number (optional) | `phone-number` | string | Phone number. If you plan to use the number formatting feature in HubSpot, use country code + phone number. Example: +886xxxxxxxxx | +| Company (optional) | `company` | string | Company | +| Job Title (optional) | `job-title` | string | Job title | +| Lifecycle Stage (optional) | `lifecycle-stage` | string | Lifecycle stages are used to track how contacts or companies move forward in your process. Default format is in small letters, all words are combined. Example: salesqualifiedlead. However, remember to check internal value for custom fields. | +| Lead Status (optional) | `lead-status` | string | The contact's sales, prospecting or outreach status. Default format is in capital letters, with _ as space. Example: IN_PROGRESS. However, remember to check internal value for custom fields. | +| Contact ID | `contact-id` | string | Contact ID | + + + + + + +### Create Contact + +Create new contact + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_CREATE_CONTACT` | +| Owner ID | `owner-id` | string | The user who is assigned to the object | +| Email Address (required) | `email` | string | Email address | +| First Name | `first-name` | string | First name | +| Last Name | `last-name` | string | Last name | +| Phone Number | `phone-number` | string | Phone number. If you plan to use the number formatting feature in HubSpot, use country code + phone number. 
Example: +886xxxxxxxxx | +| Company | `company` | string | Company | +| Job Title | `job-title` | string | Job title | +| Lifecycle Stage | `lifecycle-stage` | string | Lifecycle stages are used to track how contacts or companies move forward in your process. Default format is in small letters, all words are combined. Example: salesqualifiedlead. However, remember to check internal value for custom fields. | +| Lead Status | `lead-status` | string | The contact's sales, prospecting or outreach status. Default format is in capital letters, with _ as space. Example: IN_PROGRESS. However, remember to check internal value for custom fields. | +| Create Object -> Deal Association using deal IDs | `create-deals-association` | array[string] | Existing deal IDs to be associated with the object | +| Create Object -> Company Association using company IDs | `create-companies-association` | array[string] | Existing company IDs to be associated with the object | +| Create Object -> Ticket Association using ticket IDs | `create-tickets-association` | array[string] | Existing ticket IDs to be associated with the object | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Contact ID | `contact-id` | string | Contact ID | + + + + + + +### Get Deal + +Get deal information using deal ID + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_GET_DEAL` | +| Deal ID (required) | `deal-id` | string | Input deal ID | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Owner ID (optional) | `owner-id` | string | The user who is assigned to the object | +| Deal Name | `deal-name` | string | Deal name | +| Pipeline | `pipeline` | string | A pipeline is the place where you document and manage how your prospects move through the steps of your sales process. 
HubSpot uses the internal value rather than the name displayed in the view | +| Deal Stage | `deal-stage` | string | Deal stages allow you to categorize and track the progress of the deals that you are working on. Default format is in small letters, all words are combined. Example: qualifiedtobuy. However, remember to check internal value for custom fields. | +| Amount (optional) | `amount` | number | The total amount of the deal | +| Deal Type (optional) | `deal-type` | string | The type of deal. Default format is in small letters, all words are combined. Example: newbusiness. However, remember to check internal value for custom fields. | +| Close Date (optional) | `close-date` | string | Date the deal was closed. Set automatically by HubSpot. Format is in ISO 8601. Example: 2024-07-01T11:47:40.388Z | +| Create Date (optional) | `create-date` | string | Create date. Format is in ISO 8601. Example: 2024-07-01T11:47:40.388Z | +| Associated Contact IDs (optional) | `associated-contact-ids` | array[string] | Contact IDs associated with the object | + + + + + + +### Create Deal + +Create new deal + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_CREATE_DEAL` | +| Owner ID | `owner-id` | string | The user who is assigned to the object | +| Deal Name (required) | `deal-name` | string | Deal name | +| Pipeline (required) | `pipeline` | string | A pipeline is the place where you document and manage how your prospects move through the steps of your sales process. HubSpot uses the internal value rather than the name displayed in the view | +| Deal Stage (required) | `deal-stage` | string | Deal stages allow you to categorize and track the progress of the deals that you are working on. Default format is in small letters, all words are combined. Example: qualifiedtobuy. However, remember to check internal value for custom fields.
| +| Amount | `amount` | number | The total amount of the deal | +| Deal Type | `deal-type` | string | The type of deal. Default format is in small letters, all words are combined. Example: newbusiness. However, remember to check internal value for custom fields. | +| Close Date | `close-date` | string | Date the deal was closed. Set automatically by HubSpot. Format is in ISO 8601. Example: 2024-07-01T11:47:40.388Z | +| Create Object -> Contact Association using contact IDs | `create-contacts-association` | array[string] | Existing contact IDs to be associated with the object | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Deal ID | `deal-id` | string | Deal ID | + + + + + + +### Get Company + +Get company information using company ID + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_GET_COMPANY` | +| Company ID (required) | `company-id` | string | Input company ID | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Owner ID (optional) | `owner-id` | string | The user who is assigned to the object | +| Company Name (optional) | `company-name` | string | Company name | +| Company Domain (optional) | `company-domain` | string | The domain name of the company | +| Description (optional) | `description` | string | Description of the company | +| Phone Number (optional) | `phone-number` | string | Phone number of the company. If you plan to use the number formatting feature in HubSpot, use country code + phone number. Example: +886xxxxxxxxx | +| Industry (optional) | `industry` | string | The industry the company belongs to. Default format is in capital letters, with _ as space. Example: BROADCAST_MEDIA | +| Company Type (optional) | `company-type` | string | Type of company. Default format is capital letter. 
Example: RESELLER | +| City (optional) | `city` | string | City | +| State (optional) | `state` | string | State | +| Country (optional) | `country` | string | Country | +| Postal Code (optional) | `postal-code,` | string | Postal code | +| Time Zone (optional) | `time-zone` | string | Time zone | +| Annual Revenue (optional) | `annual-revenue` | number | Annual revenue | +| Total Revenue (optional) | `total-revenue` | number | Total revenue. Calculated automatically by HubSpot | +| Linkedin Page (optional) | `linkedin-page` | string | Linkedin page of the company | +| Associated Contact IDs (optional) | `associated-contact-ids` | array[string] | Contact IDs associated with the object | + + + + + + +### Create Company + +Create new company + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_CREATE_COMPANY` | +| Owner ID | `owner-id` | string | The user who is assigned to the object | +| Company Name | `company-name` | string | Company name | +| Company Domain (required) | `company-domain` | string | The domain name of the company | +| Description | `description` | string | Description of the company | +| Phone Number | `phone-number` | string | Phone number of the company. If you plan to use the number formatting feature in HubSpot, use country code + phone number. Example: +886xxxxxxxxx | +| Industry | `industry` | string | The industry the company belongs to. Default format is in capital letters, with _ as space. Example: BROADCAST_MEDIA | +| Company Type | `company-type` | string | Type of company. Default format is capital letter. 
Example: RESELLER | +| City | `city` | string | City | +| State | `state` | string | State | +| Country | `country` | string | Country | +| Postal Code | `postal-code,` | string | Postal code | +| Time Zone | `time-zone` | string | Time zone | +| Annual Revenue | `annual-revenue` | number | Annual revenue | +| Linkedin Page | `linkedin-page` | string | Linkedin page of the company | +| Create Object -> Contact Association using contact IDs | `create-contacts-association` | array[string] | Existing contact IDs to be associated with the object | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Company ID | `company-id` | string | Company ID | + + + + + + +### Get Ticket + +Get ticket information using ticket ID + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_GET_TICKET` | +| Ticket ID (required) | `ticket-id` | string | Input ticket ID | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Owner ID (optional) | `owner-id` | string | The user who is assigned to the object | +| Ticket Name | `ticket-name` | string | Ticket name | +| Ticket Status | `ticket-status` | string | The pipeline stage that contains this ticket. Default format is number. Example: 1. However, remember to check internal value for custom fields. Note: In Instill, ticket-status is displayed as string because of the possible custom internal value. | +| Pipeline | `pipeline` | string | A pipeline organizes and tracks the progression of tickets through various stages of resolution within your support process. HubSpot uses the internal value rather than the name displayed in the view | +| Category (optional) | `category` | array[string] | The main reason customer reached out for help. Default format is in capital letters. Example: BILLING_ISSUE. However, remember to check internal value for custom fields.
| +| Priority (optional) | `priority` | string | The level of attention needed on the ticket. Default format is in capital letters. Example: MEDIUM. However, remember to check internal value for custom fields. | +| Source (optional) | `source` | string | Channel where ticket was originally submitted. Default format is in capital letters. Example: EMAIL | +| Record Source (optional) | `record-source` | string | How this record was created. | +| Create Date (optional) | `create-date` | string | Create date. Format is in ISO 8601. Example: 2024-07-01T11:47:40.388Z | +| Last Modified Date (optional) | `last-modified-date` | string | Last modified date. Format is in ISO 8601. Example: 2024-07-01T11:47:40.388Z | +| Associated Contact IDs (optional) | `associated-contact-ids` | array[string] | Contact IDs associated with the object | + + + + + + +### Create Ticket + +Create new ticket + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_CREATE_TICKET` | +| Owner ID | `owner-id` | string | The user who is assigned to the object | +| Ticket Name (required) | `ticket-name` | string | Ticket name | +| Ticket Status (required) | `ticket-status` | string | The pipeline stage that contains this ticket. Default format is number. Example: 1. However, remember to check internal value for custom fields. Note: In Instill, ticket-status is displayed as string because of the possible custom internal value. | +| Pipeline (required) | `pipeline` | string | A pipeline organizes and tracks the progression of tickets through various stages of resolution within your support process. HubSpot uses the internal value rather than the name displayed in the view | +| Category | `category` | array[string] | The main reason customer reached out for help. Default format is in capital letters. Example: BILLING_ISSUE. However, remember to check internal value for custom fields.
| +| Priority | `priority` | string | The level of attention needed on the ticket. Default format is in capital letters. Example: MEDIUM. However, remember to check internal value for custom fields. | +| Source | `source` | string | Channel where ticket was originally submitted. Default format is in capital letters. Example: EMAIL | +| Create Object -> Contact Association using contact IDs | `create-contacts-association` | array[string] | Existing contact IDs to be associated with the object | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Ticket ID | `ticket-id` | string | Ticket ID | + + + + + + +### Get Thread + +Retrieve all the messages inside a thread (conversation inbox). The messages will be sorted from most recent to least recent. Note: This task uses Conversation API from HubSpot, which is still in BETA. + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_GET_THREAD` | +| Thread ID (required) | `thread-id` | string | Input thread ID | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Messages | `results` | array[object] | An array of messages | + + + + + + +### Insert Message + +Insert message into a thread (only support email thread) + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_INSERT_MESSAGE` | +| Thread ID (required) | `thread-id` | string | Input thread ID | +| Sender Actor ID (required) | `sender-actor-id` | string | Input sender actor id. Example: A-12345678. To obtain this, it is recommended to use and copy the 'Get Thread task' sender output. 
For more information about actor id: https://developers.hubspot.com/beta-docs/guides/api/conversations/inbox-and-messages#get-actors | +| Recipients (required) | `recipients` | array[string] | Recipients of the message | +| Channel Account ID (required) | `channel-account-id` | string | The ID of an account that is part of the channel-id channel. On an existing thread, it is recommended to copy channel-account-id of the most recent message on the thread. | +| Subject (required) | `subject` | string | The subject of the message | +| Text (required) | `text` | string | The body of the message | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Status | `status` | string | The message status | + + + + + + +### Retrieve Association + +Get the object IDs associated with contact ID (contact->objects). If you are trying to do the opposite (object->contacts), it is possible using the other tasks. Example: Go to get deal task to obtain deal->contacts + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_RETRIEVE_ASSOCIATION` | +| Contact ID (required) | `contact-id` | string | Input contact ID | +| Object Type (required) | `object-type` | string | Input object type (CRM objects or 'Threads'). 
Note: CRM objects include 'Deals', 'Companies', 'Tickets', etc | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Object ID Array | `object-ids` | array[string] | An array of object ID associated with the contact | + + + + + + + diff --git a/application/hubspot/v0/assets/HubSpot.svg b/application/hubspot/v0/assets/HubSpot.svg new file mode 100644 index 00000000..8ebbf298 --- /dev/null +++ b/application/hubspot/v0/assets/HubSpot.svg @@ -0,0 +1,3 @@ + + + diff --git a/application/hubspot/v0/association.go b/application/hubspot/v0/association.go new file mode 100644 index 00000000..b77d0ed7 --- /dev/null +++ b/application/hubspot/v0/association.go @@ -0,0 +1,241 @@ +package hubspot + +import ( + "fmt" + + hubspot "github.com/belong-inc/go-hubspot" + "github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +// Retrieve Association is a custom feature +// Will implement it following go-hubspot sdk format + +// API functions for Retrieve Association + +type RetrieveAssociationService interface { + GetThreadID(contactID string) (*TaskRetrieveAssociationThreadResp, error) + GetCrmID(contactID string, objectType string) (*TaskRetrieveAssociationCrmResp, error) +} + +type RetrieveAssociationServiceOp struct { + retrieveCrmIDPath string + retrieveThreadIDPath string + client *hubspot.Client +} + +func (s *RetrieveAssociationServiceOp) GetThreadID(contactID string) (*TaskRetrieveAssociationThreadResp, error) { + resource := &TaskRetrieveAssociationThreadResp{} + if err := s.client.Get(s.retrieveThreadIDPath+contactID, resource, nil); err != nil { + return nil, err + } + return resource, nil +} + +func (s *RetrieveAssociationServiceOp) GetCrmID(contactID string, objectType string) (*TaskRetrieveAssociationCrmResp, error) { + resource := &TaskRetrieveAssociationCrmResp{} + + contactIDInput := TaskRetrieveAssociationCrmReqID{ContactID: contactID} + + req := &TaskRetrieveAssociationCrmReq{} + req.Input = 
append(req.Input, contactIDInput) + + path := s.retrieveCrmIDPath + "/" + objectType + "/batch/read" + + if err := s.client.Post(path, req, resource); err != nil { + return nil, err + } + return resource, nil +} + +// Retrieve Association: use contact id to get the object ID associated with it + +type TaskRetrieveAssociationInput struct { + ContactID string `json:"contact-id"` + ObjectType string `json:"object-type"` +} + +// Retrieve Association Task is mainly divided into two: +// 1. GetThreadID +// 2. GetCrmID +// Basically, these two will have separate structs for handling request/response + +// For GetThreadID + +type TaskRetrieveAssociationThreadResp struct { + Results []struct { + ID string `json:"id"` + } `json:"results"` +} + +// For GetCrmID + +type TaskRetrieveAssociationCrmReq struct { + Input []TaskRetrieveAssociationCrmReqID `json:"inputs"` +} + +type TaskRetrieveAssociationCrmReqID struct { + ContactID string `json:"id"` +} + +type TaskRetrieveAssociationCrmResp struct { + Results []taskRetrieveAssociationCrmRespResult `json:"results"` +} + +type taskRetrieveAssociationCrmRespResult struct { + IDArray []struct { + ID string `json:"id"` + } `json:"to"` +} + +// Retrieve Association Output + +type TaskRetrieveAssociationOutput struct { + ObjectIDs []string `json:"object-ids"` +} + +func (e *execution) RetrieveAssociation(input *structpb.Struct) (*structpb.Struct, error) { + inputStruct := TaskRetrieveAssociationInput{} + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, err + } + + // API calls to retrieve association for Threads and CRM objects are different + + var objectIDs []string + if inputStruct.ObjectType == "Threads" { + + // To handle Threads + res, err := e.client.RetrieveAssociation.GetThreadID(inputStruct.ContactID) + + if err != nil { + return nil, err + } + + if len(res.Results) == 0 { + return nil, fmt.Errorf("no object ID found") + } + + objectIDs = make([]string, len(res.Results)) + for index,
value := range res.Results { + objectIDs[index] = value.ID + } + + } else { + + // To handle CRM objects + res, err := e.client.RetrieveAssociation.GetCrmID(inputStruct.ContactID, inputStruct.ObjectType) + + if err != nil { + return nil, err + } + + if len(res.Results) == 0 { + return nil, fmt.Errorf("no object ID found") + } + + // only take the first Result, because the input is only one contact id + objectIDs = make([]string, len(res.Results[0].IDArray)) + for index, value := range res.Results[0].IDArray { + objectIDs[index] = value.ID + } + + } + + outputStruct := TaskRetrieveAssociationOutput{ + ObjectIDs: objectIDs, + } + + output, err := base.ConvertToStructpb(outputStruct) + + if err != nil { + return nil, err + } + + return output, nil +} + +// Create Association (not a task) +// This section (create association) is used in: +// create contact task to create contact -> objects (company, ticket, deal) association +// create company task to create company -> contact association +// create deal task to create deal -> contact association +// create ticket task to create ticket -> contact association + +type CreateAssociationReq struct { + Associations []association `json:"inputs"` +} + +type association struct { + From struct { + ID string `json:"id"` + } `json:"from"` + To struct { + ID string `json:"id"` + } `json:"to"` + Type string `json:"type"` +} + +type CreateAssociationResponse struct { + Status string `json:"status"` +} + +// CreateAssociation is used to create batch associations between objects + +func CreateAssociation(fromID *string, toIDs *[]string, fromObjectType string, toObjectType string, e *execution) error { + req := &CreateAssociationReq{ + Associations: make([]association, len(*toIDs)), + } + + //for any association created related to company, it will use non-primary label. 
+ //for more info: https://developers.hubspot.com/beta-docs/guides/api/crm/associations#association-type-id-values + + var associationType string + if toObjectType == "company" { + switch fromObjectType { //use switch here in case other association of object -> company want to be created in the future + case "contact": + associationType = "279" + } + + } else if fromObjectType == "company" { + switch toObjectType { + case "contact": + associationType = "280" + } + } else { + associationType = fmt.Sprintf("%s_to_%s", fromObjectType, toObjectType) + } + + for index, toID := range *toIDs { + + req.Associations[index] = association{ + From: struct { + ID string `json:"id"` + }{ + ID: *fromID, + }, + To: struct { + ID string `json:"id"` + }{ + ID: toID, + }, + Type: associationType, + } + } + + createAssociationPath := fmt.Sprintf("crm/v3/associations/%s/%s/batch/create", fromObjectType, toObjectType) + + resp := &CreateAssociationResponse{} + + if err := e.client.Post(createAssociationPath, req, resp); err != nil { + return err + } + + if resp.Status != "COMPLETE" { + return fmt.Errorf("failed to create association") + } + + return nil +} diff --git a/application/hubspot/v0/association_test.go b/application/hubspot/v0/association_test.go new file mode 100644 index 00000000..d23c6e79 --- /dev/null +++ b/application/hubspot/v0/association_test.go @@ -0,0 +1,121 @@ +package hubspot + +import ( + "context" + "testing" + + qt "github.com/frankban/quicktest" + "github.com/instill-ai/component/base" + "go.uber.org/zap" + "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/types/known/structpb" +) + +// mockClient is in contact_test.go + +// Mock Retrieve Association struct and its functions + +type MockRetrieveAssociation struct{} + +func (s *MockRetrieveAssociation) GetThreadID(contactID string) (*TaskRetrieveAssociationThreadResp, error) { + + var fakeThreadID TaskRetrieveAssociationThreadResp + if contactID == "32027696539" { + fakeThreadID = 
TaskRetrieveAssociationThreadResp{ + Results: []struct { + ID string `json:"id"` + }{ + {ID: "7509711154"}, + }, + } + } + return &fakeThreadID, nil +} + +func (s *MockRetrieveAssociation) GetCrmID(contactID string, objectType string) (*TaskRetrieveAssociationCrmResp, error) { + + var fakeCrmID TaskRetrieveAssociationCrmResp + if contactID == "32027696539" { + fakeCrmID = TaskRetrieveAssociationCrmResp{ + Results: []taskRetrieveAssociationCrmRespResult{ + { + IDArray: []struct { + ID string `json:"id"` + }{ + {ID: "12345678900"}, + }, + }, + }, + } + } + return &fakeCrmID, nil + +} + +func TestComponent_ExecuteRetrieveAssociationTask(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + testcases := []struct { + name string + input TaskRetrieveAssociationInput + wantResp interface{} + }{ + { + name: "ok - retrieve association: thread ID", + input: TaskRetrieveAssociationInput{ + ContactID: "32027696539", + ObjectType: "Threads", + }, + wantResp: TaskRetrieveAssociationOutput{ + ObjectIDs: []string{ + "7509711154", + }, + }, + }, + { + name: "ok - retrieve association: deal ID", + input: TaskRetrieveAssociationInput{ + ContactID: "32027696539", + ObjectType: "Deals", + }, + wantResp: TaskRetrieveAssociationOutput{ + ObjectIDs: []string{ + "12345678900", + }, + }, + }, + } + + for _, tc := range testcases { + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "token": bearerToken, + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: taskRetrieveAssociation}, + client: createMockClient(), + } + e.execute = e.RetrieveAssociation + exec := &base.ExecutionWrapper{Execution: e} + + pbInput, err := base.ConvertToStructpb(tc.input) + + c.Assert(err, qt.IsNil) + + res, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbInput}) + c.Assert(err, 
qt.IsNil) + + resJSON, err := protojson.Marshal(res[0]) + c.Assert(err, qt.IsNil) + + c.Check(resJSON, qt.JSONEquals, tc.wantResp) + + }) + } + +} diff --git a/application/hubspot/v0/company.go b/application/hubspot/v0/company.go new file mode 100644 index 00000000..f90d3174 --- /dev/null +++ b/application/hubspot/v0/company.go @@ -0,0 +1,231 @@ +package hubspot + +import ( + "strconv" + + hubspot "github.com/belong-inc/go-hubspot" + "github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +// Get Company +type TaskGetCompanyInput struct { + CompanyID string `json:"company-id"` +} + +type TaskGetCompanyResp struct { + OwnerID string `json:"hubspot_owner_id,omitempty"` + CompanyName string `json:"name,omitempty"` + CompanyDomain string `json:"domain,omitempty"` + Description string `json:"description,omitempty"` + PhoneNumber string `json:"phone,omitempty"` + Industry string `json:"industry,omitempty"` + CompanyType string `json:"type,omitempty"` + City string `json:"city,omitempty"` + State string `json:"state,omitempty"` + Country string `json:"country,omitempty"` + PostalCode string `json:"zip,omitempty"` + TimeZone string `json:"timezone,omitempty"` + AnnualRevenue string `json:"annualrevenue,omitempty"` + TotalRevenue string `json:"totalrevenue,omitempty"` + LinkedinPage string `json:"linkedin_company_page,omitempty"` +} + +type TaskGetCompanyOutput struct { + OwnerID string `json:"owner-id,omitempty"` + CompanyName string `json:"company-name,omitempty"` + CompanyDomain string `json:"company-domain,omitempty"` + Description string `json:"description,omitempty"` + PhoneNumber string `json:"phone-number,omitempty"` + Industry string `json:"industry,omitempty"` + CompanyType string `json:"company-type,omitempty"` + City string `json:"city,omitempty"` + State string `json:"state,omitempty"` + Country string `json:"country,omitempty"` + PostalCode string `json:"postal-code,omitempty"` + TimeZone string 
`json:"time-zone,omitempty"` + AnnualRevenue float64 `json:"annual-revenue,omitempty"` + TotalRevenue float64 `json:"total-revenue,omitempty"` + LinkedinPage string `json:"linkedin-page,omitempty"` + AssociatedContactIDs []string `json:"associated-contact-ids,omitempty"` +} + +func (e *execution) GetCompany(input *structpb.Struct) (*structpb.Struct, error) { + inputStruct := TaskGetCompanyInput{} + + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, err + } + + res, err := e.client.CRM.Company.Get(inputStruct.CompanyID, &TaskGetCompanyResp{}, &hubspot.RequestQueryOption{Associations: []string{"contacts"}}) + + if err != nil { + return nil, err + } + + companyInfo := res.Properties.(*TaskGetCompanyResp) + + // get contacts associated with company + + var companyContactList []string + if res.Associations != nil { + companyContactAssociation := res.Associations.Contacts.Results + companyContactList = make([]string, len(companyContactAssociation)) + + for index, value := range companyContactAssociation { + companyContactList[index] = value.ID + } + } + + // convert to outputStruct + + var annualRevenue, totalRevenue float64 + + if companyInfo.AnnualRevenue != "" { + var err error + annualRevenue, err = strconv.ParseFloat(companyInfo.AnnualRevenue, 64) + + if err != nil { + return nil, err + } + } + + if companyInfo.TotalRevenue != "" { + var err error + totalRevenue, err = strconv.ParseFloat(companyInfo.TotalRevenue, 64) + + if err != nil { + return nil, err + } + } + + outputStruct := TaskGetCompanyOutput{ + OwnerID: companyInfo.OwnerID, + CompanyName: companyInfo.CompanyName, + CompanyDomain: companyInfo.CompanyDomain, + Description: companyInfo.Description, + PhoneNumber: companyInfo.PhoneNumber, + Industry: companyInfo.Industry, + CompanyType: companyInfo.CompanyType, + City: companyInfo.City, + State: companyInfo.State, + Country: companyInfo.Country, + PostalCode: companyInfo.PostalCode, + TimeZone: companyInfo.TimeZone, + 
AnnualRevenue: annualRevenue, + TotalRevenue: totalRevenue, + LinkedinPage: companyInfo.LinkedinPage, + AssociatedContactIDs: companyContactList, + } + + output, err := base.ConvertToStructpb(outputStruct) + + if err != nil { + return nil, err + } + + return output, nil +} + +// Create Company +type TaskCreateCompanyInput struct { + OwnerID string `json:"owner-id"` + CompanyName string `json:"company-name"` + CompanyDomain string `json:"company-domain"` + Description string `json:"description"` + PhoneNumber string `json:"phone-number"` + Industry string `json:"industry"` + CompanyType string `json:"company-type"` + City string `json:"city"` + State string `json:"state"` + Country string `json:"country"` + PostalCode string `json:"postal-code"` + TimeZone string `json:"time-zone"` + AnnualRevenue float64 `json:"annual-revenue"` + LinkedinPage string `json:"linkedin-page"` + CreateContactsAssociation []string `json:"create-contacts-association"` +} + +type TaskCreateCompanyReq struct { + OwnerID string `json:"hubspot_owner_id,omitempty"` + CompanyName string `json:"name,omitempty"` + CompanyDomain string `json:"domain,omitempty"` + Description string `json:"description,omitempty"` + PhoneNumber string `json:"phone,omitempty"` + Industry string `json:"industry,omitempty"` + CompanyType string `json:"type,omitempty"` + City string `json:"city,omitempty"` + State string `json:"state,omitempty"` + Country string `json:"country,omitempty"` + PostalCode string `json:"zip,omitempty"` + TimeZone string `json:"timezone,omitempty"` + AnnualRevenue string `json:"annualrevenue,omitempty"` + LinkedinPage string `json:"linkedin_company_page,omitempty"` + CompanyID string `json:"hs_object_id"` +} + +type TaskCreateCompanyOutput struct { + CompanyID string `json:"company-id"` +} + +func (e *execution) CreateCompany(input *structpb.Struct) (*structpb.Struct, error) { + + inputStruct := TaskCreateCompanyInput{} + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil 
{ + return nil, err + } + + var annualRevenue string + if inputStruct.AnnualRevenue != 0 { + annualRevenue = strconv.FormatFloat(inputStruct.AnnualRevenue, 'f', -1, 64) + } + + req := TaskCreateCompanyReq{ + OwnerID: inputStruct.OwnerID, + CompanyName: inputStruct.CompanyName, + CompanyDomain: inputStruct.CompanyDomain, + Description: inputStruct.Description, + PhoneNumber: inputStruct.PhoneNumber, + Industry: inputStruct.Industry, + CompanyType: inputStruct.CompanyType, + City: inputStruct.City, + State: inputStruct.State, + Country: inputStruct.Country, + PostalCode: inputStruct.PostalCode, + TimeZone: inputStruct.TimeZone, + AnnualRevenue: annualRevenue, + LinkedinPage: inputStruct.LinkedinPage, + } + + res, err := e.client.CRM.Company.Create(&req) + + if err != nil { + return nil, err + } + + // get company ID + companyID := res.Properties.(*TaskCreateCompanyReq).CompanyID + + outputStruct := TaskCreateCompanyOutput{CompanyID: companyID} + + output, err := base.ConvertToStructpb(outputStruct) + + if err != nil { + return nil, err + } + + // This section is for creating associations (company -> object) + if len(inputStruct.CreateContactsAssociation) != 0 { + err := CreateAssociation(&outputStruct.CompanyID, &inputStruct.CreateContactsAssociation, "company", "contact", e) + + if err != nil { + return nil, err + } + } + + return output, nil +} diff --git a/application/hubspot/v0/company_test.go b/application/hubspot/v0/company_test.go new file mode 100644 index 00000000..d8a04eec --- /dev/null +++ b/application/hubspot/v0/company_test.go @@ -0,0 +1,163 @@ +package hubspot + +import ( + "context" + "testing" + + hubspot "github.com/belong-inc/go-hubspot" + qt "github.com/frankban/quicktest" + "github.com/instill-ai/component/base" + "go.uber.org/zap" + "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/types/known/structpb" +) + +// mockClient is in contact_test.go + +// Mock Company struct and its functions +type MockCompany struct{} + 
+func (s *MockCompany) Get(companyID string, company interface{}, option *hubspot.RequestQueryOption) (*hubspot.ResponseResource, error) { + + var fakeCompany TaskGetCompanyResp + if companyID == "20620806729" { + fakeCompany = TaskGetCompanyResp{ + CompanyName: "HubSpot", + CompanyDomain: "hubspot.com", + Description: "HubSpot offers a comprehensive cloud-based marketing and sales platform with integrated applications for attracting, converting, and delighting customers through inbound marketing strategies.", + PhoneNumber: "+1 888-482-7768", + Industry: "COMPUTER_SOFTWARE", + AnnualRevenue: "10000000000", + } + } + + ret := &hubspot.ResponseResource{ + Properties: &fakeCompany, + } + + return ret, nil +} + +func (s *MockCompany) Create(company interface{}) (*hubspot.ResponseResource, error) { + arbitraryCompanyID := "99999999999" + + fakeCompanyInfo := company.(*TaskCreateCompanyReq) + + fakeCompanyInfo.CompanyID = arbitraryCompanyID + + ret := &hubspot.ResponseResource{ + Properties: fakeCompanyInfo, + } + + return ret, nil +} +func (s *MockCompany) Update(companyID string, company interface{}) (*hubspot.ResponseResource, error) { + return nil, nil +} +func (s *MockCompany) Delete(companyID string) error { + return nil +} +func (s *MockCompany) AssociateAnotherObj(companyID string, conf *hubspot.AssociationConfig) (*hubspot.ResponseResource, error) { + return nil, nil +} + +func TestComponent_ExecuteGetCompanyTask(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + tc := struct { + name string + input string + wantResp TaskGetCompanyOutput + }{ + name: "ok - get company", + input: "20620806729", + wantResp: TaskGetCompanyOutput{ + CompanyName: "HubSpot", + CompanyDomain: "hubspot.com", + Description: "HubSpot offers a comprehensive cloud-based marketing and sales platform with integrated applications for attracting, converting, and delighting customers through inbound marketing 
strategies.", + PhoneNumber: "+1 888-482-7768", + Industry: "COMPUTER_SOFTWARE", + AnnualRevenue: 10000000000, + }, + } + + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "token": bearerToken, + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: taskGetCompany}, + client: createMockClient(), + } + e.execute = e.GetCompany + exec := &base.ExecutionWrapper{Execution: e} + + pbInput, err := structpb.NewStruct(map[string]any{ + "company-id": tc.input, + }) + + c.Assert(err, qt.IsNil) + + res, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbInput}) + + c.Assert(err, qt.IsNil) + + resJSON, err := protojson.Marshal(res[0]) + c.Assert(err, qt.IsNil) + + c.Check(resJSON, qt.JSONEquals, tc.wantResp) + + }) +} + +func TestComponent_ExecuteCreateCompanyTask(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + tc := struct { + name string + inputCompany TaskCreateCompanyInput + wantResp string + }{ + name: "ok - create company", + inputCompany: TaskCreateCompanyInput{ + CompanyName: "Fake Company", + CompanyDomain: "fakecompany.com", + AnnualRevenue: 5000000, + }, + wantResp: "99999999999", + } + + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "token": bearerToken, + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: taskCreateCompany}, + client: createMockClient(), + } + e.execute = e.CreateCompany + exec := &base.ExecutionWrapper{Execution: e} + + pbInput, err := base.ConvertToStructpb(tc.inputCompany) + + c.Assert(err, qt.IsNil) + + res, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbInput}) + c.Assert(err, qt.IsNil) + + resString := res[0].Fields["company-id"].GetStringValue() + + 
c.Check(resString, qt.Equals, tc.wantResp) + + }) +} diff --git a/application/hubspot/v0/config/definition.json b/application/hubspot/v0/config/definition.json new file mode 100644 index 00000000..0c8a561e --- /dev/null +++ b/application/hubspot/v0/config/definition.json @@ -0,0 +1,29 @@ +{ + "availableTasks": [ + "TASK_GET_CONTACT", + "TASK_CREATE_CONTACT", + "TASK_GET_DEAL", + "TASK_CREATE_DEAL", + "TASK_GET_COMPANY", + "TASK_CREATE_COMPANY", + "TASK_GET_TICKET", + "TASK_CREATE_TICKET", + "TASK_GET_THREAD", + "TASK_INSERT_MESSAGE", + "TASK_RETRIEVE_ASSOCIATION" + ], + "documentationUrl": "https://www.instill.tech/docs/component/application/hubspot", + "icon": "assets/HubSpot.svg", + "id": "hubspot", + "public": true, + "title": "HubSpot", + "description": "Use HubSpot application to do various tasks", + "tombstone": false, + "type": "COMPONENT_TYPE_APPLICATION", + "uid": "0cd80b30-29bc-4f19-91ca-5911de3a3aae", + "vendor": "HubSpot", + "vendorAttributes": {}, + "version": "0.1.0", + "sourceUrl": "https://github.com/instill-ai/component/blob/main/application/hubspot/v0", + "releaseStage": "RELEASE_STAGE_ALPHA" +} diff --git a/application/hubspot/v0/config/setup.json b/application/hubspot/v0/config/setup.json new file mode 100644 index 00000000..880f6580 --- /dev/null +++ b/application/hubspot/v0/config/setup.json @@ -0,0 +1,27 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "properties": { + "token": { + "description": "Fill in your HubSpot private app access token. 
Go here for more information: https://developers.hubspot.com/docs/api/private-apps", + "instillUpstreamTypes": [ + "reference" + ], + "instillAcceptFormats": [ + "string" + ], + "instillSecret": true, + "instillUIOrder": 0, + "title": "Token", + "type": "string" + } + }, + "required": [ + "token" + ], + "instillEditOnNodeFields": [ + "token" + ], + "title": "HubSpot Connection", + "type": "object" +} diff --git a/application/hubspot/v0/config/tasks.json b/application/hubspot/v0/config/tasks.json new file mode 100644 index 00000000..5a003874 --- /dev/null +++ b/application/hubspot/v0/config/tasks.json @@ -0,0 +1,1350 @@ +{ + "$defs": { + "common": { + "owner-id": { + "description": "The user who is assigned to the object", + "title": "Owner ID", + "instillUIOrder": 0, + "type": "string" + }, + "associated-contact-ids": { + "description": "Contact IDs associated with the object", + "title": "Associated Contact IDs", + "type": "array", + "instillFormat": "array:string", + "items": { + "title": "Associated Contact ID", + "type": "string", + "description": "Contact ID associated with the object", + "instillFormat": "string" + } + }, + "create-contacts-association": { + "description": "Existing contact IDs to be associated with the object", + "title": "Create Object -> Contact Association using contact IDs", + "type": "array", + "instillAcceptFormats": ["array:string"], + "items": { + "type": "string" + }, + "instillUpstreamTypes": ["value", "reference"] + }, + "create-deals-association": { + "description": "Existing deal IDs to be associated with the object", + "title": "Create Object -> Deal Association using deal IDs", + "type": "array", + "instillAcceptFormats": ["array:string"], + "items": { + "type": "string" + }, + "instillUpstreamTypes": ["value", "reference"] + }, + "create-companies-association": { + "description": "Existing company IDs to be associated with the object", + "title": "Create Object -> Company Association using company IDs", + "type": "array", + 
"instillAcceptFormats": ["array:string"], + "items": { + "type": "string" + }, + "instillUpstreamTypes": ["value", "reference"] + }, + "create-tickets-association": { + "description": "Existing ticket IDs to be associated with the object", + "title": "Create Object -> Ticket Association using ticket IDs", + "type": "array", + "instillAcceptFormats": ["array:string"], + "items": { + "type": "string" + }, + "instillUpstreamTypes": ["value", "reference"] + } + }, + "contact": { + "email": { + "description": "Email address", + "instillUIOrder": 1, + "title": "Email Address", + "type": "string" + }, + "first-name": { + "description": "First name", + "instillUIOrder": 2, + "title": "First Name", + "type": "string" + }, + "last-name": { + "description": "Last name", + "instillUIOrder": 3, + "title": "Last Name", + "type": "string" + }, + "phone-number": { + "description": "Phone number. If you plan to use the number formatting feature in HubSpot, use country code + phone number. Example: +886xxxxxxxxx", + "instillUIOrder": 4, + "title": "Phone Number", + "type": "string" + }, + "company": { + "description": "Company", + "instillUIOrder": 5, + "title": "Company", + "type": "string" + }, + "job-title": { + "description": "Job title", + "instillUIOrder": 6, + "title": "Job Title", + "type": "string" + }, + "lifecycle-stage": { + "description": "Lifecycle stages are used to track how contacts or companies move forward in your process. Default format is in small letters, all words are combined. Example: salesqualifiedlead. However, remember to check internal value for custom fields.", + "instillUIOrder": 7, + "title": "Lifecycle Stage", + "type": "string" + }, + "lead-status": { + "description": "The contact's sales, prospecting or outreach status. Default format is in capital letters, with _ as space. Example: IN_PROGRESS. 
However, remember to check internal value for custom fields.", + "instillUIOrder": 8, + "title": "Lead Status", + "type": "string" + } + }, + "deal": { + "deal-name": { + "description": "Deal name", + "instillUIOrder": 1, + "title": "Deal Name", + "type": "string" + }, + "pipeline": { + "description": "A pipeline is the place where you document and manage how your prospects move through the steps of your sales process. HubSpot uses the internal value rather than the name displayed in the view", + "instillUIOrder": 2, + "title": "Pipeline", + "type": "string" + }, + "deal-stage": { + "description": "Deal stages allow you to categorize and track the progress of the deals that you are working on. Default format is in small letters, all words are combined. Example: qualifiedtobuy. However, remember to check internal value for custom fields.", + "instillUIOrder": 3, + "title": "Deal Stage", + "type": "string" + }, + "amount": { + "description": "The total amount of the deal", + "instillUIOrder": 4, + "title": "Amount", + "type": "number" + }, + "deal-type": { + "description": "The type of deal. Default format is in small letters, all words are combined. Example: newbusiness. However, remember to check internal value for custom fields.", + "instillUIOrder": 5, + "title": "Deal Type", + "type": "string" + }, + "close-date": { + "description": "Date the deal was closed. Set automatically by HubSpot. Format is in ISO 8601. Example: 2024-07-01T11:47:40.388Z", + "instillUIOrder": 6, + "title": "Close Date", + "type": "string" + }, + "create-date": { + "description": "Create date. Format is in ISO 8601.
Example: 2024-07-01T11:47:40.388Z", + "instillUIOrder": 7, + "title": "Create Date", + "type": "string" + } + }, + "company": { + "company-name": { + "description": "Company name", + "instillUIOrder": 1, + "title": "Company Name", + "type": "string" + }, + "company-domain": { + "description": "The domain name of the company", + "instillUIOrder": 2, + "title": "Company Domain", + "type": "string" + }, + "description": { + "description": "Description of the company", + "instillUIOrder": 3, + "title": "Description", + "type": "string" + }, + "phone-number": { + "description": "Phone number of the company. If you plan to use the number formatting feature in HubSpot, use country code + phone number. Example: +886xxxxxxxxx", + "instillUIOrder": 4, + "title": "Phone Number", + "type": "string" + }, + "industry": { + "description": "The industry the company belongs to. Default format is in capital letters, with _ as space. Example: BROADCAST_MEDIA", + "instillUIOrder": 5, + "title": "Industry", + "type": "string" + }, + "company-type": { + "description": "Type of company. Default format is capital letter. Example: RESELLER", + "instillUIOrder": 6, + "title": "Company Type", + "type": "string" + }, + "city": { + "description": "City", + "instillUIOrder": 7, + "title": "City", + "type": "string" + }, + "state": { + "description": "State", + "instillUIOrder": 8, + "title": "State", + "type": "string" + }, + "country": { + "description": "Country", + "instillUIOrder": 9, + "title": "Country", + "type": "string" + }, + "postal-code": { + "description": "Postal code", + "instillUIOrder": 10, + "title": "Postal Code", + "type": "string" + }, + "time-zone": { + "description": "Time zone", + "instillUIOrder": 11, + "title": "Time Zone", + "type": "string" + }, + "annual-revenue": { + "description": "Annual revenue", + "instillUIOrder": 12, + "title": "Annual Revenue", + "type": "number" + }, + "total-revenue": { + "description": "Total revenue. 
Calculated automatically by HubSpot", + "instillUIOrder": 13, + "title": "Total Revenue", + "type": "number" + }, + "linkedin-page": { + "description": "Linkedin page of the company", + "instillUIOrder": 14, + "title": "Linkedin Page", + "type": "string" + } + }, + "ticket": { + "ticket-name": { + "description": "Ticket name", + "instillUIOrder": 1, + "title": "Ticket Name", + "type": "string" + }, + "ticket-status": { + "description": "The pipeline stage that contains this ticket. Default format is number. Example: 1. However, remember to check internal value for custom fields. Note: In Instill AI, ticket-status is displayed as string because of the possible custom internal value.", + "instillUIOrder": 2, + "title": "Ticket Status", + "type": "string" + }, + "pipeline": { + "description": "A pipeline organizes and tracks the progression of tickets through various stages of resolution within your support process. HubSpot uses the internal value rather than the name displayed in the view", + "instillUIOrder": 3, + "title": "Pipeline", + "type": "string" + }, + "categories": { + "description": "The main reason customer reached out for help. Default format is in capital letters. Example: BILLING_ISSUE. However, remember to check internal value for custom fields.", + "instillUIOrder": 4, + "title": "Categories", + "type": "array" + }, + "priority": { + "description": "The level of attention needed on the ticket. Default format is in capital letters. Example: MEDIUM. However, remember to check internal value for custom fields.", + "instillUIOrder": 5, + "title": "Priority", + "type": "string" + }, + "source": { + "description": "Channel where ticket was originally submitted. Default format is in capital letters.
Example: EMAIL", + "instillUIOrder": 6, + "title": "Source", + "type": "string" + }, + "record-source": { + "description": "How this record was created.", + "instillUIOrder": 7, + "title": "Record Source", + "type": "string" + }, + "create-date": { + "description": "Create date. Format is in ISO 8601. Example: 2024-07-01T11:47:40.388Z", + "instillUIOrder": 8, + "title": "Create Date", + "type": "string" + }, + "last-modified-date": { + "description": "Last modified date. Format is in ISO 8601. Example: 2024-07-01T11:47:40.388Z", + "instillUIOrder": 9, + "title": "Last Modified Date", + "type": "string" + } + } + }, + "TASK_GET_CONTACT": { + "instillShortDescription": "Get contact information using contact ID or Email", + "input": { + "description": "Input contact ID or email", + "instillUIOrder": 0, + "instillEditOnNodeFields": ["contact-id-or-email"], + "properties": { + "contact-id-or-email": { + "description": "Input contact ID or email. If the input has @, it will search the contact using email", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUIOrder": 0, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Contact ID or Email", + "type": "string" + } + }, + "required": ["contact-id-or-email"], + "title": "Input", + "type": "object" + }, + "output": { + "description": "Contact information", + "instillUIOrder": 0, + "properties": { + "owner-id": { + "$ref": "#/$defs/common/owner-id", + "instillFormat": "string", + "required": [] + }, + "email": { + "$ref": "#/$defs/contact/email", + "instillFormat": "string", + "required": [] + }, + "first-name": { + "$ref": "#/$defs/contact/first-name", + "instillFormat": "string", + "required": [] + }, + "last-name": { + "$ref": "#/$defs/contact/last-name", + "instillFormat": "string", + "required": [] + }, + "phone-number": { + "$ref": "#/$defs/contact/phone-number", + "instillFormat": "string", + "required": [] + }, + "company": { + "$ref": "#/$defs/contact/company", + 
"instillFormat": "string", + "required": [] + }, + "job-title": { + "$ref": "#/$defs/contact/job-title", + "instillFormat": "string", + "required": [] + }, + "lifecycle-stage": { + "$ref": "#/$defs/contact/lifecycle-stage", + "instillFormat": "string", + "required": [] + }, + "lead-status": { + "$ref": "#/$defs/contact/lead-status", + "instillFormat": "string", + "required": [] + }, + "contact-id": { + "description": "Contact ID", + "instillUIOrder": 9, + "required": [], + "title": "Contact ID", + "type": "string", + "instillFormat": "string" + } + }, + "required": ["contact-id"], + "title": "Output", + "type": "object" + } + }, + "TASK_CREATE_CONTACT": { + "instillShortDescription": "Create new contact", + "input": { + "description": "Contact information", + "instillUIOrder": 0, + "instillEditOnNodeFields": [ + "email", + "first-name", + "last-name", + "phone-number" + ], + "properties": { + "owner-id": { + "$ref": "#/$defs/common/owner-id", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "email": { + "$ref": "#/$defs/contact/email", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "first-name": { + "$ref": "#/$defs/contact/first-name", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "last-name": { + "$ref": "#/$defs/contact/last-name", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "phone-number": { + "$ref": "#/$defs/contact/phone-number", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "company": { + "$ref": "#/$defs/contact/company", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": 
["value", "reference", "template"] + }, + "job-title": { + "$ref": "#/$defs/contact/job-title", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "lifecycle-stage": { + "$ref": "#/$defs/contact/lifecycle-stage", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "lead-status": { + "$ref": "#/$defs/contact/lead-status", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "create-deals-association": { + "$ref": "#/$defs/common/create-deals-association", + "instillUIOrder": 9 + }, + "create-companies-association": { + "$ref": "#/$defs/common/create-companies-association", + "instillUIOrder": 10 + }, + "create-tickets-association": { + "$ref": "#/$defs/common/create-tickets-association", + "instillUIOrder": 11 + } + }, + "required": ["email"], + "title": "Input", + "type": "object" + }, + "output": { + "description": "Obtain contact ID", + "instillUIOrder": 0, + "properties": { + "contact-id": { + "description": "Contact ID", + "instillUIOrder": 0, + "required": [], + "title": "Contact ID", + "type": "string", + "instillFormat": "string" + } + }, + "required": ["contact-id"], + "title": "Output", + "type": "object" + } + }, + "TASK_GET_DEAL": { + "instillShortDescription": "Get deal information using deal ID", + "input": { + "description": "Input deal ID", + "instillUIOrder": 0, + "instillEditOnNodeFields": ["deal-id"], + "properties": { + "deal-id": { + "description": "Input deal ID", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUIOrder": 0, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Deal ID", + "type": "string" + } + }, + "required": ["deal-id"], + "title": "Input", + "type": "object" + }, + "output": { + "description": "Deal information", + 
"instillUIOrder": 0, + "properties": { + "owner-id": { + "$ref": "#/$defs/common/owner-id", + "instillFormat": "string", + "required": [] + }, + "deal-name": { + "$ref": "#/$defs/deal/deal-name", + "instillFormat": "string", + "required": [] + }, + "pipeline": { + "$ref": "#/$defs/deal/pipeline", + "instillFormat": "string", + "required": [] + }, + "deal-stage": { + "$ref": "#/$defs/deal/deal-stage", + "instillFormat": "string", + "required": [] + }, + "amount": { + "$ref": "#/$defs/deal/amount", + "instillFormat": "number", + "required": [] + }, + "deal-type": { + "$ref": "#/$defs/deal/deal-type", + "instillFormat": "string", + "required": [] + }, + "create-date": { + "$ref": "#/$defs/deal/create-date", + "instillFormat": "string", + "required": [] + }, + "close-date": { + "$ref": "#/$defs/deal/close-date", + "instillFormat": "string", + "required": [] + }, + "associated-contact-ids": { + "$ref": "#/$defs/common/associated-contact-ids", + "instillUIorder": 8 + } + }, + "required": ["deal-name", "pipeline", "deal-stage"], + "title": "Output", + "type": "object" + } + }, + "TASK_CREATE_DEAL": { + "instillShortDescription": "Create new deal", + "input": { + "description": "Deal information", + "instillUIOrder": 0, + "instillEditOnNodeFields": ["deal-name", "pipeline", "deal-stage"], + "properties": { + "owner-id": { + "$ref": "#/$defs/common/owner-id", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "deal-name": { + "$ref": "#/$defs/deal/deal-name", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "pipeline": { + "$ref": "#/$defs/deal/pipeline", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "deal-stage": { + "$ref": "#/$defs/deal/deal-stage", + "instillAcceptFormats": ["string"], + "instillUIMultiline": 
true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "amount": { + "$ref": "#/$defs/deal/amount", + "instillAcceptFormats": ["number"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "deal-type": { + "$ref": "#/$defs/deal/deal-type", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "close-date": { + "$ref": "#/$defs/deal/close-date", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "create-contacts-association": { + "$ref": "#/$defs/common/create-contacts-association", + "instillUIOrder": 7 + } + }, + "required": ["deal-name", "pipeline", "deal-stage"], + "title": "Input", + "type": "object" + }, + "output": { + "description": "Obtain deal ID", + "instillUIOrder": 0, + "properties": { + "deal-id": { + "description": "Deal ID", + "instillUIOrder": 0, + "required": [], + "title": "Deal ID", + "type": "string", + "instillFormat": "string" + } + }, + "required": ["deal-id"], + "title": "Output", + "type": "object" + } + }, + "TASK_GET_COMPANY": { + "instillShortDescription": "Get company information using company ID", + "input": { + "description": "Input company ID", + "instillUIOrder": 0, + "instillEditOnNodeFields": ["company-id"], + "properties": { + "company-id": { + "description": "Input company ID", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUIOrder": 0, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Company ID", + "type": "string" + } + }, + "required": ["company-id"], + "title": "Input", + "type": "object" + }, + "output": { + "description": "Company information", + "instillUIOrder": 0, + "properties": { + "owner-id": { + "$ref": "#/$defs/common/owner-id", + "instillFormat": "string", + "required": [] + }, + "company-name": { + "$ref": 
"#/$defs/company/company-name", + "instillFormat": "string", + "required": [] + }, + "company-domain": { + "$ref": "#/$defs/company/company-domain", + "instillFormat": "string", + "required": [] + }, + "description": { + "$ref": "#/$defs/company/description", + "instillFormat": "string", + "required": [] + }, + "phone-number": { + "$ref": "#/$defs/company/phone-number", + "instillFormat": "string", + "required": [] + }, + "industry": { + "$ref": "#/$defs/company/industry", + "instillFormat": "string", + "required": [] + }, + "company-type": { + "$ref": "#/$defs/company/company-type", + "instillFormat": "string", + "required": [] + }, + "city": { + "$ref": "#/$defs/company/city", + "instillFormat": "string", + "required": [] + }, + "state": { + "$ref": "#/$defs/company/state", + "instillFormat": "string", + "required": [] + }, + "country": { + "$ref": "#/$defs/company/country", + "instillFormat": "string", + "required": [] + }, + "postal-code,": { + "$ref": "#/$defs/company/postal-code", + "instillFormat": "string", + "required": [] + }, + "time-zone": { + "$ref": "#/$defs/company/time-zone", + "instillFormat": "string", + "required": [] + }, + "annual-revenue": { + "$ref": "#/$defs/company/annual-revenue", + "instillFormat": "number", + "required": [] + }, + "total-revenue": { + "$ref": "#/$defs/company/total-revenue", + "instillFormat": "number", + "required": [] + }, + "linkedin-page": { + "$ref": "#/$defs/company/linkedin-page", + "instillFormat": "string", + "required": [] + }, + "associated-contact-ids": { + "$ref": "#/$defs/common/associated-contact-ids", + "instillUIorder": 15 + } + }, + "required": [], + "title": "Output", + "type": "object" + } + }, + "TASK_CREATE_COMPANY": { + "instillShortDescription": "Create new company", + "input": { + "description": "Company information", + "instillUIOrder": 0, + "instillEditOnNodeFields": [ + "company-name", + "company-domain", + "description", + "phone-number" + ], + "properties": { + "owner-id": { + "$ref": 
"#/$defs/common/owner-id", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "company-name": { + "$ref": "#/$defs/company/company-name", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "company-domain": { + "$ref": "#/$defs/company/company-domain", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "description": { + "$ref": "#/$defs/company/description", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "phone-number": { + "$ref": "#/$defs/company/phone-number", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "industry": { + "$ref": "#/$defs/company/industry", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "company-type": { + "$ref": "#/$defs/company/company-type", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "city": { + "$ref": "#/$defs/company/city", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "state": { + "$ref": "#/$defs/company/state", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "country": { + "$ref": "#/$defs/company/country", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "postal-code,": { + "$ref": "#/$defs/company/postal-code", + "instillAcceptFormats": ["string"], + 
"instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "time-zone": { + "$ref": "#/$defs/company/time-zone", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "annual-revenue": { + "$ref": "#/$defs/company/annual-revenue", + "instillAcceptFormats": ["number"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "linkedin-page": { + "$ref": "#/$defs/company/linkedin-page", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "create-contacts-association": { + "$ref": "#/$defs/common/create-contacts-association", + "instillUIOrder": 15 + } + }, + "required": ["company-domain"], + "title": "Input", + "type": "object" + }, + "output": { + "description": "Obtain company ID", + "instillUIOrder": 0, + "properties": { + "company-id": { + "description": "Company ID", + "instillUIOrder": 0, + "required": [], + "title": "Company ID", + "type": "string", + "instillFormat": "string" + } + }, + "required": ["company-id"], + "title": "Output", + "type": "object" + } + }, + "TASK_GET_TICKET": { + "instillShortDescription": "Get ticket information using ticket ID", + "input": { + "description": "Input ticket ID", + "instillUIOrder": 0, + "instillEditOnNodeFields": ["ticket-id"], + "properties": { + "ticket-id": { + "description": "Input ticket ID", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUIOrder": 0, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Ticket ID", + "type": "string" + } + }, + "required": ["ticket-id"], + "title": "Input", + "type": "object" + }, + "output": { + "description": "Ticket information", + "instillUIOrder": 0, + "properties": { + "owner-id": { + "$ref": "#/$defs/common/owner-id", + "instillFormat": "string", + "required": [] + }, + 
"ticket-name": { + "$ref": "#/$defs/ticket/ticket-name", + "instillFormat": "string", + "required": [] + }, + "ticket-status": { + "$ref": "#/$defs/ticket/ticket-status", + "instillFormat": "string", + "required": [] + }, + "pipeline": { + "$ref": "#/$defs/ticket/pipeline", + "instillFormat": "string", + "required": [] + }, + "categories": { + "$ref": "#/$defs/ticket/categories", + "instillFormat": "array:string", + "items": { + "title": "Category Value", + "type": "string", + "description": "Category value", + "instillFormat": "string" + } + }, + "priority": { + "$ref": "#/$defs/ticket/priority", + "instillFormat": "string", + "required": [] + }, + "source": { + "$ref": "#/$defs/ticket/source", + "instillFormat": "string", + "required": [] + }, + "record-source": { + "$ref": "#/$defs/ticket/record-source", + "instillFormat": "string", + "required": [] + }, + "create-date": { + "$ref": "#/$defs/ticket/create-date", + "instillFormat": "string", + "required": [] + }, + "last-modified-date": { + "$ref": "#/$defs/ticket/last-modified-date", + "instillFormat": "string", + "required": [] + }, + "associated-contact-ids": { + "$ref": "#/$defs/common/associated-contact-ids", + "instillUIorder": 10 + } + }, + "required": ["ticket-name", "ticket-status", "pipeline"], + "title": "Output", + "type": "object" + } + }, + "TASK_CREATE_TICKET": { + "instillShortDescription": "Create new ticket", + "input": { + "description": "Ticket information", + "instillUIOrder": 0, + "instillEditOnNodeFields": ["ticket-name", "ticket-status", "pipeline"], + "properties": { + "owner-id": { + "$ref": "#/$defs/common/owner-id", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "ticket-name": { + "$ref": "#/$defs/ticket/ticket-name", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "ticket-status": { + "$ref": 
"#/$defs/ticket/ticket-status", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "pipeline": { + "$ref": "#/$defs/ticket/pipeline", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "categories": { + "$ref": "#/$defs/ticket/categories", + "instillAcceptFormats": ["array:string"], + "items": { + "type": "string" + }, + "instillUpstreamTypes": ["value", "reference"] + }, + "priority": { + "$ref": "#/$defs/ticket/priority", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "source": { + "$ref": "#/$defs/ticket/source", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUpstreamTypes": ["value", "reference", "template"] + }, + "create-contacts-association": { + "$ref": "#/$defs/common/create-contacts-association", + "instillUIOrder": 7 + } + }, + "required": ["ticket-name", "ticket-status", "pipeline"], + "title": "Input", + "type": "object" + }, + "output": { + "description": "Obtain ticket ID", + "instillUIOrder": 0, + "properties": { + "ticket-id": { + "description": "Ticket ID", + "instillUIOrder": 0, + "required": [], + "title": "Ticket ID", + "type": "string", + "instillFormat": "string" + } + }, + "required": ["ticket-id"], + "title": "Output", + "type": "object" + } + }, + "TASK_GET_THREAD": { + "instillShortDescription": "Retrieve all the messages inside a thread (conversation inbox). The messages will be sorted from most recent to least recent. 
Note: This task uses Conversation API from HubSpot, which is still in BETA.", + "input": { + "description": "Input thread ID", + "instillUIOrder": 0, + "instillEditOnNodeFields": ["thread-id"], + "properties": { + "thread-id": { + "description": "Input thread ID", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUIOrder": 0, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Thread ID", + "type": "string" + } + }, + "required": ["thread-id"], + "title": "Input", + "type": "object" + }, + "output": { + "description": "All messages in the thread", + "instillUIOrder": 0, + "properties": { + "results": { + "description": "An array of messages", + "instillUIOrder": 0, + "title": "Messages", + "type": "array", + "items": { + "title": "Messages Details", + "type": "object", + "properties": { + "created-at": { + "description": "message to start a conversation", + "instillFormat": "string", + "instillUIOrder": 0, + "title": "Start Conversation Message", + "type": "string" + }, + "sender": { + "description": "Sender's information", + "instillUIOrder": 1, + "title": "Sender", + "type": "object", + "instillFormat": "object", + "properties": { + "sender-name": { + "description": "The name of the sender", + "instillFormat": "string", + "instillUIOrder": 0, + "title": "Name", + "type": "string" + }, + "sender-type": { + "description": "Specify the category of sender information", + "instillFormat": "string", + "instillUIOrder": 1, + "title": "Type", + "type": "string" + }, + "sender-value": { + "description": "Contains the actual sender information (e.g.: email address)", + "instillFormat": "string", + "instillUIOrder": 2, + "title": "Value", + "type": "string" + }, + "sender-actor-id": { + "description": "The actor ID of the sender", + "instillFormat": "string", + "instillUIOrder": 3, + "title": "Actor ID", + "type": "string" + } + }, + "required": ["sender-actor-id"] + }, + "recipients": { + "description": "Recipients' 
information", + "instillFormat": "array", + "instillUIOrder": 2, + "title": "Recipients", + "type": "array", + "items": { + "title": "Recipient's information", + "type": "object", + "properties": { + "name": { + "description": "The name of the recipient", + "instillFormat": "string", + "instillUIOrder": 0, + "title": "Name", + "type": "string" + }, + "type": { + "description": "Specify the category of recipient information", + "instillFormat": "string", + "instillUIOrder": 1, + "title": "Type", + "type": "string" + }, + "value": { + "description": "Contains the actual recipient information (e.g.: email address)", + "instillFormat": "string", + "instillUIOrder": 2, + "title": "Value", + "type": "string" + } + }, + "required": [] + } + }, + "text": { + "description": "The content of the message", + "instillFormat": "string", + "instillUIOrder": 3, + "required": [], + "title": "Text", + "type": "string" + }, + "subject": { + "description": "The subject of the message", + "instillFormat": "string", + "instillUIOrder": 4, + "required": [], + "title": "Subject", + "type": "string" + }, + "channel-id": { + "description": "The ID of a generic channel returned from the channels endpoint, like 1000 for live chat, 1001 for Facebook Messenger, 1002 for email, etc.", + "instillFormat": "string", + "instillUIOrder": 5, + "required": [], + "title": "Channel ID", + "type": "string" + }, + "channel-account-id": { + "description": "The ID of an account that is part of the channel-id channel.", + "instillFormat": "string", + "instillUIOrder": 6, + "required": [], + "title": "Channel Account ID", + "type": "string" + } + }, + "required": [ + "created-at", + "text", + "channel-id", + "channel-account-id" + ] + } + } + }, + "required": ["results"], + "title": "Output", + "type": "object" + } + }, + "TASK_INSERT_MESSAGE": { + "instillShortDescription": "Insert message into a thread (only support email thread)", + "input": { + "description": "Input", + "instillUIOrder": 0, + 
"instillEditOnNodeFields": [ + "thread-id", + "sender-actor-id", + "recipients", + "channel-account-id", + "subject", + "text" + ], + "properties": { + "thread-id": { + "description": "Input thread ID", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUIOrder": 0, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Thread ID", + "type": "string" + }, + "sender-actor-id": { + "description": "Input sender actor id. Example: A-12345678. To obtain this, it is recommended to use and copy the 'Get Thread task' sender output. For more information about actor id: https://developers.hubspot.com/beta-docs/guides/api/conversations/inbox-and-messages#get-actors", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUIOrder": 1, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Sender Actor ID", + "type": "string" + }, + "recipients": { + "description": "Recipients of the message", + "title": "Recipients", + "type": "array", + "instillAcceptFormats": ["array:string"], + "instillUIOrder": 2, + "instillUpstreamTypes": ["value", "reference"], + "items": { + "type": "string" + } + }, + "channel-account-id": { + "description": "The ID of an account that is part of the channel-id channel. 
On an existing thread, it is recommended to copy channel-account-id of the most recent message on the thread.", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUIOrder": 3, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Channel Account ID", + "type": "string" + }, + "subject": { + "description": "The subject of the message", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUIOrder": 4, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Subject", + "type": "string" + }, + "text": { + "description": "The body of the message", + "instillAcceptFormats": ["string"], + "instillUIMultiline": true, + "instillUIOrder": 5, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Text", + "type": "string" + } + }, + "required": [ + "thread-id", + "sender-actor-id", + "recipients", + "channel-account-id", + "subject", + "text" + ], + "title": "Input", + "type": "object" + }, + "output": { + "description": "Status of the message", + "instillUIOrder": 0, + "properties": { + "status": { + "description": "The message status", + "instillUIOrder": 0, + "required": [], + "title": "Status", + "type": "string", + "instillFormat": "string" + } + }, + "required": ["status"], + "title": "Output", + "type": "object" + } + }, + "TASK_RETRIEVE_ASSOCIATION": { + "instillShortDescription": "Get the object IDs associated with contact ID (contact->objects). If you are trying to do the opposite (object->contacts), it is possible using the other tasks. 
Example: Go to get deal task to obtain deal->contacts", + "input": { + "description": "Contact ID and object type (CRM objects or Thread)", + "instillUIOrder": 0, + "instillEditOnNodeFields": ["contact-id", "object-type"], + "properties": { + "contact-id": { + "description": "Input contact ID", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUIOrder": 0, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Contact ID", + "type": "string" + }, + "object-type": { + "description": "Input object type (CRM objects or 'Threads'). Note: CRM objects include 'Deals', 'Companies', 'Tickets', etc", + "instillAcceptFormats": ["string"], + "instillUIMultiline": false, + "instillUIOrder": 1, + "instillUpstreamTypes": ["value", "reference", "template"], + "title": "Object Type", + "type": "string" + } + }, + "required": ["contact-id", "object-type"], + "title": "Input", + "type": "object" + }, + "output": { + "description": "All object IDs", + "instillUIOrder": 0, + "properties": { + "object-ids": { + "description": "An array of object ID associated with the contact", + "instillUIOrder": 0, + "instillFormat": "array:string", + "title": "Object ID Array ", + "type": "array", + "items": { + "title": "Object ID", + "type": "string", + "description": "The object ID associated with the contact" + } + } + }, + "required": ["object-ids"], + "title": "Output", + "type": "object" + } + } +} diff --git a/application/hubspot/v0/contact.go b/application/hubspot/v0/contact.go new file mode 100644 index 00000000..e05f90e5 --- /dev/null +++ b/application/hubspot/v0/contact.go @@ -0,0 +1,173 @@ +package hubspot + +import ( + "strings" + + hubspot "github.com/belong-inc/go-hubspot" + "github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +// Get Contact + +type TaskGetContactInput struct { + ContactIDOrEmail string `json:"contact-id-or-email"` +} + +type TaskGetContactResp struct { + OwnerID string 
`json:"hubspot_owner_id,omitempty"` + Email string `json:"email,omitempty"` + FirstName string `json:"firstname,omitempty"` + LastName string `json:"lastname,omitempty"` + PhoneNumber string `json:"phone,omitempty"` + Company string `json:"company,omitempty"` + JobTitle string `json:"jobtitle,omitempty"` + LifecycleStage string `json:"lifecyclestage,omitempty"` + LeadStatus string `json:"hs_lead_status,omitempty"` + ContactID string `json:"hs_object_id"` +} + +type TaskGetContactOutput struct { + OwnerID string `json:"owner-id,omitempty"` + Email string `json:"email,omitempty"` + FirstName string `json:"first-name,omitempty"` + LastName string `json:"last-name,omitempty"` + PhoneNumber string `json:"phone-number,omitempty"` + Company string `json:"company,omitempty"` + JobTitle string `json:"job-title,omitempty"` + LifecycleStage string `json:"lifecycle-stage,omitempty"` + LeadStatus string `json:"lead-status,omitempty"` + ContactID string `json:"contact-id"` +} + +func (e *execution) GetContact(input *structpb.Struct) (*structpb.Struct, error) { + inputStruct := TaskGetContactInput{} + + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, err + } + + uniqueKey := inputStruct.ContactIDOrEmail + + // If the user enters an email instead of a contact ID + if strings.Contains(uniqueKey, "@") { + uniqueKey += "?idProperty=email" + } + + res, err := e.client.CRM.Contact.Get(uniqueKey, &TaskGetContactResp{}, &hubspot.RequestQueryOption{CustomProperties: []string{"phone"}}) + + if err != nil { + return nil, err + } + + contactInfo := res.Properties.(*TaskGetContactResp) + + outputStruct := TaskGetContactOutput(*contactInfo) + + output, err := base.ConvertToStructpb(outputStruct) + + if err != nil { + return nil, err + } + + return output, nil +} + +// Create Contact + +type TaskCreateContactInput struct { + OwnerID string `json:"owner-id"` + Email string `json:"email"` + FirstName string `json:"first-name"` + LastName string `json:"last-name"` +
PhoneNumber string `json:"phone-number"` + Company string `json:"company"` + JobTitle string `json:"job-title"` + LifecycleStage string `json:"lifecycle-stage"` + LeadStatus string `json:"lead-status"` + CreateDealsAssociation []string `json:"create-deals-association"` + CreateCompaniesAssociation []string `json:"create-companies-association"` + CreateTicketsAssociation []string `json:"create-tickets-association"` +} + +type TaskCreateContactReq struct { + OwnerID string `json:"hubspot_owner_id,omitempty"` + Email string `json:"email,omitempty"` + FirstName string `json:"firstname,omitempty"` + LastName string `json:"lastname,omitempty"` + PhoneNumber string `json:"phone,omitempty"` + Company string `json:"company,omitempty"` + JobTitle string `json:"jobtitle,omitempty"` + LifecycleStage string `json:"lifecyclestage,omitempty"` + LeadStatus string `json:"hs_lead_status,omitempty"` + ContactID string `json:"hs_object_id"` +} + +type TaskCreateContactOutput struct { + ContactID string `json:"contact-id"` +} + +func (e *execution) CreateContact(input *structpb.Struct) (*structpb.Struct, error) { + inputStruct := TaskCreateContactInput{} + + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, err + } + + req := TaskCreateContactReq{ + OwnerID: inputStruct.OwnerID, + Email: inputStruct.Email, + FirstName: inputStruct.FirstName, + LastName: inputStruct.LastName, + PhoneNumber: inputStruct.PhoneNumber, + Company: inputStruct.Company, + JobTitle: inputStruct.JobTitle, + LifecycleStage: inputStruct.LifecycleStage, + LeadStatus: inputStruct.LeadStatus, + } + + res, err := e.client.CRM.Contact.Create(&req) + + if err != nil { + return nil, err + } + + contactID := res.Properties.(*TaskCreateContactReq).ContactID + + outputStruct := TaskCreateContactOutput{ContactID: contactID} + + output, err := base.ConvertToStructpb(outputStruct) + + if err != nil { + return nil, err + } + + // This section is for creating associations (contact -> object) 
+ + if len(inputStruct.CreateDealsAssociation) != 0 { + err := CreateAssociation(&outputStruct.ContactID, &inputStruct.CreateDealsAssociation, "contact", "deal", e) + + if err != nil { + return nil, err + } + } + if len(inputStruct.CreateCompaniesAssociation) != 0 { + err := CreateAssociation(&outputStruct.ContactID, &inputStruct.CreateCompaniesAssociation, "contact", "company", e) + + if err != nil { + return nil, err + } + } + if len(inputStruct.CreateTicketsAssociation) != 0 { + err := CreateAssociation(&outputStruct.ContactID, &inputStruct.CreateTicketsAssociation, "contact", "ticket", e) + + if err != nil { + return nil, err + } + } + return output, nil +} diff --git a/application/hubspot/v0/contact_test.go b/application/hubspot/v0/contact_test.go new file mode 100644 index 00000000..ba30ea72 --- /dev/null +++ b/application/hubspot/v0/contact_test.go @@ -0,0 +1,194 @@ +package hubspot + +import ( + "context" + "testing" + + hubspot "github.com/belong-inc/go-hubspot" + qt "github.com/frankban/quicktest" + "github.com/instill-ai/component/base" + "go.uber.org/zap" + "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/types/known/structpb" +) + +const ( + bearerToken = "123" +) + +// mockClient is a custom client that will be used for testing + +func createMockClient() *CustomClient { + + mockCRM := &hubspot.CRM{ + Contact: &MockContact{}, + Deal: &MockDeal{}, + Company: &MockCompany{}, + } + + mockClient := &CustomClient{ + Client: &hubspot.Client{ + CRM: mockCRM, + }, + Thread: &MockThread{}, + RetrieveAssociation: &MockRetrieveAssociation{}, + Ticket: &MockTicket{}, + } + + return mockClient +} + +// Mock Contact struct and its functions +type MockContact struct{} + +func (s *MockContact) Get(contactID string, contact interface{}, option *hubspot.RequestQueryOption) (*hubspot.ResponseResource, error) { + + var fakeContact TaskGetContactResp + if contactID == "32027696539" || contactID == "bh@hubspot.com" { + + fakeContact = 
TaskGetContactResp{ + FirstName: "Brian", + LastName: "Halligan (Sample Contact)", + Email: "bh@hubspot.com", + Company: "HubSpot", + JobTitle: "CEO", + LifecycleStage: "lead", + ContactID: "32027696539", + } + } + ret := &hubspot.ResponseResource{ + Properties: &fakeContact, + } + + return ret, nil +} + +func (s *MockContact) Create(contact interface{}) (*hubspot.ResponseResource, error) { + + // in the actual create function, if the user created a contact, it will return all the information back to the user, so I will be mimicking that + + arbitraryContactID := "12345678" + + fakeContactInfo := contact.(*TaskCreateContactReq) + + fakeContactInfo.ContactID = arbitraryContactID + + ret := &hubspot.ResponseResource{ + Properties: fakeContactInfo, + } + + return ret, nil +} + +func (s *MockContact) Update(contactID string, contact interface{}) (*hubspot.ResponseResource, error) { + return nil, nil +} +func (s *MockContact) Delete(contactID string) error { + return nil +} +func (s *MockContact) AssociateAnotherObj(contactID string, conf *hubspot.AssociationConfig) (*hubspot.ResponseResource, error) { + return nil, nil +} + +func TestComponent_ExecuteGetContactTask(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + tc := struct { + name string + input string + wantResp TaskGetContactOutput + }{ + name: "ok - get contact", + input: "32027696539", + wantResp: TaskGetContactOutput{ + FirstName: "Brian", + LastName: "Halligan (Sample Contact)", + Email: "bh@hubspot.com", + Company: "HubSpot", + JobTitle: "CEO", + LifecycleStage: "lead", + ContactID: "32027696539", + }, + } + + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "token": bearerToken, + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: taskGetContact}, + client: createMockClient(), + } 
+ + e.execute = e.GetContact + exec := &base.ExecutionWrapper{Execution: e} + + pbInput, err := structpb.NewStruct(map[string]any{ + "contact-id-or-email": tc.input, + }) + + c.Assert(err, qt.IsNil) + + res, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbInput}) + + c.Assert(err, qt.IsNil) + + resJSON, err := protojson.Marshal(res[0]) + c.Assert(err, qt.IsNil) + + c.Check(resJSON, qt.JSONEquals, tc.wantResp) + + }) +} + +func TestComponent_ExecuteCreateContactTask(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + tc := struct { + name string + input TaskCreateContactInput + wantResp string + }{ + name: "ok - create contact", + input: TaskCreateContactInput{ + FirstName: "Test", + LastName: "Name", + Email: "test_name@gmail.com", + }, + wantResp: "12345678", + } + + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "token": bearerToken, + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: taskCreateContact}, + client: createMockClient(), + } + e.execute = e.CreateContact + exec := &base.ExecutionWrapper{Execution: e} + + pbInput, err := base.ConvertToStructpb(tc.input) + + c.Assert(err, qt.IsNil) + + res, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbInput}) + c.Assert(err, qt.IsNil) + + resString := res[0].Fields["contact-id"].GetStringValue() + + c.Check(resString, qt.Equals, tc.wantResp) + + }) +} diff --git a/application/hubspot/v0/custom_client.go b/application/hubspot/v0/custom_client.go new file mode 100644 index 00000000..432360f3 --- /dev/null +++ b/application/hubspot/v0/custom_client.go @@ -0,0 +1,43 @@ +package hubspot + +import ( + hubspot "github.com/belong-inc/go-hubspot" +) + +// need to create CustomClient because the go-hubspot sdk we are using does not support threads (conversation inbox) +// future 
functionalities that go-huspot sdk doesn't support will go here or need to be modified will go here. +type CustomClient struct { + *hubspot.Client + Thread ThreadService + RetrieveAssociation RetrieveAssociationService + Ticket TicketService +} + +func NewCustomClient(setAuthMethod hubspot.AuthMethod, opts ...hubspot.Option) (*CustomClient, error) { + + // call default NewClient + c, err := hubspot.NewClient(setAuthMethod, opts...) + + if err != nil { + return nil, err + } + + customC := &CustomClient{ + Client: c, + Thread: &ThreadServiceOp{ + threadPath: "conversations/v3/conversations/threads", + client: c, + }, + RetrieveAssociation: &RetrieveAssociationServiceOp{ + retrieveCrmIDPath: "crm/v3/associations/Contacts", + retrieveThreadIDPath: "conversations/v3/conversations/threads?associatedContactId=", + client: c, + }, + Ticket: &TicketServiceOp{ + ticketPath: "crm/v3/objects/tickets", + client: c, + }, + } + + return customC, nil +} diff --git a/application/hubspot/v0/deal.go b/application/hubspot/v0/deal.go new file mode 100644 index 00000000..4e23d012 --- /dev/null +++ b/application/hubspot/v0/deal.go @@ -0,0 +1,183 @@ +package hubspot + +import ( + "strconv" + + hubspot "github.com/belong-inc/go-hubspot" + "github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +// Get Deal + +type TaskGetDealInput struct { + DealID string `json:"deal-id"` +} + +type TaskGetDealResp struct { + OwnerID string `json:"hubspot_owner_id,omitempty"` + DealName string `json:"dealname"` + Pipeline string `json:"pipeline"` + DealStage string `json:"dealstage"` + Amount string `json:"amount,omitempty"` + DealType string `json:"dealtype,omitempty"` + CloseDate string `json:"closedate,omitempty"` + CreateDate string `json:"createdate"` +} + +type TaskGetDealOutput struct { + OwnerID string `json:"owner-id,omitempty"` + DealName string `json:"deal-name"` + Pipeline string `json:"pipeline"` + DealStage string `json:"deal-stage"` + Amount float64 
`json:"amount,omitempty"` + DealType string `json:"deal-type,omitempty"` + CreateDate string `json:"create-date"` + CloseDate string `json:"close-date,omitempty"` + AssociatedContactIDs []string `json:"associated-contact-ids,omitempty"` +} + +func (e *execution) GetDeal(input *structpb.Struct) (*structpb.Struct, error) { + inputStruct := TaskGetDealInput{} + + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, err + } + + // get deal information + + res, err := e.client.CRM.Deal.Get(inputStruct.DealID, &TaskGetDealResp{}, &hubspot.RequestQueryOption{Associations: []string{"contacts"}}) + + if err != nil { + return nil, err + } + + dealInfo := res.Properties.(*TaskGetDealResp) + + // get contacts associated with deal + + var dealContactList []string + if res.Associations != nil { + dealContactAssociation := res.Associations.Contacts.Results + dealContactList = make([]string, len(dealContactAssociation)) + for index, value := range dealContactAssociation { + dealContactList[index] = value.ID + } + } + + // convert to outputStruct + + var amount float64 + + if dealInfo.Amount != "" { + var err error + amount, err = strconv.ParseFloat(dealInfo.Amount, 64) + + if err != nil { + return nil, err + } + } + + outputStruct := TaskGetDealOutput{ + OwnerID: dealInfo.OwnerID, + DealName: dealInfo.DealName, + Pipeline: dealInfo.Pipeline, + DealStage: dealInfo.DealStage, + Amount: amount, + DealType: dealInfo.DealType, + CreateDate: dealInfo.CreateDate, + CloseDate: dealInfo.CloseDate, + AssociatedContactIDs: dealContactList, + } + + output, err := base.ConvertToStructpb(outputStruct) + + if err != nil { + return nil, err + } + + return output, nil +} + +// Create Deal + +type TaskCreateDealInput struct { + OwnerID string `json:"owner-id"` + DealName string `json:"deal-name"` + Pipeline string `json:"pipeline"` + DealStage string `json:"deal-stage"` + Amount float64 `json:"amount"` + DealType string `json:"deal-type"` + CloseDate string 
`json:"close-date"` + CreateContactsAssociation []string `json:"create-contacts-association"` +} + +type TaskCreateDealReq struct { + OwnerID string `json:"hubspot_owner_id,omitempty"` + DealName string `json:"dealname"` + Pipeline string `json:"pipeline"` + DealStage string `json:"dealstage"` + Amount string `json:"amount,omitempty"` + DealType string `json:"dealtype,omitempty"` + CloseDate string `json:"closedate,omitempty"` + DealID string `json:"hs_object_id"` +} + +type TaskCreateDealOutput struct { + DealID string `json:"deal-id"` +} + +func (e *execution) CreateDeal(input *structpb.Struct) (*structpb.Struct, error) { + + inputStruct := TaskCreateDealInput{} + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, err + } + + var amount string + if inputStruct.Amount != 0 { + amount = strconv.FormatFloat(inputStruct.Amount, 'f', -1, 64) + } + + req := TaskCreateDealReq{ + OwnerID: inputStruct.OwnerID, + DealName: inputStruct.DealName, + Pipeline: inputStruct.Pipeline, + DealStage: inputStruct.DealStage, + Amount: amount, + DealType: inputStruct.DealType, + CloseDate: inputStruct.CloseDate, + } + + res, err := e.client.CRM.Deal.Create(&req) + + if err != nil { + return nil, err + } + + // get deal ID + dealID := res.Properties.(*TaskCreateDealReq).DealID + + outputStruct := TaskCreateDealOutput{DealID: dealID} + + output, err := base.ConvertToStructpb(outputStruct) + + if err != nil { + return nil, err + } + + // This section is for creating associations (deal -> object) + if len(inputStruct.CreateContactsAssociation) != 0 { + err := CreateAssociation(&outputStruct.DealID, &inputStruct.CreateContactsAssociation, "deal", "contact", e) + + if err != nil { + return nil, err + } + } + + return output, nil +} diff --git a/application/hubspot/v0/deal_test.go b/application/hubspot/v0/deal_test.go new file mode 100644 index 00000000..1925f895 --- /dev/null +++ b/application/hubspot/v0/deal_test.go @@ -0,0 +1,156 @@ +package hubspot + 
+import ( + "context" + "testing" + + hubspot "github.com/belong-inc/go-hubspot" + qt "github.com/frankban/quicktest" + "github.com/instill-ai/component/base" + "go.uber.org/zap" + "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/types/known/structpb" +) + +// mockClient is in contact_test.go + +// Mock Deal struct and its functions +type MockDeal struct{} + +func (s *MockDeal) Get(dealID string, deal interface{}, option *hubspot.RequestQueryOption) (*hubspot.ResponseResource, error) { + + var fakeDeal TaskGetDealResp + if dealID == "20620806729" { + fakeDeal = TaskGetDealResp{ + DealName: "Fake deal", + Pipeline: "default", + DealStage: "qualifiedtobuy", + CreateDate: "2024-07-09T02:22:06.140Z", + } + } + + ret := &hubspot.ResponseResource{ + Properties: &fakeDeal, + } + + return ret, nil +} + +func (s *MockDeal) Create(deal interface{}) (*hubspot.ResponseResource, error) { + arbitraryDealID := "12345678900" + + fakeDealInfo := deal.(*TaskCreateDealReq) + + fakeDealInfo.DealID = arbitraryDealID + + ret := &hubspot.ResponseResource{ + Properties: fakeDealInfo, + } + + return ret, nil +} + +func (s *MockDeal) Update(dealID string, deal interface{}) (*hubspot.ResponseResource, error) { + return nil, nil +} +func (s *MockDeal) AssociateAnotherObj(dealID string, conf *hubspot.AssociationConfig) (*hubspot.ResponseResource, error) { + return nil, nil +} + +func TestComponent_ExecuteGetDealTask(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + tc := struct { + name string + input string + wantResp TaskGetDealOutput + }{ + name: "ok - get deal", + input: "20620806729", + wantResp: TaskGetDealOutput{ + DealName: "Fake deal", + Pipeline: "default", + DealStage: "qualifiedtobuy", + CreateDate: "2024-07-09T02:22:06.140Z", + }, + } + + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "token": bearerToken, + }) + c.Assert(err, 
qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: taskGetDeal}, + client: createMockClient(), + } + e.execute = e.GetDeal + exec := &base.ExecutionWrapper{Execution: e} + + pbInput, err := structpb.NewStruct(map[string]any{ + "deal-id": tc.input, + }) + + c.Assert(err, qt.IsNil) + + res, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbInput}) + c.Assert(err, qt.IsNil) + + resJSON, err := protojson.Marshal(res[0]) + c.Assert(err, qt.IsNil) + + c.Check(resJSON, qt.JSONEquals, tc.wantResp) + + }) +} + +func TestComponent_ExecuteCreateDealTask(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + tc := struct { + name string + inputDeal TaskCreateDealInput + wantResp string + }{ + name: "ok - create deal", + inputDeal: TaskCreateDealInput{ + DealName: "Test Creating Deal", + Pipeline: "default", + DealStage: "contractsent", + }, + wantResp: "12345678900", + } + + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "token": bearerToken, + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: taskCreateDeal}, + client: createMockClient(), + } + e.execute = e.CreateDeal + exec := &base.ExecutionWrapper{Execution: e} + + pbInput, err := base.ConvertToStructpb(tc.inputDeal) + + c.Assert(err, qt.IsNil) + + res, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbInput}) + c.Assert(err, qt.IsNil) + + resString := res[0].Fields["deal-id"].GetStringValue() + + c.Check(resString, qt.Equals, tc.wantResp) + + }) +} diff --git a/application/hubspot/v0/main.go b/application/hubspot/v0/main.go new file mode 100644 index 00000000..57b46667 --- /dev/null +++ b/application/hubspot/v0/main.go @@ -0,0 +1,129 @@ +//go:generate compogen readme ./config ./README.mdx + 
+package hubspot + +import ( + "context" + _ "embed" + "fmt" + "sync" + + hubspot "github.com/belong-inc/go-hubspot" + "github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +const ( + taskGetContact = "TASK_GET_CONTACT" + taskCreateContact = "TASK_CREATE_CONTACT" + taskGetDeal = "TASK_GET_DEAL" + taskCreateDeal = "TASK_CREATE_DEAL" + taskGetCompany = "TASK_GET_COMPANY" + taskCreateCompany = "TASK_CREATE_COMPANY" + taskGetTicket = "TASK_GET_TICKET" + taskCreateTicket = "TASK_CREATE_TICKET" + taskGetThread = "TASK_GET_THREAD" + taskInsertMessage = "TASK_INSERT_MESSAGE" + taskRetrieveAssociation = "TASK_RETRIEVE_ASSOCIATION" +) + +var ( + //go:embed config/definition.json + definitionJSON []byte + //go:embed config/tasks.json + tasksJSON []byte + //go:embed config/setup.json + setupJSON []byte + + once sync.Once + comp *component +) + +type component struct { + base.Component +} + +type execution struct { + base.ComponentExecution + client *CustomClient + execute func(*structpb.Struct) (*structpb.Struct, error) +} + +func Init(bc base.Component) *component { + once.Do(func() { + comp = &component{Component: bc} + err := comp.LoadDefinition(definitionJSON, setupJSON, tasksJSON, nil) + if err != nil { + panic(err) + } + }) + return comp +} + +func getToken(setup *structpb.Struct) string { + return setup.GetFields()["token"].GetStringValue() +} + +// custom client to support thread task +func hubspotNewCustomClient(setup *structpb.Struct) *CustomClient { + client, err := NewCustomClient(hubspot.SetPrivateAppToken(getToken(setup))) + + if err != nil { + panic(err) + } + + return client +} + +func (c *component) CreateExecution(sysVars map[string]any, setup *structpb.Struct, task string) (*base.ExecutionWrapper, error) { + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: c, SystemVariables: sysVars, Task: task}, + client: hubspotNewCustomClient(setup), + } + + switch task { + case taskGetContact: + 
e.execute = e.GetContact + case taskCreateContact: + e.execute = e.CreateContact + case taskGetDeal: + e.execute = e.GetDeal + case taskCreateDeal: + e.execute = e.CreateDeal + case taskGetCompany: + e.execute = e.GetCompany + case taskCreateCompany: + e.execute = e.CreateCompany + case taskGetTicket: + e.execute = e.GetTicket + case taskCreateTicket: + e.execute = e.CreateTicket + case taskGetThread: + e.execute = e.GetThread + case taskInsertMessage: + e.execute = e.InsertMessage + case taskRetrieveAssociation: + e.execute = e.RetrieveAssociation + default: + return nil, fmt.Errorf("unsupported task") + } + + return &base.ExecutionWrapper{Execution: e}, nil +} + +func (e *execution) Execute(_ context.Context, inputs []*structpb.Struct) ([]*structpb.Struct, error) { + + outputs := make([]*structpb.Struct, len(inputs)) + + for i, input := range inputs { + output, err := e.execute(input) + if err != nil { + return nil, err + } + + outputs[i] = output + } + + return outputs, nil +} diff --git a/application/hubspot/v0/thread.go b/application/hubspot/v0/thread.go new file mode 100644 index 00000000..e9cc9511 --- /dev/null +++ b/application/hubspot/v0/thread.go @@ -0,0 +1,282 @@ +package hubspot + +import ( + "fmt" + + hubspot "github.com/belong-inc/go-hubspot" + "github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +// following go-hubspot sdk format +// Note: The conversation API is still in BETA, and hence, any of these struct can change in the future. 
+ +// API functions for Thread + +type ThreadService interface { + Get(threadID string) (*TaskGetThreadResp, error) + Insert(threadID string, message *TaskInsertMessageReq) (*TaskInsertMessageResp, error) +} + +type ThreadServiceOp struct { + threadPath string + client *hubspot.Client +} + +func (s *ThreadServiceOp) Get(threadID string) (*TaskGetThreadResp, error) { + resource := &TaskGetThreadResp{} + if err := s.client.Get(s.threadPath+"/"+threadID+"/messages", resource, nil); err != nil { + return nil, err + } + return resource, nil +} + +func (s *ThreadServiceOp) Insert(threadID string, message *TaskInsertMessageReq) (*TaskInsertMessageResp, error) { + resource := &TaskInsertMessageResp{} + if err := s.client.Post(s.threadPath+"/"+threadID+"/messages", message, resource); err != nil { + return nil, err + } + return resource, nil +} + +// Get Thread + +// Get Thread Input + +type TaskGetThreadInput struct { + ThreadID string `json:"thread-id"` +} + +// Get Thread Reponse structs + +type TaskGetThreadResp struct { + Results []taskGetThreadRespResult `json:"results"` +} + +type taskGetThreadRespResult struct { + CreatedAt string `json:"createdAt"` + Senders []taskGetThreadRespUser `json:"senders,omitempty"` + Recipients []taskGetThreadRespUser `json:"recipients,omitempty"` + Text string `json:"text,omitempty"` + Subject string `json:"subject,omitempty"` + ChannelID string `json:"channelId,omitempty"` + ChannelAccountID string `json:"channelAccountId,omitempty"` + Type string `json:"type,omitempty"` +} + +type taskGetThreadRespUser struct { + Name string `json:"name,omitempty"` + DeliveryIdentifier taskGetThreadRespIdentifier `json:"deliveryIdentifier,omitempty"` + ActorID string `json:"actorId,omitempty"` //only applicable to sender +} + +type taskGetThreadRespIdentifier struct { + Type string `json:"type,omitempty"` + Value string `json:"value,omitempty"` +} + +// Get Thread Output structs + +type TaskGetThreadOutput struct { + Results []taskGetThreadOutputResult 
`json:"results"` +} + +type taskGetThreadOutputResult struct { + CreatedAt string `json:"created-at"` + Sender taskGetThreadOutputSender `json:"sender,omitempty"` + Recipients []taskGetThreadOutputRecipient `json:"recipients,omitempty"` + Text string `json:"text"` + Subject string `json:"subject,omitempty"` + ChannelID string `json:"channel-id"` + ChannelAccountID string `json:"channel-account-id"` +} + +// It is named as sender-x so that it is clearer for the user that it is referring to the sender's information. + +type taskGetThreadOutputSender struct { + Name string `json:"sender-name,omitempty"` + Type string `json:"sender-type,omitempty"` + Value string `json:"sender-value,omitempty"` + ActorID string `json:"sender-actor-id"` +} + +type taskGetThreadOutputRecipient struct { + Name string `json:"name"` + Type string `json:"type"` + Value string `json:"value"` + ActorID string `json:"actor-id"` +} + +func (e *execution) GetThread(input *structpb.Struct) (*structpb.Struct, error) { + + inputStruct := TaskGetThreadInput{} + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, err + } + + res, err := e.client.Thread.Get(inputStruct.ThreadID) + + if err != nil { + return nil, err + } + + // convert to output struct + + outputStruct := TaskGetThreadOutput{} + + for _, value1 := range res.Results { + // this way, the output will only contain the actual messages in the thread (ignore system message from hubspot) + if value1.Type != "MESSAGE" { + continue + } + + resultOutput := taskGetThreadOutputResult{ + CreatedAt: value1.CreatedAt, + Text: value1.Text, + Subject: value1.Subject, + ChannelID: value1.ChannelID, + ChannelAccountID: value1.ChannelAccountID, + } + + // there should only be one sender + // sender + if len(value1.Senders) > 0 { + value2 := value1.Senders[0] + userSenderOutput := taskGetThreadOutputSender{ + Name: value2.Name, + Type: value2.DeliveryIdentifier.Type, + Value: value2.DeliveryIdentifier.Value, + ActorID: 
value2.ActorID, + } + resultOutput.Sender = userSenderOutput + } + + // recipient + for _, value3 := range value1.Recipients { + userRecipientOutput := taskGetThreadOutputRecipient{ + Name: value3.Name, + Type: value3.DeliveryIdentifier.Type, + Value: value3.DeliveryIdentifier.Value, + } + + resultOutput.Recipients = append(resultOutput.Recipients, userRecipientOutput) + + } + + outputStruct.Results = append(outputStruct.Results, resultOutput) + + } + + output, err := base.ConvertToStructpb(outputStruct) + if err != nil { + return nil, err + } + + return output, nil +} + +// Insert Message + +// Input + +type TaskInsertMessageInput struct { + ThreadID string `json:"thread-id"` + SenderActorID string `json:"sender-actor-id"` + Recipients []string `json:"recipients"` + ChannelAccountID string `json:"channel-account-id"` + Subject string `json:"subject"` + Text string `json:"text"` +} + +// Request + +type TaskInsertMessageReq struct { + Type string `json:"type"` + Text string `json:"text"` //content of the message + Recipients []taskInsertMessageReqRecipient `json:"recipients"` + SenderActorID string `json:"senderActorId"` + ChannelID string `json:"channelId"` + ChannelAccountID string `json:"channelAccountId"` + Subject string `json:"subject"` +} + +type taskInsertMessageReqRecipient struct { + RecipientField string `json:"recipientField"` + DeliveryIdentifier taskInsertMessageReqIdentifier `json:"deliveryIdentifier"` +} + +type taskInsertMessageReqIdentifier struct { + Type string `json:"type"` + Value string `json:"value"` +} + +// Response + +type TaskInsertMessageResp struct { + Status taskInsertMessageRespStatusType `json:"status"` + Message string `json:"message,omitempty"` +} + +type taskInsertMessageRespStatusType struct { + StatusType string `json:"statusType"` +} + +// Output + +type TaskInsertMessageOutput struct { + Status string `json:"status"` +} + +func (e *execution) InsertMessage(input *structpb.Struct) (*structpb.Struct, error) { + inputStruct := 
TaskInsertMessageInput{} + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, err + } + + recipients := make([]taskInsertMessageReqRecipient, len(inputStruct.Recipients)) + for index, value := range inputStruct.Recipients { + recipients[index] = taskInsertMessageReqRecipient{ + RecipientField: "TO", + DeliveryIdentifier: taskInsertMessageReqIdentifier{ + Type: "HS_EMAIL_ADDRESS", + Value: value, + }, + } + } + + req := TaskInsertMessageReq{ + Type: "MESSAGE", + Text: inputStruct.Text, + Recipients: recipients, + SenderActorID: inputStruct.SenderActorID, + ChannelID: "1002", //1002 is for email + ChannelAccountID: inputStruct.ChannelAccountID, + Subject: inputStruct.Subject, + } + + res, err := e.client.Thread.Insert(inputStruct.ThreadID, &req) + + if err != nil { + return nil, err + } + + outputStruct := TaskInsertMessageOutput{ + Status: res.Status.StatusType, + } + + if outputStruct.Status != "SENT" { + return nil, fmt.Errorf("error sending message") + } + + output, err := base.ConvertToStructpb(outputStruct) + + if err != nil { + return nil, err + } + + return output, nil +} diff --git a/application/hubspot/v0/thread_test.go b/application/hubspot/v0/thread_test.go new file mode 100644 index 00000000..bc17c7a6 --- /dev/null +++ b/application/hubspot/v0/thread_test.go @@ -0,0 +1,182 @@ +package hubspot + +import ( + "context" + "testing" + + qt "github.com/frankban/quicktest" + "github.com/instill-ai/component/base" + "go.uber.org/zap" + "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/types/known/structpb" +) + +// mockClient is in contact_test.go + +// Mock Thread struct and its functions +type MockThread struct{} + +func (s *MockThread) Get(threadID string) (*TaskGetThreadResp, error) { + + var fakeThread TaskGetThreadResp + if threadID == "7509711154" { + fakeThread = TaskGetThreadResp{ + Results: []taskGetThreadRespResult{ + { + CreatedAt: "2024-07-02T10:42:15Z", + Senders: 
[]taskGetThreadRespUser{ + { + Name: "Brian Halligan (Sample Contact)", + DeliveryIdentifier: taskGetThreadRespIdentifier{ + Type: "HS_EMAIL_ADDRESS", + Value: "bh@hubspot.com", + }, + }, + }, + Recipients: []taskGetThreadRespUser{ + { + DeliveryIdentifier: taskGetThreadRespIdentifier{ + Type: "HS_EMAIL_ADDRESS", + Value: "fake_email@gmail.com", + }, + }, + }, + Text: "Just random content inside", + Subject: "A fake message", + ChannelID: "1002", + ChannelAccountID: "638727358", + Type: "MESSAGE", + }, + }, + } + } + + return &fakeThread, nil +} + +func (s *MockThread) Insert(threadID string, message *TaskInsertMessageReq) (*TaskInsertMessageResp, error) { + + res := &TaskInsertMessageResp{} + if threadID == "7509711154" { + res.Status = taskInsertMessageRespStatusType{ + StatusType: "SENT", + } + } + return res, nil +} + +func TestComponent_ExecuteGetThreadTask(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + tc := struct { + name string + input string + wantResp TaskGetThreadOutput + }{ + name: "ok - get thread", + input: "7509711154", + wantResp: TaskGetThreadOutput{ + Results: []taskGetThreadOutputResult{ + { + CreatedAt: "2024-07-02T10:42:15Z", + Sender: taskGetThreadOutputSender{ + Name: "Brian Halligan (Sample Contact)", + Type: "HS_EMAIL_ADDRESS", + Value: "bh@hubspot.com", + }, + Recipients: []taskGetThreadOutputRecipient{ + { + Type: "HS_EMAIL_ADDRESS", + Value: "fake_email@gmail.com", + }, + }, + Text: "Just random content inside", + Subject: "A fake message", + ChannelID: "1002", + ChannelAccountID: "638727358", + }, + }, + }, + } + + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "token": bearerToken, + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: taskGetThread}, + client: createMockClient(), + } + e.execute = 
e.GetThread + exec := &base.ExecutionWrapper{Execution: e} + pbInput, err := structpb.NewStruct(map[string]any{ + "thread-id": tc.input, + }) + + c.Assert(err, qt.IsNil) + + res, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbInput}) + c.Assert(err, qt.IsNil) + + resJSON, err := protojson.Marshal(res[0]) + c.Assert(err, qt.IsNil) + + c.Check(resJSON, qt.JSONEquals, tc.wantResp) + + }) +} + +func TestComponent_ExecuteInsertMessageTask(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + tc := struct { + name string + input TaskInsertMessageInput + wantResp string + }{ + + name: "ok - insert message", + input: TaskInsertMessageInput{ + ThreadID: "7509711154", + SenderActorID: "A-12345678", + Recipients: []string{"randomemail@gmail.com"}, + ChannelAccountID: "123456789", + Subject: "A fake message", + Text: "A message with random content inside", + }, + wantResp: "SENT", + } + + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "token": bearerToken, + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: taskInsertMessage}, + client: createMockClient(), + } + e.execute = e.InsertMessage + exec := &base.ExecutionWrapper{Execution: e} + + pbInput, err := base.ConvertToStructpb(tc.input) + + c.Assert(err, qt.IsNil) + + res, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbInput}) + + c.Assert(err, qt.IsNil) + resString := res[0].Fields["status"].GetStringValue() + c.Check(resString, qt.Equals, tc.wantResp) + + }) + +} diff --git a/application/hubspot/v0/ticket.go b/application/hubspot/v0/ticket.go new file mode 100644 index 00000000..38007ab0 --- /dev/null +++ b/application/hubspot/v0/ticket.go @@ -0,0 +1,219 @@ +package hubspot + +import ( + "strings" + + hubspot "github.com/belong-inc/go-hubspot" + 
"github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +// following go-hubspot sdk format + +// API functions for Ticket + +type TicketService interface { + Get(ticketID string) (*hubspot.ResponseResource, error) + Create(ticket *TaskCreateTicketReq) (*hubspot.ResponseResource, error) +} + +type TicketServiceOp struct { + client *hubspot.Client + ticketPath string +} + +var ticketProperties = []string{ + "hubspot_owner_id", + "subject", + "hs_pipeline_stage", + "hs_pipeline", + "hs_ticket_category", + "hs_ticket_priority", + "source_type", + "hs_object_source_label", + "createdate", + "hs_lastmodifieddate", +} + +func (s *TicketServiceOp) Get(ticketID string) (*hubspot.ResponseResource, error) { + resource := &hubspot.ResponseResource{Properties: &TaskGetTicketResp{}} + option := &hubspot.RequestQueryOption{Properties: ticketProperties, Associations: []string{"contacts"}} + if err := s.client.Get(s.ticketPath+"/"+ticketID, resource, option); err != nil { + return nil, err + } + + return resource, nil +} + +func (s *TicketServiceOp) Create(ticket *TaskCreateTicketReq) (*hubspot.ResponseResource, error) { + req := &hubspot.RequestPayload{Properties: ticket} + resource := &hubspot.ResponseResource{Properties: ticket} + if err := s.client.Post(s.ticketPath, req, resource); err != nil { + return nil, err + } + return resource, nil +} + +// Get Ticket + +type TaskGetTicketInput struct { + TicketID string `json:"ticket-id"` +} + +type TaskGetTicketResp struct { + OwnerID string `json:"hubspot_owner_id,omitempty"` + TicketName string `json:"subject"` + TicketStatus string `json:"hs_pipeline_stage"` + Pipeline string `json:"hs_pipeline"` + Category string `json:"hs_ticket_category,omitempty"` + Priority string `json:"hs_ticket_priority,omitempty"` + Source string `json:"source_type,omitempty"` + RecordSource string `json:"hs_object_source_label,omitempty"` + CreateDate string `json:"createdate"` + LastModifiedDate string 
`json:"hs_lastmodifieddate"` + TicketID string `json:"hs_object_id"` +} + +type TaskGetTicketOutput struct { + OwnerID string `json:"owner-id,omitempty"` + TicketName string `json:"ticket-name"` + TicketStatus string `json:"ticket-status"` + Pipeline string `json:"pipeline"` + Category []string `json:"categories,omitempty"` + Priority string `json:"priority,omitempty"` + Source string `json:"source,omitempty"` + RecordSource string `json:"record-source,omitempty"` + CreateDate string `json:"create-date"` + LastModifiedDate string `json:"last-modified-date"` + AssociatedContactIDs []string `json:"associated-contact-ids,omitempty"` +} + +func (e *execution) GetTicket(input *structpb.Struct) (*structpb.Struct, error) { + + inputStruct := TaskGetTicketInput{} + + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, err + } + + res, err := e.client.Ticket.Get(inputStruct.TicketID) + if err != nil { + return nil, err + } + + ticketInfo := res.Properties.(*TaskGetTicketResp) + + // get contacts associated with ticket + + var ticketContactList []string + if res.Associations != nil { + ticketContactAssociation := res.Associations.Contacts.Results + ticketContactList = make([]string, len(ticketContactAssociation)) + for index, value := range ticketContactAssociation { + ticketContactList[index] = value.ID + } + } + + var categoryValues []string + if ticketInfo.Category != "" { + categoryValues = strings.Split(ticketInfo.Category, ";") + } + + outputStruct := TaskGetTicketOutput{ + OwnerID: ticketInfo.OwnerID, + TicketName: ticketInfo.TicketName, + TicketStatus: ticketInfo.TicketStatus, + Pipeline: ticketInfo.Pipeline, + Category: categoryValues, + Priority: ticketInfo.Priority, + Source: ticketInfo.Source, + RecordSource: ticketInfo.RecordSource, + CreateDate: ticketInfo.CreateDate, + LastModifiedDate: ticketInfo.LastModifiedDate, + AssociatedContactIDs: ticketContactList, + } + + output, err := base.ConvertToStructpb(outputStruct) + if err 
!= nil { + return nil, err + } + + return output, nil +} + +// Create Ticket +type TaskCreateTicketInput struct { + OwnerID string `json:"owner-id"` + TicketName string `json:"ticket-name"` + TicketStatus string `json:"ticket-status"` + Pipeline string `json:"pipeline"` + Category []string `json:"categories"` + Priority string `json:"priority"` + Source string `json:"source"` + CreateContactsAssociation []string `json:"create-contacts-association"` +} + +type TaskCreateTicketReq struct { + OwnerID string `json:"hubspot_owner_id,omitempty"` + TicketName string `json:"subject"` + TicketStatus string `json:"hs_pipeline_stage"` + Pipeline string `json:"hs_pipeline"` + Category string `json:"hs_ticket_category,omitempty"` + Priority string `json:"hs_ticket_priority,omitempty"` + Source string `json:"source_type,omitempty"` + TicketID string `json:"hs_object_id"` +} + +type TaskCreateTicketOutput struct { + TicketID string `json:"ticket-id"` +} + +func (e *execution) CreateTicket(input *structpb.Struct) (*structpb.Struct, error) { + + inputStruct := TaskCreateTicketInput{} + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, err + } + + req := TaskCreateTicketReq{ + OwnerID: inputStruct.OwnerID, + TicketName: inputStruct.TicketName, + TicketStatus: inputStruct.TicketStatus, + Pipeline: inputStruct.Pipeline, + Category: strings.Join(inputStruct.Category, ";"), + Priority: inputStruct.Priority, + Source: inputStruct.Source, + } + + res, err := e.client.Ticket.Create(&req) + + if err != nil { + return nil, err + } + + // get ticket ID + ticketID := res.Properties.(*TaskCreateTicketReq).TicketID + + outputStruct := TaskCreateTicketOutput{TicketID: ticketID} + + output, err := base.ConvertToStructpb(outputStruct) + + if err != nil { + return nil, err + } + + // This section is for creating associations (ticket -> object) + if len(inputStruct.CreateContactsAssociation) != 0 { + err := CreateAssociation(&outputStruct.TicketID, 
&inputStruct.CreateContactsAssociation, "ticket", "contact", e) + + if err != nil { + return nil, err + } + } + + return output, nil +} diff --git a/application/hubspot/v0/ticket_test.go b/application/hubspot/v0/ticket_test.go new file mode 100644 index 00000000..e45aa7b7 --- /dev/null +++ b/application/hubspot/v0/ticket_test.go @@ -0,0 +1,149 @@ +package hubspot + +import ( + "context" + "testing" + + hubspot "github.com/belong-inc/go-hubspot" + qt "github.com/frankban/quicktest" + "github.com/instill-ai/component/base" + "go.uber.org/zap" + "google.golang.org/protobuf/encoding/protojson" + "google.golang.org/protobuf/types/known/structpb" +) + +// mockClient is in contact_test.go + +// Mock Ticket struct and its functions +type MockTicket struct{} + +func (s *MockTicket) Get(ticketID string) (*hubspot.ResponseResource, error) { + var fakeTicket TaskGetTicketResp + if ticketID == "2865646368" { + fakeTicket = TaskGetTicketResp{ + TicketName: "HubSpot - New Query (Sample Query)", + TicketStatus: "1", + Pipeline: "0", + Category: "PRODUCT_ISSUE;BILLING_ISSUE", + } + } + + ret := &hubspot.ResponseResource{ + Properties: &fakeTicket, + } + + return ret, nil +} +func (s *MockTicket) Create(ticket *TaskCreateTicketReq) (*hubspot.ResponseResource, error) { + arbitraryTicketID := "99987654321" + + fakeTicketInfo := ticket + + fakeTicketInfo.TicketID = arbitraryTicketID + + ret := &hubspot.ResponseResource{ + Properties: fakeTicketInfo, + } + + return ret, nil +} + +func TestComponent_ExecuteGetTicketTask(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + tc := struct { + name string + input string + wantResp TaskGetTicketOutput + }{ + name: "ok - get ticket", + input: "2865646368", + wantResp: TaskGetTicketOutput{ + TicketName: "HubSpot - New Query (Sample Query)", + TicketStatus: "1", + Pipeline: "0", + Category: []string{"PRODUCT_ISSUE", "BILLING_ISSUE"}, + }, + } + + c.Run(tc.name, 
func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "token": bearerToken, + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: taskGetTicket}, + client: createMockClient(), + } + e.execute = e.GetTicket + exec := &base.ExecutionWrapper{Execution: e} + + pbInput, err := structpb.NewStruct(map[string]any{ + "ticket-id": tc.input, + }) + + c.Assert(err, qt.IsNil) + + res, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbInput}) + + c.Assert(err, qt.IsNil) + + resJSON, err := protojson.Marshal(res[0]) + c.Assert(err, qt.IsNil) + + c.Check(resJSON, qt.JSONEquals, tc.wantResp) + + }) +} + +func TestComponent_ExecuteCreateTicketTask(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + tc := struct { + name string + inputTicket TaskCreateTicketInput + wantResp string + }{ + name: "ok - create ticket", + inputTicket: TaskCreateTicketInput{ + TicketName: "Fake Ticket", + TicketStatus: "2", + Pipeline: "0", + Category: []string{"FEATURE_REQUEST", "GENERAL_INQUIRY"}, + }, + wantResp: "99987654321", + } + + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "token": bearerToken, + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: taskCreateTicket}, + client: createMockClient(), + } + e.execute = e.CreateTicket + exec := &base.ExecutionWrapper{Execution: e} + + pbInput, err := base.ConvertToStructpb(tc.inputTicket) + + c.Assert(err, qt.IsNil) + + res, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbInput}) + c.Assert(err, qt.IsNil) + + resString := res[0].Fields["ticket-id"].GetStringValue() + + c.Check(resString, qt.Equals, tc.wantResp) + + }) +} diff --git a/application/website/v0/README.mdx 
b/application/website/v0/README.mdx index 3882c5c7..f42ff714 100644 --- a/application/website/v0/README.mdx +++ b/application/website/v0/README.mdx @@ -9,6 +9,7 @@ The Website component is an application component that allows users to scrape we It can carry out the following tasks: - [Scrape Website](#scrape-website) +- [Scrape Sitemap](#scrape-sitemap) @@ -53,4 +54,25 @@ Scrape the website contents. +### Scrape Sitemap + +Scrape the sitemap information + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_SCRAPE_SITEMAP` | +| Sitemap URL (required) | `url` | string | The URL of the sitemap to scrape | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| List | `list` | array | The list of information in a sitemap | + + + + + + diff --git a/application/website/v0/config/definition.json b/application/website/v0/config/definition.json index 59b9026e..86ed057d 100644 --- a/application/website/v0/config/definition.json +++ b/application/website/v0/config/definition.json @@ -1,6 +1,7 @@ { "availableTasks": [ - "TASK_SCRAPE_WEBSITE" + "TASK_SCRAPE_WEBSITE", + "TASK_SCRAPE_SITEMAP" ], "custom": false, "documentationUrl": "https://www.instill.tech/docs/component/application/website", diff --git a/application/website/v0/config/tasks.json b/application/website/v0/config/tasks.json index efcaac5e..3f872a4f 100644 --- a/application/website/v0/config/tasks.json +++ b/application/website/v0/config/tasks.json @@ -149,5 +149,84 @@ "title": "Output", "type": "object" } + }, + "TASK_SCRAPE_SITEMAP": { + "instillShortDescription": "Scrape the sitemap information", + "input": { + "description": "The URL contains sitemap", + "instillUIOrder": 0, + "properties": { + "url": { + "description": "The URL of the sitemap to scrape", + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 0, + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "title": "Sitemap URL", + 
"type": "string" + } + }, + "required": [ + "url" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 0, + "properties": { + "list": { + "description": "The list of information in a sitemap", + "instillFormat": "array:semi-structured/json", + "instillUIOrder": 0, + "items": { + "properties": { + "loc": { + "description": "The URL of the webpage", + "instillFormat": "string", + "title": "URL", + "type": "string" + }, + "lastmod": { + "description": "The last modified time of the webpage with ISO 8601 format", + "instillFormat": "string", + "title": "Last Modified", + "type": "string" + }, + "changefreq": { + "description": "The change frequency of the webpage", + "instillFormat": "string", + "title": "Change Frequency", + "type": "string" + }, + "priority": { + "description": "The priority of the webpage", + "instillFormat": "number", + "title": "Priority", + "type": "number" + } + }, + "required": [ + "loc", + "lastmod" + ], + "title": "List", + "instillFormat": "semi-structured/json" + }, + "title": "List", + "type": "array" + } + }, + "required": [ + "list" + ], + "title": "Output", + "type": "object" + } } } diff --git a/application/website/v0/main.go b/application/website/v0/main.go index 9ff179f4..58fb6b83 100644 --- a/application/website/v0/main.go +++ b/application/website/v0/main.go @@ -5,6 +5,7 @@ import ( "context" _ "embed" "fmt" + "io" "sync" "google.golang.org/protobuf/types/known/structpb" @@ -14,6 +15,7 @@ import ( const ( taskScrapeWebsite = "TASK_SCRAPE_WEBSITE" + taskScrapeSitemap = "TASK_SCRAPE_SITEMAP" ) var ( @@ -32,6 +34,8 @@ type component struct { type execution struct { base.ComponentExecution + execute func(*structpb.Struct) (*structpb.Struct, error) + externalCaller func(url string) (ioCloser io.ReadCloser, err error) } func Init(bc base.Component) *component { @@ -46,40 +50,35 @@ func Init(bc base.Component) *component { } func (c *component) CreateExecution(sysVars map[string]any, setup *structpb.Struct, 
task string) (*base.ExecutionWrapper, error) { - return &base.ExecutionWrapper{Execution: &execution{ + e := &execution{ ComponentExecution: base.ComponentExecution{Component: c, SystemVariables: sysVars, Setup: setup, Task: task}, - }}, nil + } + + switch task { + case taskScrapeWebsite: + e.execute = e.Scrape + case taskScrapeSitemap: + // To make mocking easier + e.externalCaller = scrapSitemapCaller + e.execute = e.ScrapeSitemap + default: + return nil, fmt.Errorf(task + " task is not supported.") + } + + return &base.ExecutionWrapper{Execution: e}, nil } func (e *execution) Execute(_ context.Context, inputs []*structpb.Struct) ([]*structpb.Struct, error) { - outputs := []*structpb.Struct{} - - for _, input := range inputs { - switch e.Task { - case taskScrapeWebsite: - inputStruct := ScrapeWebsiteInput{} - err := base.ConvertFromStructpb(input, &inputStruct) - if err != nil { - return nil, err - } - - outputStruct, err := Scrape(inputStruct) - if err != nil { - return nil, err - } - output, err := base.ConvertToStructpb(outputStruct) - if err != nil { - return nil, err - } - outputs = append(outputs, output) - default: - return nil, fmt.Errorf("not supported task: %s", e.Task) + outputs := make([]*structpb.Struct, len(inputs)) + + for i, input := range inputs { + output, err := e.execute(input) + if err != nil { + return nil, err } + + outputs[i] = output } return outputs, nil } - -func (c *component) Test(sysVars map[string]any, setup *structpb.Struct) error { - return nil -} diff --git a/application/website/v0/main_test.go b/application/website/v0/main_test.go new file mode 100644 index 00000000..8752f8db --- /dev/null +++ b/application/website/v0/main_test.go @@ -0,0 +1,62 @@ +package website + +import ( + "io" + "strings" + "testing" + + "github.com/frankban/quicktest" + "github.com/instill-ai/component/base" +) + +func TestScrapSiteMap(t *testing.T) { + c := quicktest.New(t) + + c.Run("ScrapeSitemap", func(c *quicktest.C) { + component := 
Init(base.Component{}) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: component, SystemVariables: nil, Setup: nil, Task: taskScrapeSitemap}, + externalCaller: fakeScrapSitemapCaller, + } + + e.execute = e.ScrapeSitemap + + input := &ScrapeSitemapInput{ + URL: "https://www.example.com/sitemap.xml", + } + + inputStruct, err := base.ConvertToStructpb(input) + c.Assert(err, quicktest.IsNil) + + output, err := e.execute(inputStruct) + + c.Assert(err, quicktest.IsNil) + + var outputStruct ScrapeSitemapOutput + err = base.ConvertFromStructpb(output, &outputStruct) + c.Assert(err, quicktest.IsNil) + + c.Assert(len(outputStruct.List), quicktest.Equals, 1) + + siteInfo := outputStruct.List[0] + c.Assert(siteInfo.Loc, quicktest.Equals, "https://www.example.com") + c.Assert(siteInfo.LastModifiedTime, quicktest.Equals, "2021-01-01T00:00:00Z") + c.Assert(siteInfo.ChangeFrequency, quicktest.Equals, "daily") + c.Assert(siteInfo.Priority, quicktest.Equals, 0.8) + }) +} + +func fakeScrapSitemapCaller(url string) (ioCloser io.ReadCloser, err error) { + + xml := `` + xml += `` + xml += `` + xml += `https://www.example.com` + xml += `2021-01-01T00:00:00Z` + xml += `daily` + xml += `0.8` + xml += `` + xml += `` + return io.NopCloser(strings.NewReader(xml)), nil +} diff --git a/application/website/v0/scrape_sitemap.go b/application/website/v0/scrape_sitemap.go new file mode 100644 index 00000000..6ed6c3f5 --- /dev/null +++ b/application/website/v0/scrape_sitemap.go @@ -0,0 +1,109 @@ +package website + +import ( + "encoding/xml" + "fmt" + "io" + "net/http" + "strconv" + + "github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +type ScrapeSitemapInput struct { + URL string `json:"url"` +} + +type ScrapeSitemapOutput struct { + List []SiteInformation `json:"list"` +} + +type SiteInformation struct { + Loc string `json:"loc"` + // Follow ISO 8601 format + LastModifiedTime string `json:"lastmod"` + ChangeFrequency string 
`json:"changefreq,omitempty"` + Priority float64 `json:"priority,omitempty"` +} + +type URLSet struct { + XMLName xml.Name `xml:"urlset"` + Urls []URL `xml:"url"` +} + +type URL struct { + Loc string `xml:"loc"` + LastMod string `xml:"lastmod"` + ChangeFreq string `xml:"changefreq"` + Priority string `xml:"priority"` +} + +func (e *execution) ScrapeSitemap(input *structpb.Struct) (*structpb.Struct, error) { + + inputStruct := ScrapeSitemapInput{} + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, fmt.Errorf("failed to convert input to struct: %v", err) + } + + ioCloser, err := e.externalCaller(inputStruct.URL) + + if err != nil { + return nil, fmt.Errorf("failed to scrap the URL: %v", err) + } + + defer ioCloser.Close() + + body, err := io.ReadAll(ioCloser) + + if err != nil { + return nil, fmt.Errorf("failed to read the response body: %v", err) + } + + var urlSet URLSet + err = xml.Unmarshal(body, &urlSet) + if err != nil { + return nil, fmt.Errorf("failed to parse XML: %v", err) + } + + list := []SiteInformation{} + for _, url := range urlSet.Urls { + priority, err := strconv.ParseFloat(url.Priority, 64) + if err != nil { + return nil, fmt.Errorf("failed to parse priority: %v", err) + } + + list = append(list, SiteInformation{ + Loc: url.Loc, + LastModifiedTime: url.LastMod, + ChangeFrequency: url.ChangeFreq, + Priority: priority, + }) + } + + output := ScrapeSitemapOutput{ + List: list, + } + + outputStruct, err := base.ConvertToStructpb(output) + + if err != nil { + return nil, fmt.Errorf("failed to convert output to struct: %v", err) + } + return outputStruct, nil +} + +func scrapSitemapCaller(url string) (io.ReadCloser, error) { + resp, err := http.Get(url) + + if err != nil { + return nil, fmt.Errorf("failed to fetch the URL: %v", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("error: status code %d", resp.StatusCode) + } + return resp.Body, nil +} diff --git 
a/application/website/v0/scrape_website.go b/application/website/v0/scrape_website.go index a5fde187..660abd65 100644 --- a/application/website/v0/scrape_website.go +++ b/application/website/v0/scrape_website.go @@ -8,8 +8,10 @@ import ( "strings" "github.com/PuerkitoBio/goquery" - "github.com/gocolly/colly/v2" + colly "github.com/gocolly/colly/v2" + "github.com/instill-ai/component/base" "github.com/instill-ai/component/internal/util" + "google.golang.org/protobuf/types/known/structpb" ) type PageInfo struct { @@ -93,26 +95,33 @@ func getHTMLPageDoc(url string) (*goquery.Document, error) { } // Scrape crawls a webpage and returns a slice of PageInfo -func Scrape(input ScrapeWebsiteInput) (ScrapeWebsiteOutput, error) { +func (e *execution) Scrape(input *structpb.Struct) (*structpb.Struct, error) { + inputStruct := ScrapeWebsiteInput{} + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, fmt.Errorf("error converting input to struct: %v", err) + } + output := ScrapeWebsiteOutput{} - if input.IncludeLinkHTML == nil { + if inputStruct.IncludeLinkHTML == nil { b := false - input.IncludeLinkHTML = &b + inputStruct.IncludeLinkHTML = &b } - if input.IncludeLinkText == nil { + if inputStruct.IncludeLinkText == nil { b := false - input.IncludeLinkText = &b + inputStruct.IncludeLinkText = &b } - if input.MaxK < 0 { - input.MaxK = 0 + if inputStruct.MaxK < 0 { + inputStruct.MaxK = 0 } pageLinks := []string{} c := colly.NewCollector() - if len(input.AllowedDomains) > 0 { - c.AllowedDomains = input.AllowedDomains + if len(inputStruct.AllowedDomains) > 0 { + c.AllowedDomains = inputStruct.AllowedDomains } c.AllowURLRevisit = false @@ -130,7 +139,7 @@ func Scrape(input ScrapeWebsiteInput) (ScrapeWebsiteOutput, error) { c.OnRequest(func(r *colly.Request) { - if input.MaxK > 0 && len(output.Pages) >= input.MaxK { + if inputStruct.MaxK > 0 && len(output.Pages) >= inputStruct.MaxK { r.Abort() return } @@ -154,18 +163,18 @@ func Scrape(input 
ScrapeWebsiteInput) (ScrapeWebsiteOutput, error) { page.Title = title page.Link = strippedURL.String() - if *input.IncludeLinkHTML || *input.IncludeLinkText { + if *inputStruct.IncludeLinkHTML || *inputStruct.IncludeLinkText { html, err := util.ScrapeWebpageHTML(doc) if err != nil { fmt.Printf("Error scraping HTML from %s: %v", strippedURL.String(), err) return } - if *input.IncludeLinkHTML { + if *inputStruct.IncludeLinkHTML { page.LinkHTML = html } - if *input.IncludeLinkText { + if *inputStruct.IncludeLinkText { markdown, err := util.ScrapeWebpageHTMLToMarkdown(html) if err != nil { fmt.Printf("Error scraping text from %s: %v", strippedURL.String(), err) @@ -179,10 +188,16 @@ func Scrape(input ScrapeWebsiteInput) (ScrapeWebsiteOutput, error) { }) // Start scraping - if !strings.HasPrefix(input.TargetURL, "http://") && !strings.HasPrefix(input.TargetURL, "https://") { - input.TargetURL = "https://" + input.TargetURL + if !strings.HasPrefix(inputStruct.TargetURL, "http://") && !strings.HasPrefix(inputStruct.TargetURL, "https://") { + inputStruct.TargetURL = "https://" + inputStruct.TargetURL } - _ = c.Visit(input.TargetURL) + _ = c.Visit(inputStruct.TargetURL) + + outputStruct, err := base.ConvertToStructpb(output) + if err != nil { + return nil, fmt.Errorf("error converting output to struct: %v", err) + } + + return outputStruct, nil - return output, nil } diff --git a/base/component.go b/base/component.go index 6ab82e53..5e6f53df 100644 --- a/base/component.go +++ b/base/component.go @@ -18,6 +18,7 @@ import ( jsoniter "github.com/json-iterator/go" "github.com/instill-ai/component/internal/jsonref" + "github.com/instill-ai/x/errmsg" pb "github.com/instill-ai/protogen-go/vdp/pipeline/v1beta" ) @@ -270,10 +271,6 @@ func generateComponentSpec(title string, tasks []*pb.ComponentTask, taskStructs componentSpec.Fields["title"] = structpb.NewStringValue(fmt.Sprintf("%s Component", title)) componentSpec.Fields["type"] = structpb.NewStringValue("object") - if err != nil 
{ - return nil, err - } - oneOfList := &structpb.ListValue{ Values: []*structpb.Value{}, } @@ -309,9 +306,7 @@ func generateComponentSpec(title string, tasks []*pb.ComponentTask, taskStructs condition := &structpb.Struct{} err = protojson.Unmarshal([]byte(conditionJSON), condition) if err != nil { - if err != nil { - panic(err) - } + panic(err) } oneOf.Fields["properties"].GetStructValue().Fields["condition"] = structpb.NewStructValue(condition) oneOf.Fields["properties"].GetStructValue().Fields["input"] = structpb.NewStructValue(compInputStruct) @@ -853,6 +848,154 @@ func (e *ComponentExecution) GetTaskOutputSchema() string { // explicit credentials. func (e *ComponentExecution) UsesInstillCredentials() bool { return false } +func (e *ComponentExecution) getInputSchemaJSON(task string) (map[string]interface{}, error) { + taskSpec, ok := e.Component.GetTaskInputSchemas()[task] + if !ok { + return nil, errmsg.AddMessage( + fmt.Errorf("task %s not found", task), + fmt.Sprintf("Task %s not found", task), + ) + } + var taskSpecMap map[string]interface{} + err := json.Unmarshal([]byte(taskSpec), &taskSpecMap) + if err != nil { + return nil, errmsg.AddMessage( + err, + "Failed to unmarshal input", + ) + } + inputMap := taskSpecMap["properties"].(map[string]interface{}) + return inputMap, nil +} +func (e *ComponentExecution) FillInDefaultValues(input *structpb.Struct) (*structpb.Struct, error) { + inputMap, err := e.getInputSchemaJSON(e.Task) + if err != nil { + return nil, err + } + return e.fillInDefaultValuesWithReference(input, inputMap) +} +func hasNextLevel(valueMap map[string]interface{}) bool { + if valType, ok := valueMap["type"]; ok { + if valType != "object" { + return false + } + } + if _, ok := valueMap["properties"]; ok { + return true + } + for _, target := range []string{"allOf", "anyOf", "oneOf"} { + if _, ok := valueMap[target]; ok { + items := valueMap[target].([]interface{}) + for _, v := range items { + if _, ok := 
v.(map[string]interface{})["properties"].(map[string]interface{}); ok { + return true + } + } + } + } + return false +} +func optionMatch(valueMap *structpb.Struct, reference map[string]interface{}, checkFields []string) bool { + for _, checkField := range checkFields { + if _, ok := valueMap.GetFields()[checkField]; !ok { + return false + } + if val, ok := reference[checkField].(map[string]interface{})["const"]; ok { + if valueMap.GetFields()[checkField].GetStringValue() != val { + return false + } + } + } + return true +} +func (e *ComponentExecution) fillInDefaultValuesWithReference(input *structpb.Struct, reference map[string]interface{}) (*structpb.Struct, error) { + for key, value := range reference { + valueMap, ok := value.(map[string]interface{}) + if !ok { + continue + } + if _, ok := valueMap["default"]; !ok { + if !hasNextLevel(valueMap) { + continue + } + if _, ok := input.GetFields()[key]; !ok { + input.GetFields()[key] = structpb.NewStructValue(&structpb.Struct{Fields: make(map[string]*structpb.Value)}) + } + var properties map[string]interface{} + if _, ok := valueMap["properties"]; !ok { + var requiredFieldsRaw []interface{} + if requiredFieldsRaw, ok = valueMap["required"].([]interface{}); !ok { + continue + } + requiredFields := make([]string, len(requiredFieldsRaw)) + for idx, v := range requiredFieldsRaw { + requiredFields[idx] = fmt.Sprintf("%v", v) + } + for _, target := range []string{"allOf", "anyOf", "oneOf"} { + var items []interface{} + if items, ok = valueMap[target].([]interface{}); !ok { + continue + } + for _, v := range items { + if properties, ok = v.(map[string]interface{})["properties"].(map[string]interface{}); !ok { + continue + } + inputSubField := input.GetFields()[key].GetStructValue() + if target == "oneOf" && !optionMatch(inputSubField, properties, requiredFields) { + continue + } + subField, err := e.fillInDefaultValuesWithReference(inputSubField, properties) + if err != nil { + return nil, err + } + 
input.GetFields()[key] = structpb.NewStructValue(subField) + } + } + } else { + if properties, ok = valueMap["properties"].(map[string]interface{}); !ok { + continue + } + subField, err := e.fillInDefaultValuesWithReference(input.GetFields()[key].GetStructValue(), properties) + if err != nil { + return nil, err + } + input.GetFields()[key] = structpb.NewStructValue(subField) + } + continue + } + if _, ok := input.GetFields()[key]; ok { + continue + } + defaultValue := valueMap["default"] + typeValue := valueMap["type"] + switch typeValue { + case "string", "integer", "number", "boolean": + val, err := structpb.NewValue(defaultValue) + if err != nil { + continue + } + input.GetFields()[key] = val + case "array": + tempArray := &structpb.ListValue{Values: []*structpb.Value{}} + itemType := valueMap["items"].(map[string]interface{})["type"] + switch itemType { + case "string", "integer", "number", "boolean": + for _, v := range defaultValue.([]interface{}) { + val, err := structpb.NewValue(v) + if err != nil { + continue + } + tempArray.Values = append(tempArray.Values, val) + } + default: + continue + } + input.GetFields()[key] = structpb.NewListValue(tempArray) + } + } + return input, nil +} + // ReadFromGlobalConfig looks up a component credential field from a secret map // that comes from the environment variable configuration. // diff --git a/data/pinecone/v0/README.mdx b/data/pinecone/v0/README.mdx index 359d204c..daaff40f 100644 --- a/data/pinecone/v0/README.mdx +++ b/data/pinecone/v0/README.mdx @@ -51,7 +51,7 @@ Retrieve the ids of the most similar items in a namespace, along with their simi | Vector (required) | `vector` | array[number] | An array of dimensions for the query vector. | | Top K (required) | `top-k` | integer | The number of results to return for each query | | Namespace | `namespace` | string | The namespace to query | -| Filter | `filter` | object | The filter to apply. You can use vector metadata to limit your search. 
See https://www.pinecone.io/docs/metadata-filtering/. | +| Filter | `filter` | object | The filter to apply. You can use vector metadata to limit your search. See more details here. | | Minimum Score | `min-score` | number | Exclude results whose score is below this value | | Include Metadata | `include-metadata` | boolean | Indicates whether metadata is included in the response as well as the IDs | | Include Values | `include-values` | boolean | Indicates whether vector values are included in the response | diff --git a/data/pinecone/v0/config/tasks.json b/data/pinecone/v0/config/tasks.json index 9b4aacc7..5496505f 100644 --- a/data/pinecone/v0/config/tasks.json +++ b/data/pinecone/v0/config/tasks.json @@ -65,7 +65,7 @@ "type": "string" }, "filter": { - "description": "The filter to apply. You can use vector metadata to limit your search. See https://www.pinecone.io/docs/metadata-filtering/.", + "description": "The filter to apply. You can use vector metadata to limit your search. See more details here.", "instillAcceptFormats": [ "semi-structured/object" ], diff --git a/data/sql/v0/README.mdx b/data/sql/v0/README.mdx new file mode 100644 index 00000000..c2ad86b0 --- /dev/null +++ b/data/sql/v0/README.mdx @@ -0,0 +1,190 @@ +--- +title: "SQL" +lang: "en-US" +draft: false +description: "Learn about how to set up a VDP SQL component https://github.com/instill-ai/instill-core" +--- + +The SQL component is a data component that allows users to access the SQL database of your choice. +It can carry out the following tasks: + +- [Insert](#insert) +- [Update](#update) +- [Select](#select) +- [Delete](#delete) +- [Create Table](#create-table) +- [Drop Table](#drop-table) + + + +## Release Stage + +`Alpha` + + + +## Configuration + +The component configuration is defined and maintained [here](https://github.com/instill-ai/component/blob/main/data/sql/v0/config/definition.json). 
+ + + + +## Setup + + +| Field | Field ID | Type | Note | +| :--- | :--- | :--- | :--- | +| Username (required) | `user` | string | Fill in your account username | +| Password (required) | `password` | string | Fill in your account password | +| Database Name (required) | `database-name` | string | Fill in the name of your database | +| Host (required) | `host` | string | Fill in the host of your database | +| Port (required) | `port` | number | Fill in the port of your database | + + + + +## Supported Tasks + +### Insert + +Perform an insert operation based on specified filter + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_INSERT` | +| Engine (required) | `engine` | string | Choose the engine of your database | +| Table Name (required) | `table-name` | string | The table name in the database to insert data into | +| Data (required) | `data` | any | The data to be inserted | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Status | `status` | string | Insert status | + + + + + + +### Update + +Perform an update operation based on specified filter + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_UPDATE` | +| Engine (required) | `engine` | string | Choose the engine of your database | +| Table Name (required) | `table-name` | string | The table name in the database to update data into | +| Filter (required) | `filter` | string | The filter to be applied to the data with SQL syntax, which starts with WHERE clause | +| Update (required) | `update-data` | any | The new data to be updated to | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Status | `status` | string | Update status | + + + + + + +### Select + +Perform a select operation based on specified filter + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | 
string | `TASK_SELECT` | +| Engine (required) | `engine` | string | Choose the engine of your database | +| Table Name (required) | `table-name` | string | The table name in the database to be selected | +| Filter | `filter` | string | The filter to be applied to the data with SQL syntax, which starts with WHERE clause, empty for all rows | +| Limit | `limit` | integer | The limit of rows to be selected, empty for all rows | +| Columns | `columns` | array[string] | The columns to return in the rows. If empty then all columns will be returned | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Rows | `rows` | array | The rows returned from the select operation | +| Status | `status` | string | Select status | + + + + + + +### Delete + +Perform a delete operation based on specified filter + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_DELETE` | +| Engine (required) | `engine` | string | Choose the engine of your database | +| Table Name (required) | `table-name` | string | The table name in the database to be deleted | +| Filter (required) | `filter` | string | The filter to be applied to the data with SQL syntax, which starts with WHERE clause | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Status | `status` | string | Delete status | + + + + + + +### Create Table + +Create a table in the database + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_CREATE_TABLE` | +| Engine (required) | `engine` | string | Choose the engine of your database | +| Table Name (required) | `table-name` | string | The table name in the database to be created | +| Columns (required) | `columns-structure` | any | The columns structure to be created in the table, json with value string, e.g \{"name": "VARCHAR(255)", "age": "INT not null"\} | + + + +| Output | ID | Type | Description | +| :--- 
| :--- | :--- | :--- | +| Status | `status` | string | Create table status | + + + + + + +### Drop Table + +Drop a table in the database + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_DROP_TABLE` | +| Engine (required) | `engine` | string | Choose the engine of your database | +| Table Name (required) | `table-name` | string | The table name in the database to be dropped | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Status | `status` | string | Drop table status | + + + + + + + diff --git a/data/sql/v0/assets/sql.svg b/data/sql/v0/assets/sql.svg new file mode 100644 index 00000000..1ba1685d --- /dev/null +++ b/data/sql/v0/assets/sql.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/data/sql/v0/client.go b/data/sql/v0/client.go new file mode 100644 index 00000000..27192073 --- /dev/null +++ b/data/sql/v0/client.go @@ -0,0 +1,91 @@ +package sql + +import ( + "fmt" + "strconv" + + "github.com/jmoiron/sqlx" + "google.golang.org/protobuf/types/known/structpb" + + // Import all the SQL drivers + _ "github.com/denisenkom/go-mssqldb" // SQL Server + _ "github.com/go-sql-driver/mysql" // MySQL and MariaDB + _ "github.com/lib/pq" // PostgreSQL + _ "github.com/nakagami/firebirdsql" // Firebird + _ "github.com/sijms/go-ora/v2" // Oracle +) + +var engines = map[string]string{ + "PostgreSQL": "postgresql://%s:%s@%s/%s", // PostgreSQL + "SQL Server": "sqlserver://%s:%s@%s?database=%s", // SQL Server + "Oracle": "oracle://%s:%s@%s/%s", // Oracle + "MySQL": "%s:%s@tcp(%s)/%s", // MySQL and MariaDB + "Firebird": "firebirdsql://%s:%s@%s/%s", // Firebird +} + +var enginesType = map[string]string{ + "PostgreSQL": "postgres", // PostgreSQL + "SQL Server": "sqlserver", // SQL Server + "Oracle": "oracle", // Oracle + "MySQL": "mysql", // MySQL and MariaDB + "Firebird": "firebirdsql", // Firebird +} + +type Config struct { + DBUser string + DBPassword string + DBName string + DBHost string 
+ DBPort string +} + +func LoadConfig(setup *structpb.Struct) *Config { + return &Config{ + DBUser: getUser(setup), + DBPassword: getPassword(setup), + DBName: getDatabaseName(setup), + DBHost: getHost(setup), + DBPort: getPort(setup), + } +} + +func newClient(setup *structpb.Struct, inputSetup *Engine) SQLClient { + cfg := LoadConfig(setup) + + DBEndpoint := fmt.Sprintf("%v:%v", cfg.DBHost, cfg.DBPort) + + // Test every engines to find the correct one + var db *sqlx.DB + var err error + + // Get the correct engine + engine := engines[inputSetup.DBEngine] + engineType := enginesType[inputSetup.DBEngine] + + dsn := fmt.Sprintf(engine, + cfg.DBUser, cfg.DBPassword, DBEndpoint, cfg.DBName, + ) + + db, err = sqlx.Open(engineType, dsn) + if err != nil { + return nil + } + + return db +} + +func getUser(setup *structpb.Struct) string { + return setup.GetFields()["user"].GetStringValue() +} +func getPassword(setup *structpb.Struct) string { + return setup.GetFields()["password"].GetStringValue() +} +func getDatabaseName(setup *structpb.Struct) string { + return setup.GetFields()["database-name"].GetStringValue() +} +func getHost(setup *structpb.Struct) string { + return setup.GetFields()["host"].GetStringValue() +} +func getPort(setup *structpb.Struct) string { + return strconv.Itoa(int(setup.GetFields()["port"].GetNumberValue())) +} diff --git a/data/sql/v0/component_test.go b/data/sql/v0/component_test.go new file mode 100644 index 00000000..b4ed9f28 --- /dev/null +++ b/data/sql/v0/component_test.go @@ -0,0 +1,488 @@ +package sql + +import ( + "context" + "database/sql" + "encoding/json" + "fmt" + "strings" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + qt "github.com/frankban/quicktest" + "github.com/instill-ai/component/base" + "github.com/jmoiron/sqlx" + + "go.uber.org/zap" + "google.golang.org/protobuf/types/known/structpb" +) + +type MockSQLClient struct{} + +func (m *MockSQLClient) Queryx(query string, args ...interface{}) (*sqlx.Rows, error) { + mockDB, mock, 
_ := sqlmock.New() + defer mockDB.Close() + + sqlxDB := sqlx.NewDb(mockDB, "sqlmock") + mock.ExpectQuery("SELECT (.+) FROM users WHERE id = (.+) AND name = (.+) AND email = (.+) LIMIT (.+) OFFSET (.+)"). + WithArgs("1", "john", "john@example.com", 1, 0). + WillReturnRows(sqlmock.NewRows([]string{"id", "name", "email"}).AddRow("1", "john", "john@example.com")) + + return sqlxDB.Queryx("SELECT id, name, email FROM users WHERE id = ? AND name = ? AND email = ? LIMIT ? OFFSET ?", "1", "john", "john@example.com", 1, 0) +} + +func (m *MockSQLClient) NamedExec(query string, arg interface{}) (sql.Result, error) { + if strings.Contains(query, "INSERT") { + mockDB, mock, _ := sqlmock.New() + defer mockDB.Close() + + sqlxDB := sqlx.NewDb(mockDB, "sqlmock") + fmt.Print(arg) + arg = map[string]interface{}{ + "id": "1", + "name": "John Doe", + } + + mock.ExpectExec("INSERT INTO users \\(id, name\\) VALUES \\(\\?, \\?\\)"). + WithArgs("1", "John Doe").WillReturnResult(sqlmock.NewResult(1, 1)) + + return sqlxDB.NamedExec("INSERT INTO users (id, name) VALUES (:id, :name)", arg) + } else if strings.Contains(query, "DELETE") { + mockDB, mock, _ := sqlmock.New() + defer mockDB.Close() + + sqlxDB := sqlx.NewDb(mockDB, "sqlmock") + arg = map[string]interface{}{ + "id": "1", + "name": "john", + } + + mock.ExpectExec("DELETE FROM users WHERE id = \\? AND name = \\?"). + WithArgs("1", "john").WillReturnResult(sqlmock.NewResult(1, 1)) + + return sqlxDB.NamedExec("DELETE FROM users WHERE id = :id AND name = :name", arg) + } else if strings.Contains(query, "UPDATE") { + mockDB, mock, _ := sqlmock.New() + defer mockDB.Close() + + sqlxDB := sqlx.NewDb(mockDB, "sqlmock") + arg = map[string]interface{}{ + "id": "1", + "name": "John Doe Updated", + } + + mock.ExpectExec("UPDATE users SET id = \\?, name = \\? WHERE id = \\? AND name = \\?"). 
+ WithArgs("1", "John Doe Updated", "1", "John Doe Updated").WillReturnResult(sqlmock.NewResult(1, 1)) + + return sqlxDB.NamedExec("UPDATE users SET id = :id, name = :name WHERE id = :id AND name = :name", arg) + } else if strings.Contains(query, "CREATE") { + mockDB, mock, _ := sqlmock.New() + defer mockDB.Close() + + sqlxDB := sqlx.NewDb(mockDB, "sqlmock") + arg = map[string]interface{}{ + "id": "INT", + "name": "VARCHAR(255)", + } + + mock.ExpectExec("CREATE TABLE users \\(id INT, name VARCHAR\\(255\\)\\)"). + WillReturnResult(sqlmock.NewResult(1, 1)) + + return sqlxDB.NamedExec("CREATE TABLE users (id INT, name VARCHAR(255))", arg) + } else if strings.Contains(query, "DROP") { + mockDB, mock, _ := sqlmock.New() + defer mockDB.Close() + + sqlxDB := sqlx.NewDb(mockDB, "sqlmock") + arg = map[string]interface{}{} + + mock.ExpectExec("DROP TABLE users"). + WillReturnResult(sqlmock.NewResult(1, 1)) + + return sqlxDB.NamedExec("DROP TABLE users", arg) + } + + return nil, nil +} + +func TestInsertUser(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + testcases := []struct { + name string + tableName string + input InsertInput + wantResp InsertOutput + wantErr string + }{ + { + name: "insert user", + tableName: "users", + input: InsertInput{ + Data: map[string]any{ + "id": "1", + "name": "John Doe", + }, + TableName: "users", + }, + wantResp: InsertOutput{ + Status: "Successfully inserted rows", + }, + }, + } + + for _, tc := range testcases { + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "user": "test_user", + "password": "test_pass", + "name": "test_db", + "host": "localhost", + "port": "3306", + "region": "us-west-2", + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TaskInsert}, + client: &MockSQLClient{}, + } + e.execute = e.insert 
+ exec := &base.ExecutionWrapper{Execution: e} + + pbIn, err := base.ConvertToStructpb(tc.input) + c.Assert(err, qt.IsNil) + + got, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbIn}) + + if tc.wantErr != "" { + c.Assert(err, qt.ErrorMatches, tc.wantErr) + return + } + + wantJSON, err := json.Marshal(tc.wantResp) + c.Assert(err, qt.IsNil) + c.Check(wantJSON, qt.JSONEquals, got[0].AsMap()) + }) + } +} + +func TestUpdateUser(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + testcases := []struct { + name string + tableName string + input UpdateInput + wantResp UpdateOutput + wantErr string + }{ + { + name: "update user", + tableName: "users", + input: UpdateInput{ + Filter: "id = 1 AND name = 'John Doe'", + UpdateData: map[string]any{ + "id": "1", + "name": "John Doe Updated", + }, + TableName: "users", + }, + wantResp: UpdateOutput{ + Status: "Successfully updated rows", + }, + }, + } + + for _, tc := range testcases { + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "user": "test_user", + "password": "test_pass", + "name": "test_db", + "host": "localhost", + "port": "3306", + "region": "us-west-2", + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TaskInsert}, + client: &MockSQLClient{}, + } + e.execute = e.update + exec := &base.ExecutionWrapper{Execution: e} + + pbIn, err := base.ConvertToStructpb(tc.input) + c.Assert(err, qt.IsNil) + + got, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbIn}) + + if tc.wantErr != "" { + c.Assert(err, qt.ErrorMatches, tc.wantErr) + return + } + + wantJSON, err := json.Marshal(tc.wantResp) + c.Assert(err, qt.IsNil) + c.Check(wantJSON, qt.JSONEquals, got[0].AsMap()) + }) + } +} + +func TestSelectUser(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := 
base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + testcases := []struct { + name string + tableName string + input SelectInput + wantResp SelectOutput + wantErr string + }{ + { + name: "select users", + tableName: "users", + input: SelectInput{ + Filter: "id = 1 AND name = 'john' AND email = 'john@example.com'", + TableName: "users", + Limit: 0, + }, + wantResp: SelectOutput{ + Status: "Successfully selected rows", + Rows: []map[string]any{ + {"id": "1", "name": "john", "email": "john@example.com"}, + }, + }, + }, + } + + for _, tc := range testcases { + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "user": "test_user", + "password": "test_pass", + "name": "test_db", + "host": "localhost", + "port": "3306", + "region": "us-west-2", + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TaskSelect}, + client: &MockSQLClient{}, + } + e.execute = e.selects + exec := &base.ExecutionWrapper{Execution: e} + + pbIn, err := base.ConvertToStructpb(tc.input) + c.Assert(err, qt.IsNil) + + got, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbIn}) + + if tc.wantErr != "" { + c.Assert(err, qt.ErrorMatches, tc.wantErr) + return + } + + wantJSON, err := json.Marshal(tc.wantResp) + c.Assert(err, qt.IsNil) + c.Check(wantJSON, qt.JSONEquals, got[0].AsMap()) + }) + } +} + +func TestDeleteUser(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + testcases := []struct { + name string + tableName string + input DeleteInput + wantResp DeleteOutput + wantErr string + }{ + { + name: "delete user", + tableName: "users", + input: DeleteInput{ + Filter: "id = 1 AND name = 'john'", + TableName: "users", + }, + wantResp: DeleteOutput{ + Status: "Successfully deleted rows", + }, + }, + } + + for _, tc := range testcases { + c.Run(tc.name, func(c 
*qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "user": "test_user", + "password": "test_pass", + "name": "test_db", + "host": "localhost", + "port": "3306", + "region": "us-west-2", + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TaskDelete}, + client: &MockSQLClient{}, + } + e.execute = e.delete + exec := &base.ExecutionWrapper{Execution: e} + + pbIn, err := base.ConvertToStructpb(tc.input) + c.Assert(err, qt.IsNil) + + got, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbIn}) + + if tc.wantErr != "" { + c.Assert(err, qt.ErrorMatches, tc.wantErr) + return + } + + wantJSON, err := json.Marshal(tc.wantResp) + c.Assert(err, qt.IsNil) + c.Check(wantJSON, qt.JSONEquals, got[0].AsMap()) + }) + } +} + +func TestCreateTable(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + testcases := []struct { + name string + tableName string + input CreateTableInput + wantResp CreateTableOutput + wantErr string + }{ + { + name: "create table", + input: CreateTableInput{ + ColumnsStructure: map[string]string{ + "id": "INT", + "name": "VARCHAR(255)", + }, + TableName: "users", + }, + wantResp: CreateTableOutput{ + Status: "Successfully created table", + }, + }, + } + + for _, tc := range testcases { + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "user": "test_user", + "password": "test_pass", + "name": "test_db", + "host": "localhost", + "port": "3306", + "region": "us-west-2", + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TaskCreateTable}, + client: &MockSQLClient{}, + } + e.execute = e.createTable + exec := &base.ExecutionWrapper{Execution: e} + + pbIn, err := base.ConvertToStructpb(tc.input) + c.Assert(err, 
qt.IsNil) + + got, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbIn}) + + if tc.wantErr != "" { + c.Assert(err, qt.ErrorMatches, tc.wantErr) + return + } + + wantJSON, err := json.Marshal(tc.wantResp) + c.Assert(err, qt.IsNil) + c.Check(wantJSON, qt.JSONEquals, got[0].AsMap()) + }) + } +} + +func TestDropTable(t *testing.T) { + c := qt.New(t) + ctx := context.Background() + bc := base.Component{Logger: zap.NewNop()} + connector := Init(bc) + + testcases := []struct { + name string + input DropTableInput + wantResp DropTableOutput + wantErr string + }{ + { + name: "drop table", + input: DropTableInput{ + TableName: "users", + }, + wantResp: DropTableOutput{ + Status: "Successfully dropped table", + }, + }, + } + + for _, tc := range testcases { + c.Run(tc.name, func(c *qt.C) { + setup, err := structpb.NewStruct(map[string]any{ + "user": "test_user", + "password": "test_pass", + "name": "test_db", + "host": "localhost", + "port": "3306", + "region": "us-west-2", + }) + c.Assert(err, qt.IsNil) + + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: connector, SystemVariables: nil, Setup: setup, Task: TaskDropTable}, + client: &MockSQLClient{}, + } + e.execute = e.dropTable + exec := &base.ExecutionWrapper{Execution: e} + + pbIn, err := base.ConvertToStructpb(tc.input) + c.Assert(err, qt.IsNil) + + got, err := exec.Execution.Execute(ctx, []*structpb.Struct{pbIn}) + + if tc.wantErr != "" { + c.Assert(err, qt.ErrorMatches, tc.wantErr) + return + } + + wantJSON, err := json.Marshal(tc.wantResp) + c.Assert(err, qt.IsNil) + c.Check(wantJSON, qt.JSONEquals, got[0].AsMap()) + }) + } +} diff --git a/data/sql/v0/config/definition.json b/data/sql/v0/config/definition.json new file mode 100644 index 00000000..b48f95d6 --- /dev/null +++ b/data/sql/v0/config/definition.json @@ -0,0 +1,22 @@ +{ + "availableTasks": [ + "TASK_INSERT", + "TASK_UPDATE", + "TASK_SELECT", + "TASK_DELETE", + "TASK_CREATE_TABLE", + "TASK_DROP_TABLE" + ], + "documentationUrl": 
"https://www.instill.tech/docs/component/data/sql", + "icon": "assets/sql.svg", + "id": "sql", + "public": true, + "title": "SQL", + "description": "Access the SQL database of your choice", + "tombstone": false, + "type": "COMPONENT_TYPE_DATA", + "uid": "5861fc8f-1a07-42f6-a6b8-0e5a2664de00", + "version": "0.1.0", + "sourceUrl": "https://github.com/instill-ai/component/blob/main/data/sql/v0", + "releaseStage": "RELEASE_STAGE_ALPHA" +} \ No newline at end of file diff --git a/data/sql/v0/config/setup.json b/data/sql/v0/config/setup.json new file mode 100644 index 00000000..1c4d1014 --- /dev/null +++ b/data/sql/v0/config/setup.json @@ -0,0 +1,84 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": false, + "properties": { + "user": { + "description": "Fill in your account username", + "instillUpstreamTypes": [ + "value","reference" + ], + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 0, + "title": "Username", + "type": "string" + }, + "password": { + "description": "Fill in your account password", + "instillUpstreamTypes": [ + "reference" + ], + "instillAcceptFormats": [ + "string" + ], + "instillSecret": true, + "instillUIOrder": 1, + "title": "Password", + "type": "string" + }, + "database-name": { + "description": "Fill in the name of your database", + "instillUpstreamTypes": [ + "value","reference" + ], + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 2, + "title": "Database Name", + "type": "string" + }, + "host": { + "description": "Fill in the host of your database", + "instillUpstreamTypes": [ + "value","reference" + ], + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 3, + "title": "Host", + "type": "string" + }, + "port": { + "description": "Fill in the port of your database", + "instillUpstreamTypes": [ + "value","reference" + ], + "default": 3306, + "instillAcceptFormats": [ + "number" + ], + "instillUIOrder": 4, + "title": "Port", + "type": "number" + } + }, + "required": [ + 
"user", + "password", + "database-name", + "host", + "port" + ], + "instillEditOnNodeFields": [ + "user", + "password", + "database-name", + "host", + "port" + ], + "title": "SQL Connection", + "type": "object" +} \ No newline at end of file diff --git a/data/sql/v0/config/tasks.json b/data/sql/v0/config/tasks.json new file mode 100644 index 00000000..9f5afb35 --- /dev/null +++ b/data/sql/v0/config/tasks.json @@ -0,0 +1,571 @@ +{ + "TASK_INSERT": { + "instillShortDescription": "Perform an insert operation based on specified filter", + "input": { + "instillUIOrder": 0, + "properties": { + "engine": { + "description": "Choose the engine of your database", + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 0, + "title": "Engine", + "enum": [ + "MySQL", + "PostgreSQL", + "SQL Server", + "Oracle", + "MariaDB", + "Firebird" + ], + "type": "string" + }, + "table-name": { + "description": "The table name in the database to insert data into", + "instillAcceptFormats": [ + "string" + ], + "instillShortDescription": "Database Table Name", + "instillUIOrder": 1, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "title": "Table Name", + "type":"string" + }, + "data": { + "description": "The data to be inserted", + "instillAcceptFormats": [ + "semi-structured/*","structured/*","object","array" + ], + "instillUIOrder": 2, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "items": { + "title": "Object", + "instillFormat": "semi-structured/json" + }, + "title": "Data" + } + }, + "required": [ + "engine", + "data", + "table-name" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 0, + "properties": { + "status": { + "description": "Insert status", + "instillFormat": "string", + "required": [], + "instillUIOrder": 0, + "title": "Status", + "type": "string" + } + }, + "required": [ + "status" + ], + "title": "Output", + 
"type": "object" + } + }, + "TASK_UPDATE": { + "instillShortDescription": "Perform an update operation based on specified filter", + "input": { + "instillUIOrder": 0, + "properties": { + "engine": { + "description": "Choose the engine of your database", + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 0, + "title": "Engine", + "enum": [ + "MySQL", + "PostgreSQL", + "SQL Server", + "Oracle", + "MariaDB", + "Firebird" + ], + "type": "string" + }, + "table-name": { + "description": "The table name in the database to update data into", + "instillAcceptFormats": [ + "string" + ], + "instillShortDescription": "Database Table Name", + "instillUIOrder": 1, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "title": "Table Name", + "type":"string" + }, + "filter": { + "instillShortDescription": "The filter to be applied to the data", + "description": "The filter to be applied to the data with SQL syntax, which starts with WHERE clause", + "instillUIOrder": 2, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "instillAcceptFormats":[ + "string" + ], + "title": "Filter", + "type": "string" + }, + "update-data": { + "description": "The new data to be updated to", + "instillAcceptFormats": [ + "semi-structured/*","structured/*","object","array" + ], + "instillUIOrder": 3, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "items": { + "title": "Object", + "instillFormat": "semi-structured/json" + }, + "title": "Update" + } + }, + "required": [ + "engine", + "filter", + "update-data", + "table-name" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 0, + "properties": { + "status": { + "description": "Update status", + "instillFormat": "string", + "required": [], + "instillUIOrder": 0, + "title": "Status", + "type": "string" + } + }, + "required": [ + "status" + ], + "title": "Output", + 
"type": "object" + } + }, + "TASK_SELECT":{ + "instillShortDescription": "Perform a select operation based on specified filter", + "input": { + "instillUIOrder": 0, + "properties": { + "engine": { + "description": "Choose the engine of your database", + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 0, + "title": "Engine", + "enum": [ + "MySQL", + "PostgreSQL", + "SQL Server", + "Oracle", + "MariaDB", + "Firebird" + ], + "type": "string" + }, + "table-name": { + "description": "The table name in the database to be selected", + "instillAcceptFormats": [ + "string" + ], + "instillShortDescription": "Database Table Name", + "instillUIOrder": 1, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "title": "Table Name", + "type":"string" + }, + "filter": { + "instillShortDescription": "The filter to be applied to the data. If empty, then all rows will be updated", + "description": "The filter to be applied to the data with SQL syntax, which starts with WHERE clause, empty for all rows", + "instillUIOrder": 2, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "instillAcceptFormats":[ + "string" + ], + "title": "Filter", + "type": "string" + }, + "limit": { + "description": "The limit of rows to be selected, empty for all rows", + "instillAcceptFormats": [ + "integer" + ], + "instillShortDescription": "Limit Rows", + "instillUIOrder": 3, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "title": "Limit", + "type":"integer" + }, + "columns":{ + "description": "The columns to return in the rows. 
If empty then all columns will be returned", + "instillAcceptFormats": [ + "array:string" + ], + "instillShortDescription": "Columns to be returned, empty for all columns", + "instillUIOrder": 4, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "title": "Columns", + "type":"array", + "items": { + "title":"Column", + "type": "string" + } + } + }, + "required": [ + "engine", + "table-name" + ], + "instillEditOnNodeFields": [ + "engine", + "table-name", + "filter" + ], + "title": "Input", + "type": "object" + }, + "output": { + "description": "Output", + "instillEditOnNodeFields": [ + "json" + ], + "instillUIOrder": 0, + "properties": { + "rows": { + "description": "The rows returned from the select operation", + "instillEditOnNodeFields": [], + "instillUIOrder": 0, + "required": [], + "title": "Rows", + "type": "array", + "instillFormat": "array:semi-structured/json", + "items": { + "title": "Result", + "instillFormat": "semi-structured/json" + } + }, + "status": { + "description": "Select status", + "instillFormat": "string", + "required": [], + "instillUIOrder": 0, + "title": "Status", + "type": "string" + } + }, + "required": [ + "status", + "rows" + ], + "title": "Output", + "type": "object" + } + }, + "TASK_DELETE": { + "instillShortDescription": "Perform a delete operation based on specified filter", + "input": { + "instillUIOrder": 0, + "properties": { + "engine": { + "description": "Choose the engine of your database", + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 0, + "title": "Engine", + "enum": [ + "MySQL", + "PostgreSQL", + "SQL Server", + "Oracle", + "MariaDB", + "Firebird" + ], + "type": "string" + }, + "table-name": { + "description": "The table name in the database to be deleted", + "instillAcceptFormats": [ + "string" + ], + "instillShortDescription": "Database Table Name", + "instillUIOrder": 1, + "instillUpstreamTypes": [ + 
"reference", + "template", + "value" + ], + "title": "Table Name", + "type":"string" + }, + "filter": { + "instillShortDescription": "The filter to be applied to the data", + "description": "The filter to be applied to the data with SQL syntax, which starts with WHERE clause", + "instillUIOrder": 2, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "instillAcceptFormats":[ + "string" + ], + "title": "Filter", + "type": "string" + } + }, + "required": [ + "engine", + "filter", + "table-name" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 0, + "properties": { + "status": { + "description": "Delete status", + "instillFormat": "string", + "required": [], + "instillUIOrder": 0, + "title": "Status", + "type": "string" + } + }, + "required": [ + "status" + ], + "title": "Output", + "type": "object" + } + }, + "TASK_CREATE_TABLE":{ + "instillShortDescription": "Create a table in the database", + "input": { + "instillUIOrder": 0, + "properties": { + "engine": { + "description": "Choose the engine of your database", + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 0, + "title": "Engine", + "enum": [ + "MySQL", + "PostgreSQL", + "SQL Server", + "Oracle", + "MariaDB", + "Firebird" + ], + "type": "string" + }, + "table-name": { + "description": "The table name in the database to be created", + "instillAcceptFormats": [ + "string" + ], + "instillShortDescription": "Database Table Name", + "instillUIOrder": 1, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "title": "Table Name", + "type":"string" + }, + "columns-structure": { + "description": "The columns structure to be created in the table, json with value string, e.g {\"name\": \"VARCHAR(255)\", \"age\": \"INT not null\"}", + "instillAcceptFormats": [ + "semi-structured/*","structured/*","object" + ], + "instillShortDescription": "Columns Structure, e.g 
{\"name\": \"VARCHAR(255)\", \"age\": \"INT not null\"}", + "instillUIOrder": 2, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "title": "Columns" + } + }, + "required": [ + "engine", + "table-name", + "columns-structure" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 0, + "properties": { + "status": { + "description": "Create table status", + "instillFormat": "string", + "required": [], + "instillUIOrder": 0, + "title": "Status", + "type": "string" + } + }, + "required": [ + "status" + ], + "title": "Output", + "type": "object" + } + }, + "TASK_DROP_TABLE":{ + "instillShortDescription": "Drop a table in the database", + "input": { + "instillUIOrder": 0, + "properties": { + "engine": { + "description": "Choose the engine of your database", + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 0, + "title": "Engine", + "enum": [ + "MySQL", + "PostgreSQL", + "SQL Server", + "Oracle", + "MariaDB", + "Firebird" + ], + "type": "string" + }, + "table-name": { + "description": "The table name in the database to be dropped", + "instillAcceptFormats": [ + "string" + ], + "instillShortDescription": "Database Table Name", + "instillUIOrder": 1, + "instillUpstreamTypes": [ + "reference", + "template", + "value" + ], + "title": "Table Name", + "type":"string" + } + }, + "required": [ + "engine", + "table-name" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 0, + "properties": { + "status": { + "description": "Drop table status", + "instillFormat": "string", + "required": [], + "instillUIOrder": 0, + "title": "Status", + "type": "string" + } + }, + "required": [ + "status" + ], + "title": "Output", + "type": "object" + } + } +} diff --git a/data/sql/v0/main.go b/data/sql/v0/main.go new file mode 100644 index 00000000..8b8863e4 --- /dev/null +++ b/data/sql/v0/main.go @@ -0,0 +1,121 @@ +//go:generate 
compogen readme ./config ./README.mdx +package sql + +import ( + "context" + "database/sql" + _ "embed" + "fmt" + "sync" + + "github.com/instill-ai/component/base" + "github.com/instill-ai/x/errmsg" + "github.com/jmoiron/sqlx" + "google.golang.org/protobuf/types/known/structpb" +) + +const ( + TaskInsert = "TASK_INSERT" + TaskUpdate = "TASK_UPDATE" + TaskSelect = "TASK_SELECT" + TaskDelete = "TASK_DELETE" + TaskCreateTable = "TASK_CREATE_TABLE" + TaskDropTable = "TASK_DROP_TABLE" +) + +//go:embed config/definition.json +var definitionJSON []byte + +//go:embed config/setup.json +var setupJSON []byte + +//go:embed config/tasks.json +var tasksJSON []byte + +var once sync.Once +var comp *component + +type SQLClient interface { + NamedExec(query string, arg interface{}) (sql.Result, error) + Queryx(query string, args ...interface{}) (*sqlx.Rows, error) +} + +type component struct { + base.Component +} + +type execution struct { + base.ComponentExecution + + execute func(*structpb.Struct) (*structpb.Struct, error) + client SQLClient +} + +func Init(bc base.Component) *component { + once.Do(func() { + comp = &component{Component: bc} + err := comp.LoadDefinition(definitionJSON, setupJSON, tasksJSON, nil) + if err != nil { + panic(err) + } + }) + return comp +} + +func (c *component) CreateExecution(sysVars map[string]any, setup *structpb.Struct, task string) (*base.ExecutionWrapper, error) { + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: c, SystemVariables: sysVars, Setup: setup, Task: task}, + } + + switch task { + case TaskInsert: + e.execute = e.insert + case TaskUpdate: + e.execute = e.update + case TaskSelect: + e.execute = e.selects + case TaskDelete: + e.execute = e.delete + case TaskCreateTable: + e.execute = e.createTable + case TaskDropTable: + e.execute = e.dropTable + default: + return nil, errmsg.AddMessage( + fmt.Errorf("not supported task: %s", task), + fmt.Sprintf("%s task is not supported.", task), + ) + } + return 
&base.ExecutionWrapper{Execution: e}, nil +} + +type Engine struct { + DBEngine string `json:"engine"` +} + +// The client is created here in Execute because the engine is part of the input. +// It is created only when e.client is nil, so later inputs reuse the same client. +func (e *execution) Execute(_ context.Context, inputs []*structpb.Struct) ([]*structpb.Struct, error) { + outputs := make([]*structpb.Struct, len(inputs)) + + for i, input := range inputs { + var inputStruct Engine + err := base.ConvertFromStructpb(input, &inputStruct) + if err != nil { + return nil, err + } + + if e.client == nil { + e.client = newClient(e.Setup, &inputStruct) + } + + output, err := e.execute(input) + if err != nil { + return nil, err + } + + outputs[i] = output + } + + return outputs, nil +} diff --git a/data/sql/v0/tasks.go b/data/sql/v0/tasks.go new file mode 100644 index 00000000..8e58d531 --- /dev/null +++ b/data/sql/v0/tasks.go @@ -0,0 +1,361 @@ +package sql + +import ( + "fmt" + "regexp" + "strings" + + "github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +type InsertInput struct { + Data map[string]any `json:"data"` + TableName string `json:"table-name"` +} + +type InsertOutput struct { + Status string `json:"status"` +} + +type UpdateInput struct { + UpdateData map[string]any `json:"update-data"` + Filter string `json:"filter"` + TableName string `json:"table-name"` +} + +type UpdateOutput struct { + Status string `json:"status"` +} + +type SelectInput struct { + Filter string `json:"filter"` + TableName string `json:"table-name"` + Limit int `json:"limit"` + Columns []string `json:"columns"` +} + +type SelectOutput struct { + Rows []map[string]any `json:"rows"` + Status string `json:"status"` +} + +type DeleteInput struct { + Filter string `json:"filter"` + TableName string `json:"table-name"` +} + +type DeleteOutput struct { + Status string `json:"status"` +} + +type CreateTableInput struct { + TableName string `json:"table-name"` + ColumnsStructure
map[string]string `json:"columns-structure"` +} + +type CreateTableOutput struct { + Status string `json:"status"` +} + +type DropTableInput struct { + TableName string `json:"table-name"` +} + +type DropTableOutput struct { + Status string `json:"status"` +} + +func isValidWhereClause(whereClause string) error { + // Extended regex pattern for logical operators and additional conditions + regex := `^(?:\w+ (?:=|!=|>|<|>=|<=|LIKE|MATCH|IS NULL|IS NOT NULL|BETWEEN|IN|EXISTS|NOT|REGEXP|RLIKE|IS DISTINCT FROM|IS NOT DISTINCT FROM|COALESCE\(.*\)|NULLIF\(.*\)) (?:[\w'%]+|\d+|\([\w\s,']+\)|(?:CASE .* END)|(?:\w+\s+\w+)))(?: (?:AND|OR) (?:\w+ (?:=|!=|>|<|>=|<=|LIKE|MATCH|IS NULL|IS NOT NULL|BETWEEN|IN|EXISTS|NOT|REGEXP|RLIKE|IS DISTINCT FROM|IS NOT DISTINCT FROM|COALESCE\(.*\)|NULLIF\(.*\)) (?:[\w'%]+|\d+|\([\w\s,']+\)|(?:CASE .* END)|(?:\w+\s+\w+))))*$` + matched, err := regexp.MatchString(regex, whereClause) + if err != nil || !matched { + return err + } + return nil +} + +func buildSQLStatementInsert(tableName string, data *map[string]any) (string, map[string]any) { + sqlStatement := "INSERT INTO " + tableName + " (" + var columns []string + var placeholders []string + values := make(map[string]any) + + for dataKey, dataValue := range *data { + columns = append(columns, dataKey) + placeholders = append(placeholders, ":"+dataKey) + values[dataKey] = dataValue + } + + sqlStatement += strings.Join(columns, ", ") + ") VALUES (" + strings.Join(placeholders, ", ") + ")" + + return sqlStatement, values +} + +func buildSQLStatementUpdate(tableName string, updateData map[string]any, filter string) (string, map[string]any) { + sqlStatement := "UPDATE " + tableName + " SET " + values := make(map[string]any) + + var setClauses []string + for col, updateValue := range updateData { + setClauses = append(setClauses, fmt.Sprintf("%s = :%s", col, col)) + values[col] = updateValue + } + + sqlStatement += strings.Join(setClauses, ", ") + + if filter != "" { + sqlStatement += " WHERE " + 
filter + } + + return sqlStatement, values +} + +// limit may be omitted; it then defaults to 0, and 0 means no LIMIT clause is added +// columns may be empty; if so, all columns are selected with * +func buildSQLStatementSelect(tableName string, filter string, limit int, columns []string) string { + sqlStatement := "SELECT " + + var notAll string + if limit == 0 { + notAll = "" + } else { + notAll = fmt.Sprintf(" LIMIT %d", limit) + } + + if len(columns) > 0 { + sqlStatement += strings.Join(columns, ", ") + } else { + sqlStatement += "*" + } + + sqlStatement += " FROM " + tableName + if filter != "" { + sqlStatement += " WHERE " + filter + } + sqlStatement += notAll + + return sqlStatement +} + +func buildSQLStatementDelete(tableName string, filter string) string { + sqlStatement := "DELETE FROM " + tableName + + if filter != "" { + sqlStatement += " WHERE " + filter + } + + return sqlStatement +} + +// columnsStructure maps column names to column types (a JSON object in the input); NOTE(review): both are formatted straight into the statement text, which does not by itself prevent SQL injection +func buildSQLStatementCreateTable(tableName string, columnsStructure map[string]string) (string, map[string]any) { + sqlStatement := "CREATE TABLE " + tableName + " (" + var columnDefs []string + values := make(map[string]any) + + for colName, colType := range columnsStructure { + columnDefs = append(columnDefs, fmt.Sprintf("%s %s", colName, colType)) + values[colName] = colType + } + + sqlStatement += strings.Join(columnDefs, ", ") + ");" + return sqlStatement, values +} + +func buildSQLStatementDropTable(tableName string) (string, map[string]any) { + sqlStatement := "DROP TABLE " + tableName + ";" + values := map[string]any{"table_name": tableName} + return sqlStatement, values +} + +func (e *execution) insert(in *structpb.Struct) (*structpb.Struct, error) { + var inputStruct InsertInput + err := base.ConvertFromStructpb(in, &inputStruct) + if err != nil { + return nil, err + } + + sqlStatement, values := buildSQLStatementInsert(inputStruct.TableName, &inputStruct.Data) + + _, err =
e.client.NamedExec(sqlStatement, values) + + if err != nil { + return nil, err + } + + outputStruct := InsertOutput{ + Status: "Successfully inserted rows", + } + + output, err := base.ConvertToStructpb(outputStruct) + if err != nil { + return nil, err + } + return output, nil +} + +func (e *execution) update(in *structpb.Struct) (*structpb.Struct, error) { + var inputStruct UpdateInput + err := base.ConvertFromStructpb(in, &inputStruct) + if err != nil { + return nil, err + } + err = isValidWhereClause(inputStruct.Filter) + if err != nil { + return nil, err + } + + sqlStatement, values := buildSQLStatementUpdate(inputStruct.TableName, inputStruct.UpdateData, inputStruct.Filter) + + _, err = e.client.NamedExec(sqlStatement, values) + + if err != nil { + return nil, err + } + + outputStruct := UpdateOutput{ + Status: "Successfully updated rows", + } + + output, err := base.ConvertToStructpb(outputStruct) + if err != nil { + return nil, err + } + return output, nil +} + +// Queryx is used since we need not only status but also result return +func (e *execution) selects(in *structpb.Struct) (*structpb.Struct, error) { + var inputStruct SelectInput + err := base.ConvertFromStructpb(in, &inputStruct) + if err != nil { + return nil, err + } + err = isValidWhereClause(inputStruct.Filter) + if err != nil { + return nil, err + } + + sqlStatement := buildSQLStatementSelect(inputStruct.TableName, inputStruct.Filter, inputStruct.Limit, inputStruct.Columns) + + rows, err := e.client.Queryx(sqlStatement) + if err != nil { + return nil, err + } + defer rows.Close() + + var result []map[string]any + + for rows.Next() { + rowMap := make(map[string]any) + + err := rows.MapScan(rowMap) + if err != nil { + return nil, fmt.Errorf("failed to scan row: %v", err) + } + + for key, value := range rowMap { + switch v := value.(type) { + case []byte: + rowMap[key] = string(v) + } + } + + result = append(result, rowMap) + } + + outputStruct := SelectOutput{ + Rows: result, + Status: 
"Successfully selected rows", + } + + output, err := base.ConvertToStructpb(outputStruct) + if err != nil { + return nil, err + } + return output, nil +} + +func (e *execution) delete(in *structpb.Struct) (*structpb.Struct, error) { + var inputStruct DeleteInput + err := base.ConvertFromStructpb(in, &inputStruct) + if err != nil { + return nil, err + } + err = isValidWhereClause(inputStruct.Filter) + if err != nil { + return nil, err + } + + sqlStatement := buildSQLStatementDelete(inputStruct.TableName, inputStruct.Filter) + + _, err = e.client.Queryx(sqlStatement) + + if err != nil { + return nil, err + } + + outputStruct := DeleteOutput{ + Status: "Successfully deleted rows", + } + + output, err := base.ConvertToStructpb(outputStruct) + if err != nil { + return nil, err + } + return output, nil +} + +func (e *execution) createTable(in *structpb.Struct) (*structpb.Struct, error) { + var inputStruct CreateTableInput + err := base.ConvertFromStructpb(in, &inputStruct) + if err != nil { + return nil, err + } + + sqlStatement, values := buildSQLStatementCreateTable(inputStruct.TableName, inputStruct.ColumnsStructure) + + _, err = e.client.NamedExec(sqlStatement, values) + + if err != nil { + return nil, err + } + + outputStruct := CreateTableOutput{ + Status: "Successfully created table", + } + + output, err := base.ConvertToStructpb(outputStruct) + if err != nil { + return nil, err + } + return output, nil +} + +func (e *execution) dropTable(in *structpb.Struct) (*structpb.Struct, error) { + var inputStruct DropTableInput + err := base.ConvertFromStructpb(in, &inputStruct) + if err != nil { + return nil, err + } + + sqlStatement, values := buildSQLStatementDropTable(inputStruct.TableName) + + _, err = e.client.NamedExec(sqlStatement, values) + + if err != nil { + return nil, err + } + + outputStruct := DropTableOutput{ + Status: "Successfully dropped table", + } + + output, err := base.ConvertToStructpb(outputStruct) + if err != nil { + return nil, err + } + return 
output, nil +} diff --git a/go.mod b/go.mod index d6e7ca57..bcafa75b 100644 --- a/go.mod +++ b/go.mod @@ -7,9 +7,12 @@ require ( cloud.google.com/go/iam v1.1.6 cloud.google.com/go/storage v1.38.0 code.sajari.com/docconv v1.3.8 + github.com/DATA-DOG/go-sqlmock v1.5.2 github.com/JohannesKaufmann/html-to-markdown v1.5.0 github.com/PuerkitoBio/goquery v1.9.1 + github.com/belong-inc/go-hubspot v0.9.0 github.com/cohere-ai/cohere-go/v2 v2.8.5 + github.com/denisenkom/go-mssqldb v0.12.3 github.com/emersion/go-imap/v2 v2.0.0-beta.3 github.com/emersion/go-message v0.18.1 github.com/fogleman/gg v1.3.0 @@ -18,27 +21,35 @@ require ( github.com/gage-technologies/mistral-go v1.1.0 github.com/go-chi/chi/v5 v5.1.0 github.com/go-resty/resty/v2 v2.12.0 + github.com/go-sql-driver/mysql v1.8.1 github.com/gocolly/colly/v2 v2.1.0 github.com/gofrs/uuid v4.4.0+incompatible github.com/gojuno/minimock/v3 v3.3.6 github.com/google/go-github/v62 v62.0.0 + github.com/google/uuid v1.6.0 github.com/h2non/filetype v1.1.3 + github.com/iFaceless/godub v0.0.0-20200728093528-a30bb4d1a0f1 github.com/instill-ai/protogen-go v0.3.3-alpha.0.20240530065422-d384f728a1e2 github.com/instill-ai/x v0.4.0-alpha github.com/itchyny/gojq v0.12.14 + github.com/jmoiron/sqlx v1.4.0 github.com/json-iterator/go v1.1.12 github.com/lestrrat-go/jspointer v0.0.0-20181205001929-82fadba7561c github.com/lestrrat-go/jsref v0.0.0-20211028120858-c0bcbb5abf20 github.com/lestrrat-go/option v1.0.0 github.com/lestrrat-go/pdebug v0.0.0-20210111095411-35b07dbf089b github.com/lestrrat-go/structinfo v0.0.0-20210312050401-7f8bd69d6acb + github.com/lib/pq v1.10.9 + github.com/nakagami/firebirdsql v0.9.10 github.com/pkg/errors v0.9.1 github.com/pkoukk/tiktoken-go v0.1.6 github.com/redis/go-redis/v9 v9.5.1 github.com/santhosh-tekuri/jsonschema/v5 v5.3.0 + github.com/sijms/go-ora/v2 v2.8.19 github.com/slack-go/slack v0.12.5 github.com/stretchr/testify v1.9.0 github.com/tmc/langchaingo v0.1.10 + github.com/u2takey/ffmpeg-go v0.5.0 go.uber.org/zap 
v1.24.0 golang.org/x/image v0.18.0 golang.org/x/oauth2 v0.18.0 @@ -53,6 +64,7 @@ require ( cloud.google.com/go/compute v1.25.1 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect cloud.google.com/go/longrunning v0.5.6 // indirect + filippo.io/edwards25519 v1.1.0 // indirect github.com/JalfResi/justext v0.0.0-20170829062021-c0282dea7198 // indirect github.com/advancedlogic/GoOse v0.0.0-20191112112754-e742535969c1 // indirect github.com/andybalholm/cascadia v1.3.2 // indirect @@ -61,6 +73,7 @@ require ( github.com/antchfx/xpath v1.2.4 // indirect github.com/apache/arrow/go/v14 v14.0.2 // indirect github.com/araddon/dateparse v0.0.0-20200409225146-d820a6159ab1 // indirect + github.com/aws/aws-sdk-go v1.55.1 // indirect github.com/aws/aws-sdk-go-v2 v1.30.1 // indirect github.com/aws/smithy-go v1.20.3 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect @@ -75,6 +88,8 @@ require ( github.com/go-logr/stdr v1.2.2 // indirect github.com/gobwas/glob v0.2.3 // indirect github.com/goccy/go-json v0.10.2 // indirect + github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe // indirect + github.com/golang-sql/sqlexp v0.1.0 // indirect github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect @@ -82,13 +97,14 @@ require ( github.com/google/go-cmp v0.6.0 // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/google/s2a-go v0.1.7 // indirect - github.com/google/uuid v1.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect github.com/googleapis/gax-go/v2 v2.12.3 // indirect github.com/gorilla/websocket v1.5.1 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 // indirect github.com/itchyny/timefmt-go v0.1.5 // indirect github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect + 
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect github.com/kennygrant/sanitize v1.2.4 // indirect github.com/klauspost/compress v1.17.2 // indirect github.com/klauspost/cpuid/v2 v2.2.5 // indirect @@ -102,19 +118,24 @@ require ( github.com/otiai10/gosseract/v2 v2.2.4 // indirect github.com/pierrec/lz4/v4 v4.1.18 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/richardlehane/mscfb v1.0.3 // indirect github.com/richardlehane/msoleps v1.0.3 // indirect github.com/rivo/uniseg v0.4.4 // indirect github.com/rogpeppe/go-internal v1.11.0 // indirect github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect + github.com/shopspring/decimal v1.2.0 // indirect github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf // indirect github.com/temoto/robotstxt v1.1.2 // indirect + github.com/tink-ab/tempfile v0.0.0-20180226111222-33beb0518f1a // indirect + github.com/u2takey/go-utils v0.3.1 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181 // indirect gitlab.com/golang-commonmark/linkify v0.0.0-20191026162114-a0c2df6c8f82 // indirect gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a // indirect gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect + gitlab.com/nyarla/go-crypt v0.0.0-20160106005555-d9a5dc2b789b // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect @@ -137,4 +158,5 @@ require ( google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240325203815-454cdb8f5daa // indirect 
gopkg.in/yaml.v3 v3.0.1 // indirect + modernc.org/mathutil v1.5.0 // indirect ) diff --git a/go.sum b/go.sum index b21758bf..8e6e6cfc 100644 --- a/go.sum +++ b/go.sum @@ -17,7 +17,14 @@ cloud.google.com/go/storage v1.38.0 h1:Az68ZRGlnNTpIBbLjSMIV2BDcwwXYlRlQzis0llkp cloud.google.com/go/storage v1.38.0/go.mod h1:tlUADB0mAb9BgYls9lq+8MGkfzOXuLrnHXlpHmvFJoY= code.sajari.com/docconv v1.3.8 h1:sT6s2TcjAF+aTNFxxHHhut2T5uoCIHpjG+BCtmMgRvU= code.sajari.com/docconv v1.3.8/go.mod h1:q2Wj80d67JJ4VVZCNv3fTht0fJ6eMFajQBsa+G1pKaw= +filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= +filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= +github.com/Azure/azure-sdk-for-go/sdk/azcore v0.19.0/go.mod h1:h6H6c8enJmmocHUbLiiGY6sx7f9i+X3m1CHdd5c6Rdw= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v0.11.0/go.mod h1:HcM1YX14R7CJcghJGOYCgdezslRSVzqwLf/q+4Y2r/0= +github.com/Azure/azure-sdk-for-go/sdk/internal v0.7.0/go.mod h1:yqy467j36fJxcRV2TzfVZ1pCb5vxm4BtZPUdYWe/Xo8= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= +github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= github.com/JalfResi/justext v0.0.0-20170829062021-c0282dea7198 h1:8P+AjBhGByCuCX2zTkAf6UY+dj0JczX+t6cSdCSyvfw= github.com/JalfResi/justext v0.0.0-20170829062021-c0282dea7198/go.mod h1:0SURuH1rsE8aVWvutuMZghRNrNrYEUzibzJfhEYR8L0= github.com/JohannesKaufmann/html-to-markdown v1.5.0 h1:cEAcqpxk0hUJOXEVGrgILGW76d1GpyGY7PCnAaWQyAI= @@ -51,10 +58,15 @@ github.com/apache/arrow/go/v14 v14.0.2/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybF github.com/araddon/dateparse v0.0.0-20180729174819-cfd92a431d0e/go.mod h1:SLqhdZcd+dF3TEVL2RMoob5bBP5R1P1qkox+HtCBgGI= github.com/araddon/dateparse v0.0.0-20200409225146-d820a6159ab1 h1:TEBmxO80TM04L8IuMWk77SGL1HomBmKTdzdJLLWznxI= github.com/araddon/dateparse 
v0.0.0-20200409225146-d820a6159ab1/go.mod h1:SLqhdZcd+dF3TEVL2RMoob5bBP5R1P1qkox+HtCBgGI= +github.com/aws/aws-sdk-go v1.38.20/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= +github.com/aws/aws-sdk-go v1.55.1 h1:ZTNPmbRMxaK5RlTJrBullX9r/rF1MPf3yAJOLlwDiT8= +github.com/aws/aws-sdk-go v1.55.1/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= github.com/aws/aws-sdk-go-v2 v1.30.1 h1:4y/5Dvfrhd1MxRDD77SrfsDaj8kUkkljU7XE83NPV+o= github.com/aws/aws-sdk-go-v2 v1.30.1/go.mod h1:nIQjQVp5sfpQcTc9mPSr1B0PaWK5ByX9MOoDadSN4lc= github.com/aws/smithy-go v1.20.3 h1:ryHwveWzPV5BIof6fyDvor6V3iUL7nTfiTKXHiW05nE= github.com/aws/smithy-go v1.20.3/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E= +github.com/belong-inc/go-hubspot v0.9.0 h1:s3QhCs27VvjKr+RgMAnsh8hslUYrx1pneqESMhKSoYA= +github.com/belong-inc/go-hubspot v0.9.0/go.mod h1:UemuxWPbKBAWh8n9KKuCZR9H/5yOkMD7/g5KhDiCg7A= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= @@ -72,10 +84,14 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/denisenkom/go-mssqldb v0.12.3 h1:pBSGx9Tq67pBOTLmxNuirNTeB8Vjmf886Kx+8Y+8shw= +github.com/denisenkom/go-mssqldb v0.12.3/go.mod h1:k0mtMFOnU+AihqFxPMiF05rtiDrorD1Vrm1KEz5hxDo= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/disintegration/imaging v1.6.2/go.mod 
h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4= github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= github.com/emersion/go-imap/v2 v2.0.0-beta.3 h1:z0TLMfYnDsFupXLhzRXgOzXenD3uPvNniQSu5fN1teg= github.com/emersion/go-imap/v2 v2.0.0-beta.3/go.mod h1:BZTFHsS1hmgBkFlHqbxGLXk2hnRqTItUgwjSSCsYNAk= github.com/emersion/go-message v0.18.1 h1:tfTxIoXFSFRwWaZsgnqS1DSZuGpYGzSmCZD8SK3QA2E= @@ -94,6 +110,7 @@ github.com/fogleman/gg v1.3.0 h1:/7zJX8F6AaYQc57WQCyN9cAIz+4bCJGO9B+dyW29am8= github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= github.com/gage-technologies/mistral-go v1.1.0 h1:POv1wM9jA/9OBXGV2YdPi9Y/h09+MjCbUF+9hRYlVUI= @@ -110,6 +127,8 @@ github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre github.com/go-resty/resty/v2 v2.0.0/go.mod h1:dZGr0i9PLlaaTD4H/hoZIDjQ+r6xq8mgbRzHZf7f2J8= github.com/go-resty/resty/v2 v2.12.0 h1:rsVL8P90LFvkUYq/V5BTVe203WfRIU4gvcf+yfzJzGA= github.com/go-resty/resty/v2 v2.12.0/go.mod h1:o0yGPrkS3lOe1+eFajk6kBW8ScXzwU3hD69/gt2yB/0= +github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y= +github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= github.com/go-test/deep v1.0.4 h1:u2CU3YKy9I2pmu9pX0eq50wCgjfGIt539SqR7FbHiho= github.com/go-test/deep v1.0.4/go.mod 
h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= @@ -121,8 +140,13 @@ github.com/gocolly/colly/v2 v2.1.0 h1:k0DuZkDoCsx51bKpRJNEmcxcp+W5N8ziuwGaSDuFoG github.com/gocolly/colly/v2 v2.1.0/go.mod h1:I2MuhsLjQ+Ex+IzK3afNS8/1qP3AedHOusRPcRdC5o0= github.com/gofrs/uuid v4.4.0+incompatible h1:3qXRTX8/NbyulANqlc0lchS1gqAVxRgsuW1YrTJupqA= github.com/gofrs/uuid v4.4.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM= +github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gojuno/minimock/v3 v3.3.6 h1:tZQQaDgKSxsKiVia9vt6zZ/qsKNGBw2D0ubHQPr+mHc= github.com/gojuno/minimock/v3 v3.3.6/go.mod h1:kjvubEBVT8aUQ9e+g8x/hPfAhiOoqW7WinzzJgzr4ws= +github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe h1:lXe2qZdvpiX5WZkZR4hgp4KJVfY3nMkvmwbVkpv1rVY= +github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= +github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A= +github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= @@ -154,11 +178,13 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp 
v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/go-github/v62 v62.0.0 h1:/6mGCaRywZz9MuHyw9gD1CwsbmBX8GWsbFkwMmHdhl4= github.com/google/go-github/v62 v62.0.0/go.mod h1:EMxeUqGJq2xRu9DYBMwel/mr7kZrzUOfQmmpYrZn2a4= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= @@ -168,6 +194,7 @@ github.com/google/martian/v3 v3.3.2 h1:IqNFLAmvJOgVlpdEBiQbDc2EwKW77amAycfTuWKdf github.com/google/martian/v3 v3.3.2/go.mod h1:oBOf6HBosgwRXnUGWUB05QECsc6uvmMiJ3+6W4l/CUk= github.com/google/s2a-go v0.1.7 h1:60BLSyTrOV4/haCDW4zb1guZItoSq8foHCXrAnjBo/o= github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8F0Qdw= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -182,6 +209,9 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 h1:Wqo399gCIufwto+VfwCSvsnfGpF github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0/go.mod h1:qmOFXW2epJhM0qSnUUYpldc7gVz2KMQwJ/QYCDIa7XU= github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg= github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy1HndBY= +github.com/iFaceless/godub v0.0.0-20200728093528-a30bb4d1a0f1 h1:oqeURuHQrImMykykqJgFbStlaDXyY7JpXXrwXyjr9ls= +github.com/iFaceless/godub 
v0.0.0-20200728093528-a30bb4d1a0f1/go.mod h1:tKRg0K9YmfD3eD6KFos+YHIVMouKMzxDSK5XpdxdCUI= +github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/instill-ai/protogen-go v0.3.3-alpha.0.20240530065422-d384f728a1e2 h1:YGvBMxWaHQK7xk+QAT68/9lc7x3LX5LYE+IDHipd6Fs= github.com/instill-ai/protogen-go v0.3.3-alpha.0.20240530065422-d384f728a1e2/go.mod h1:2blmpUwiTwxIDnrjIqT6FhR5ewshZZF554wzjXFvKpQ= github.com/instill-ai/x v0.4.0-alpha h1:zQV2VLbSHjMv6gyBN/2mwwrvWk0/mJM6ZKS12AzjfQg= @@ -194,10 +224,22 @@ github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6Pyu github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk= github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 h1:g0fAGBisHaEQ0TRq1iBvemFRf+8AEWEmBESSiWB3Vsc= github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o= +github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY= +github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 h1:iQTw/8FWTuc7uiaSepXwyf3o52HaUYcV+Tu66S3F5GA= +github.com/kardianos/osext 
v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8= github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= +github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE= github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4= github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= @@ -221,15 +263,24 @@ github.com/lestrrat-go/structinfo v0.0.0-20210312050401-7f8bd69d6acb h1:DDg5u5lk github.com/lestrrat-go/structinfo v0.0.0-20210312050401-7f8bd69d6acb/go.mod h1:i+E8Uf04vf2QjOWyJdGY75vmG+4rxiZW2kIj1lTB5mo= github.com/levigross/exp-html v0.0.0-20120902181939-8df60c69a8f5 h1:W7p+m/AECTL3s/YR5RpQ4hz5SjNeKzZBl1q36ws12s0= github.com/levigross/exp-html v0.0.0-20120902181939-8df60c69a8f5/go.mod h1:QMe2wuKJ0o7zIVE8AqiT8rd8epmm6WDIZ2wyuBqYPzM= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= +github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= 
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5/go.mod h1:caMODM3PzxT8aQXRPkAt8xlV/e7d7w8GM5g0fa5F0D8= +github.com/nakagami/firebirdsql v0.9.10 h1:7Y73BiH3j/f8faIaryZvDZ3nEo0L7c6S5pg+qWoZ91c= +github.com/nakagami/firebirdsql v0.9.10/go.mod h1:ei91eXUYcMkWJOr4rK6Sta+BVmi3K+WvYR4yASlq/kY= github.com/olekukonko/tablewriter v0.0.0-20180506121414-d4647c9c7a84/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8= github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= @@ -238,8 +289,10 @@ github.com/otiai10/gosseract/v2 v2.2.4 h1:h/PV+oJqke8q2Ccw9bjpMBWfd7N2vtGDCUcihZ github.com/otiai10/gosseract/v2 v2.2.4/go.mod h1:ahOp/kHojnOMGv1RaUnR0jwY5JVa6BYKhYAS8nbMLSo= github.com/otiai10/mint v1.3.0 h1:Ady6MKVezQwHBkGzLFbrsywyp09Ah7rkmfjV3Bcr5uc= github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT91xUo= +github.com/panjf2000/ants/v2 v2.4.2/go.mod h1:f6F0NZVFsGCp5A7QW/Zj/m92atWwOkY0OIhFxRNFr4A= github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= 
+github.com/pkg/browser v0.0.0-20180916011732-0a3d74bf9ce4/go.mod h1:4OwLy04Bl9Ef3GJJCoec+30X3LQs/0/m4HFRt/2LUSA= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -251,6 +304,9 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/redis/go-redis/v9 v9.5.1 h1:H1X4D3yHPaYrkL5X06Wh6xNVM/pX0Ft4RV0vMGvLBh8= github.com/redis/go-redis/v9 v9.5.1/go.mod h1:hdY0cQFCN4fnSYT6TkisLufl/4W5UIXyv0b/CLO2V2M= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/richardlehane/mscfb v1.0.3 h1:rD8TBkYWkObWO0oLDFCbwMeZ4KoalxQy+QgniCj3nKI= github.com/richardlehane/mscfb v1.0.3/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7gK3DypaEsUk= github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= @@ -274,9 +330,14 @@ github.com/sebdah/goldie/v2 v2.5.3/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvK github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= +github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= +github.com/sijms/go-ora/v2 v2.8.19 
h1:7LoKZatDYGi18mkpQTR/gQvG9yOdtc7hPAex96Bqisc= +github.com/sijms/go-ora/v2 v2.8.19/go.mod h1:EHxlY6x7y9HAsdfumurRfTd+v8NrEOTR3Xl4FWlH6xk= github.com/simplereach/timeutils v1.2.0/go.mod h1:VVbQDfN/FHRZa1LSqcwo4kNZ62OOyqLLGQKYB3pB0Q8= github.com/slack-go/slack v0.12.5 h1:ddZ6uz6XVaB+3MTDhoW04gG+Vc/M/X1ctC+wssy2cqs= github.com/slack-go/slack v0.12.5/go.mod h1:hlGi5oXA+Gt+yWTPP0plCdRKmjsDxecdHxYQdlMQKOw= +github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk= github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo= github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -286,6 +347,7 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -296,8 +358,14 @@ github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8 github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg= github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= +github.com/tink-ab/tempfile v0.0.0-20180226111222-33beb0518f1a h1:Qhm/9UKGO1+AjEKIsq8G72uCq4SrYxSxS5wiD0F3IC4= 
+github.com/tink-ab/tempfile v0.0.0-20180226111222-33beb0518f1a/go.mod h1:Wt5qSdcHgX6XkqZKAZTxnN+93jnqtx0jEgTQakpZ1CE= github.com/tmc/langchaingo v0.1.10 h1:+cssnyaY1avZwzdDFvJYlVUsch9oFRgoqw3Avk5Zig4= github.com/tmc/langchaingo v0.1.10/go.mod h1:lPKUIu8ZGI7RAksRFtKbgtS2v3LL0j7LcccHPCvgNfY= +github.com/u2takey/ffmpeg-go v0.5.0 h1:r7d86XuL7uLWJ5mzSeQ03uvjfIhiJYvsRAJFCW4uklU= +github.com/u2takey/ffmpeg-go v0.5.0/go.mod h1:ruZWkvC1FEiUNjmROowOAps3ZcWxEiOpFoHCvk97kGc= +github.com/u2takey/go-utils v0.3.1 h1:TaQTgmEZZeDHQFYfd+AdUT1cT4QJgJn/XVPELhHw4ys= +github.com/u2takey/go-utils v0.3.1/go.mod h1:6e+v5vEZ/6gu12w/DC2ixZdZtCrNokVxD0JUklcqdCs= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark v1.6.0 h1:boZcn2GTjpsynOsC0iJHnBWa4Bi0qzfJjthwauItG68= github.com/yuin/goldmark v1.6.0/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= @@ -315,6 +383,8 @@ gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 h1:qqjvoVX gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84/go.mod h1:IJZ+fdMvbW2qW6htJx7sLJ04FEs4Ldl/MDsJtMKywfw= gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f h1:Wku8eEdeJqIOFHtrfkYUByc4bCaTeA6fL0UJgfEiFMI= gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f/go.mod h1:Tiuhl+njh/JIg0uS/sOJVYi0x2HEa5rc1OAaVsb5tAs= +gitlab.com/nyarla/go-crypt v0.0.0-20160106005555-d9a5dc2b789b h1:7gd+rd8P3bqcn/96gOZa3F5dpJr/vEiDQYlNb/y2uNs= +gitlab.com/nyarla/go-crypt v0.0.0-20160106005555-d9a5dc2b789b/go.mod h1:T3BPAOm2cqquPa0MKWeNkmOM5RQsRhkrwMWonFMN7fE= gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJWpU5qHKGNy///1pacZC9txiUI= gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= @@ -340,10 +410,13 @@ go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= 
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60= go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg= +gocv.io/x/gocv v0.25.0/go.mod h1:Rar2PS6DV+T4FL+PM535EImD/h13hGVaHhnCu1xarBs= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.16.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= @@ -352,6 +425,7 @@ golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5D golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= +golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/image v0.18.0 h1:jGzIakQa/ZXI1I0Fxvaa9W7yP25TqT6cHIHn+6CqvSQ= golang.org/x/image v0.18.0/go.mod h1:4yyo5vMFQjVjUcVk4jEQcU9MGy/rulF5WvUILseCM2E= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod 
h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -375,7 +449,9 @@ golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210610132358-84b48f89b13b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= @@ -401,6 +477,7 @@ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -437,9 +514,11 @@ golang.org/x/text v0.9.0/go.mod 
h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= @@ -500,10 +579,16 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EV gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ= +modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc= diff --git a/internal/util/helper.go b/internal/util/helper.go index 0e410fbb..179d9644 100644 --- a/internal/util/helper.go +++ b/internal/util/helper.go @@ -2,6 +2,7 @@ package util import ( "encoding/base64" + "fmt" "mime/multipart" "net/http" "strings" @@ -70,3 +71,36 @@ func ScrapeWebpageHTMLToMarkdown(html string) (string, error) { func DecodeBase64(input string) ([]byte, error) { return base64.StdEncoding.DecodeString(base.TrimBase64Mime(input)) } + +func GetContentTypeFromBase64(base64String string) (string, error) { + // Remove the "data:" prefix and split at the first semicolon + contentType := strings.TrimPrefix(base64String, "data:") + + parts := strings.SplitN(contentType, ";", 2) + if len(parts) != 2 { + return "", fmt.Errorf("invalid format") + } + + // The first part is the content type + return parts[0], nil +} + +func TransformContentTypeToFileExtension(contentType string) string { + // https://gist.github.com/AshHeskes/6038140 + // We can integrate more Content-Type to file extension mappings in the future + switch contentType { + case "application/vnd.openxmlformats-officedocument.wordprocessingml.document": + return "docx" + case "application/msword": + return "doc" + case "application/vnd.openxmlformats-officedocument.presentationml.presentation": + return "pptx" + case "application/vnd.ms-powerpoint": + return "ppt" + case "text/html": + return 
"html" + case "application/pdf": + return "pdf" + } + return "" +} diff --git a/operator/audio/v0/README.mdx b/operator/audio/v0/README.mdx new file mode 100644 index 00000000..804c45f6 --- /dev/null +++ b/operator/audio/v0/README.mdx @@ -0,0 +1,77 @@ +--- +title: "Audio" +lang: "en-US" +draft: false +description: "Learn about how to set up a VDP Audio component https://github.com/instill-ai/instill-core" +--- + +The Audio component is an operator component that allows users to extract and manipulate audio from different sources. +It can carry out the following tasks: + +- [Chunk Audios](#chunk-audios) +- [Slice Audio](#slice-audio) + + + +## Release Stage + +`Alpha` + + + +## Configuration + +The component configuration is defined and maintained [here](https://github.com/instill-ai/component/blob/main/operator/audio/v0/config/definition.json). + + + + + +## Supported Tasks + +### Chunk Audios + +Split audio file into chunks + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_CHUNK_AUDIOS` | +| Audio (required) | `audio` | string | Base64 encoded audio file to be split | +| Chunk count (required) | `chunk-count` | integer | Number of chunks to equally split the audio into | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Audios | `audios` | array[string] | A list of base64 encoded audios | + + + + + + +### Slice Audio + +Specify a time range to slice an audio file + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_SLICE_AUDIO` | +| Audio (required) | `audio` | string | Base64 encoded audio file to be sliced | +| Start time (required) | `start-time` | integer | Start time of the slice in seconds | +| End time (required) | `end-time` | integer | End time of the slice in seconds | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Audio | `audio` | string | Base64 encoded audio 
slice | + + + + + + + diff --git a/operator/audio/v0/assets/audio.svg b/operator/audio/v0/assets/audio.svg new file mode 100644 index 00000000..6a3781fe --- /dev/null +++ b/operator/audio/v0/assets/audio.svg @@ -0,0 +1,3 @@ + + + diff --git a/operator/audio/v0/audio_operation.go b/operator/audio/v0/audio_operation.go new file mode 100644 index 00000000..0d51acb9 --- /dev/null +++ b/operator/audio/v0/audio_operation.go @@ -0,0 +1,153 @@ +package audio + +import ( + "bytes" + "encoding/base64" + "fmt" + "time" + + "github.com/iFaceless/godub" + "github.com/iFaceless/godub/wav" + "github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +type ChunkAudiosInput struct { + Audio Audio `json:"audio"` + ChunkCount int `json:"chunk-count"` +} + +type ChunkAudiosOutput struct { + Audios []Audio `json:"audios"` +} + +type SliceAudioInput struct { + Audio Audio `json:"audio"` + StartTime int `json:"start-time"` + EndTime int `json:"end-time"` +} + +type SliceAudioOutput struct { + Audio Audio `json:"audio"` +} + +type ConcatenateInput struct { + Audios []Audio `json:"audios"` +} + +type ConcatenateOutput struct { + Audio Audio `json:"audio"` +} + +// Base64 encoded audio +type Audio string + +func chunkAudios(input *structpb.Struct) (*structpb.Struct, error) { + + var inputStruct ChunkAudiosInput + + err := base.ConvertFromStructpb(input, &inputStruct) + if err != nil { + return nil, err + } + + buf, err := base64.StdEncoding.DecodeString(base.TrimBase64Mime(string(inputStruct.Audio))) + if err != nil { + return nil, err + } + + segment, err := godub.NewLoader().Load(bytes.NewReader(buf)) + + if err != nil { + return nil, fmt.Errorf("failed to load audio: %w", err) + } + + duration := segment.Duration() + + chunkSeconds := float64(duration) / float64(inputStruct.ChunkCount) + + var audioSegments []*godub.AudioSegment + + var startTime time.Duration + for i := 0; i < inputStruct.ChunkCount; i++ { + startTime = getStartTime(chunkSeconds, i) 
+ endTime := getEndTime(chunkSeconds, i, inputStruct.ChunkCount, duration) + + slicedSegment, err := segment.Slice(startTime, endTime) + if err != nil { + return nil, fmt.Errorf("failed to slice audio: %w in chunk %v", err, i) + } + audioSegments = append(audioSegments, slicedSegment) + } + + var audios []Audio + prefix := "data:audio/wav;base64," + for _, segment := range audioSegments { + var wavBuf bytes.Buffer + err = wav.Encode(&wavBuf, segment.AsWaveAudio()) + + if err != nil { + return nil, fmt.Errorf("failed to encode audio to wav: %w", err) + } + + audios = append(audios, Audio(prefix+base64.StdEncoding.EncodeToString(wavBuf.Bytes()))) + } + + output := ChunkAudiosOutput{ + Audios: audios, + } + + return base.ConvertToStructpb(output) +} + +func sliceAudio(input *structpb.Struct) (*structpb.Struct, error) { + + var inputStruct SliceAudioInput + + err := base.ConvertFromStructpb(input, &inputStruct) + if err != nil { + return nil, err + } + + buf, err := base64.StdEncoding.DecodeString(base.TrimBase64Mime(string(inputStruct.Audio))) + if err != nil { + return nil, err + } + + segment, err := godub.NewLoader().Load(bytes.NewReader(buf)) + + if err != nil { + return nil, fmt.Errorf("failed to load audio: %w", err) + } + + startTime := time.Duration(inputStruct.StartTime) * time.Second + endTime := time.Duration(inputStruct.EndTime) * time.Second + + slicedSegment, err := segment.Slice(startTime, endTime) + if err != nil { + return nil, fmt.Errorf("failed to slice audio: %w", err) + } + + var wavBuf bytes.Buffer + err = wav.Encode(&wavBuf, slicedSegment.AsWaveAudio()) + if err != nil { + return nil, fmt.Errorf("failed to encode audio to wav: %w", err) + } + + output := SliceAudioOutput{ + Audio: Audio("data:audio/wav;base64," + base64.StdEncoding.EncodeToString(wavBuf.Bytes())), + } + + return base.ConvertToStructpb(output) +} + +func getStartTime(chunkSeconds float64, i int) time.Duration { + return time.Duration(chunkSeconds * float64(i)) +} + +func 
getEndTime(chunkSeconds float64, i, totalCount int, duration time.Duration) time.Duration { + if i == totalCount-1 { + return duration + } + return time.Duration(chunkSeconds * float64(i+1)) +} diff --git a/operator/audio/v0/config/definition.json b/operator/audio/v0/config/definition.json new file mode 100644 index 00000000..3735a4e0 --- /dev/null +++ b/operator/audio/v0/config/definition.json @@ -0,0 +1,18 @@ +{ + "availableTasks": [ + "TASK_CHUNK_AUDIOS", + "TASK_SLICE_AUDIO" + ], + "documentationUrl": "https://www.instill.tech/docs/component/operator/audio", + "icon": "assets/audio.svg", + "id": "audio", + "public": true, + "spec": {}, + "title": "Audio", + "type": "COMPONENT_TYPE_OPERATOR", + "uid": "b5c75caa-9261-4757-bfbf-12e908f59f16", + "version": "0.1.0", + "sourceUrl": "https://github.com/instill-ai/component/blob/main/operator/audio/v0", + "description": "Extract and manipulate audio from different sources", + "releaseStage": "RELEASE_STAGE_ALPHA" +} diff --git a/operator/audio/v0/config/tasks.json b/operator/audio/v0/config/tasks.json new file mode 100644 index 00000000..c7a0539f --- /dev/null +++ b/operator/audio/v0/config/tasks.json @@ -0,0 +1,131 @@ +{ + "TASK_CHUNK_AUDIOS": { + "instillShortDescription": "Split audio file into chunks", + "input": { + "description": "Audio file to split", + "instillEditOnNodeFields": [ + "audio", + "chunk-count" + ], + "instillUIOrder": 0, + "properties": { + "audio": { + "description": "Base64 encoded audio file to be split", + "instillAcceptFormats": [ + "audio/*" + ], + "instillUIOrder": 0, + "instillUpstreamTypes": [ + "reference" + ], + "title": "Audio", + "type": "string" + }, + "chunk-count": { + "description": "Number of chunks to equally split the audio into", + "instillAcceptFormats": [ + "integer" + ], + "instillUIOrder": 1, + "title": "Chunk count", + "type": "integer" + } + }, + "required": [ + "audio", + "chunk-count" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 0, 
+ "properties": { + "audios": { + "description": "A list of base64 encoded audios", + "instillFormat": "array:audio/wav", + "instillUIOrder": 0, + "items": { + "type": "string", + "title": "Audio" + }, + "title": "Audios", + "type": "array" + } + }, + "required": [ + "audios" + ], + "title": "Output", + "type": "object" + } + }, + "TASK_SLICE_AUDIO": { + "instillShortDescription": "Specify a time range to slice an audio file", + "input": { + "description": "Audio file to slice", + "instillEditOnNodeFields": [ + "audio", + "start-time", + "end-time" + ], + "instillUIOrder": 0, + "properties": { + "audio": { + "description": "Base64 encoded audio file to be sliced", + "instillAcceptFormats": [ + "audio/*" + ], + "instillUIOrder": 0, + "instillUpstreamTypes": [ + "reference" + ], + "title": "Audio", + "type": "string" + }, + "start-time": { + "description": "Start time of the slice in seconds", + "instillAcceptFormats": [ + "integer" + ], + "instillUIOrder": 1, + "title": "Start time", + "type": "integer" + }, + "end-time": { + "description": "End time of the slice in seconds", + "instillAcceptFormats": [ + "integer" + ], + "instillUIOrder": 2, + "title": "End time", + "type": "integer" + } + }, + "required": [ + "audio", + "start-time", + "end-time" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 0, + "properties": { + "audio": { + "description": "Base64 encoded audio slice", + "instillFormat": "audio/wav", + "instillUIOrder": 0, + "title": "Audio", + "type": "string" + } + }, + "required": [ + "audio" + ], + "title": "Output", + "type": "object" + } + } +} diff --git a/operator/audio/v0/main.go b/operator/audio/v0/main.go new file mode 100644 index 00000000..6834c95d --- /dev/null +++ b/operator/audio/v0/main.go @@ -0,0 +1,79 @@ +//go:generate compogen readme ./config ./README.mdx +package audio + +import ( + "context" + _ "embed" // embed + "fmt" + "sync" + + "github.com/instill-ai/component/base" + 
"google.golang.org/protobuf/types/known/structpb" +) + +const ( + taskChunkAudios string = "TASK_CHUNK_AUDIOS" + taskSliceAudio string = "TASK_SLICE_AUDIO" +) + +var ( + //go:embed config/definition.json + definitionJSON []byte + //go:embed config/tasks.json + tasksJSON []byte + once sync.Once + comp *component +) + +type component struct { + base.Component +} + +type execution struct { + base.ComponentExecution + + execute func(*structpb.Struct) (*structpb.Struct, error) +} + +func Init(bc base.Component) *component { + once.Do(func() { + comp = &component{Component: bc} + err := comp.LoadDefinition(definitionJSON, nil, tasksJSON, nil) + if err != nil { + panic(err) + } + }) + return comp +} + +func (c *component) CreateExecution(sysVars map[string]any, setup *structpb.Struct, task string) (*base.ExecutionWrapper, error) { + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: c, SystemVariables: sysVars, Task: task}, + } + + switch task { + case taskChunkAudios: + e.execute = chunkAudios + case taskSliceAudio: + e.execute = sliceAudio + default: + return nil, fmt.Errorf(task + " task is not supported.") + } + + return &base.ExecutionWrapper{Execution: e}, nil +} + +func (e *execution) Execute(_ context.Context, inputs []*structpb.Struct) ([]*structpb.Struct, error) { + outputs := make([]*structpb.Struct, len(inputs)) + + for i, input := range inputs { + output, err := e.execute(input) + if err != nil { + return nil, err + } + + outputs[i] = output + } + + return outputs, nil +} diff --git a/operator/audio/v0/main_test.go b/operator/audio/v0/main_test.go new file mode 100644 index 00000000..c6cf2c87 --- /dev/null +++ b/operator/audio/v0/main_test.go @@ -0,0 +1,3 @@ +package audio +// TODO chuang8511 Investigate how to run test case with installing ffmpeg in test env +// It will be arranged according to the product schedule diff --git a/operator/document/v0/README.mdx b/operator/document/v0/README.mdx index 4e28aaab..5d1f4fb3 100644 --- 
a/operator/document/v0/README.mdx +++ b/operator/document/v0/README.mdx @@ -30,13 +30,13 @@ The component configuration is defined and maintained [here](https://github.com/ ### Convert To Markdown -Convert PDF to Markdown format. +Convert document to text in Markdown format. | Input | ID | Type | Description | | :--- | :--- | :--- | :--- | | Task ID (required) | `task` | string | `TASK_CONVERT_TO_MARKDOWN` | -| Document (required) | `pdf` | string | Base64 encoded PDF to be converted to Markdown | +| Document (required) | `document` | string | Base64 encoded PDF/DOCX/DOC/PPTX/PPT/HTML to be converted to text in Markdown format | | Display image tag | `display-image-tag` | boolean | Choose if the result displays image tags | diff --git a/operator/document/v0/config/tasks.json b/operator/document/v0/config/tasks.json index 0248c553..e02ccee6 100644 --- a/operator/document/v0/config/tasks.json +++ b/operator/document/v0/config/tasks.json @@ -1,15 +1,15 @@ { "TASK_CONVERT_TO_MARKDOWN": { - "instillShortDescription": "Convert PDF to Markdown format.", + "instillShortDescription": "Convert document to text in Markdown format.", "input": { "description": "Input", "instillEditOnNodeFields": [ - "pdf" + "document" ], "instillUIOrder": 0, "properties": { - "pdf": { - "description": "Base64 encoded PDF to be converted to Markdown", + "document": { + "description": "Base64 encoded PDF/DOCX/DOC/PPTX/PPT/HTML to be converted to text in Markdown format", "instillAcceptFormats": [ "*/*" ], @@ -37,7 +37,7 @@ } }, "required": [ - "pdf" + "document" ], "title": "Input", "type": "object" diff --git a/operator/document/v0/convert_document_to_markdown.go b/operator/document/v0/convert_document_to_markdown.go new file mode 100644 index 00000000..57af5138 --- /dev/null +++ b/operator/document/v0/convert_document_to_markdown.go @@ -0,0 +1,89 @@ +package document + +import ( + "fmt" + + "github.com/instill-ai/component/base" + "github.com/instill-ai/component/internal/util" + 
"google.golang.org/protobuf/types/known/structpb" +) + +type convertDocumentToMarkdownInput struct { + Document string `json:"document"` + DisplayImageTag bool `json:"display-image-tag"` +} + +type convertDocumentToMarkdownOutput struct { + Body string `json:"body"` +} + +func (e *execution) convertDocumentToMarkdown(input *structpb.Struct) (*structpb.Struct, error) { + inputStruct := convertDocumentToMarkdownInput{} + err := base.ConvertFromStructpb(input, &inputStruct) + if err != nil { + return nil, err + } + + contentType, err := util.GetContentTypeFromBase64(inputStruct.Document) + if err != nil { + return nil, err + } + + fileExtension := util.TransformContentTypeToFileExtension(contentType) + + if fileExtension == "" { + return nil, fmt.Errorf("unsupported file type") + } + + var transformer MarkdownTransformer + + transformer, err = e.getMarkdownTransformer(fileExtension, inputStruct) + if err != nil { + return nil, err + } + extractedTextInMarkdownFormat, err := transformer.Transform() + if err != nil { + return nil, err + } + + outputStruct := convertDocumentToMarkdownOutput{ + Body: extractedTextInMarkdownFormat, + } + output, err := base.ConvertToStructpb(outputStruct) + if err != nil { + return nil, err + } + + return output, nil +} + +func getMarkdownTransformer(fileExtension string, inputStruct convertDocumentToMarkdownInput) (MarkdownTransformer, error) { + switch fileExtension { + case "pdf": + return PDFToMarkdownTransformer{ + Base64EncodedText: inputStruct.Document, + FileExtension: fileExtension, + DisplayImageTag: inputStruct.DisplayImageTag, + }, nil + case "doc", "docx": + return DocxDocToMarkdownTransformer{ + Base64EncodedText: inputStruct.Document, + FileExtension: fileExtension, + DisplayImageTag: inputStruct.DisplayImageTag, + }, nil + case "ppt", "pptx": + return PptPptxToMarkdownTransformer{ + Base64EncodedText: inputStruct.Document, + FileExtension: fileExtension, + DisplayImageTag: inputStruct.DisplayImageTag, + }, nil + case 
"html": + return HTMLToMarkdownTransformer{ + Base64EncodedText: inputStruct.Document, + FileExtension: fileExtension, + DisplayImageTag: inputStruct.DisplayImageTag, + }, nil + default: + return nil, fmt.Errorf("unsupported file type") + } +} diff --git a/operator/document/v0/convert_document_to_markdown_test.go b/operator/document/v0/convert_document_to_markdown_test.go new file mode 100644 index 00000000..88ff1e7a --- /dev/null +++ b/operator/document/v0/convert_document_to_markdown_test.go @@ -0,0 +1,95 @@ +package document + +import ( + "encoding/base64" + "fmt" + "os" + "testing" + + "github.com/frankban/quicktest" + "github.com/instill-ai/component/base" +) + +func TestConvertDocumentToMarkdown(t *testing.T) { + c := quicktest.New(t) + + tests := []struct { + name string + filepath string + }{ + { + name: "Convert PDF file", + filepath: "testdata/test.pdf", + }, + { + name: "Convert DOCX file", + filepath: "testdata/test.docx", + }, + { + name: "Convert HTML file", + filepath: "testdata/test.html", + }, + { + name: "Convert PPTX file", + filepath: "testdata/test.pptx", + }, + } + for _, test := range tests { + c.Run(test.name, func(c *quicktest.C) { + fileContent, err := os.ReadFile(test.filepath) + c.Assert(err, quicktest.IsNil) + + base64DataURI := fmt.Sprintf("data:%s;base64,%s", mimeTypeByExtension(test.filepath), base64.StdEncoding.EncodeToString(fileContent)) + + inputStruct := convertDocumentToMarkdownInput{ + Document: base64DataURI, + DisplayImageTag: false, + } + input, err := base.ConvertToStructpb(inputStruct) + c.Assert(err, quicktest.IsNil) + e := &execution{ + getMarkdownTransformer: fakeGetMarkdownTransformer, + } + e.Task = "TASK_CONVERT_TO_MARKDOWN" + + output, err := e.convertDocumentToMarkdown(input) + c.Assert(err, quicktest.IsNil) + + outputStruct := convertDocumentToMarkdownOutput{} + err = base.ConvertFromStructpb(output, &outputStruct) + c.Assert(err, quicktest.IsNil) + c.Assert(outputStruct.Body, quicktest.DeepEquals, "This is test 
file") + + }) + } + +} + +func mimeTypeByExtension(filepath string) string { + switch filepath { + case "testdata/test.pdf": + return "application/pdf" + case "testdata/test.docx": + return "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + case "testdata/test.html": + return "text/html" + case "testdata/test.pptx": + return "application/vnd.openxmlformats-officedocument.presentationml.presentation" + default: + return "" + } +} + +func fakeGetMarkdownTransformer(fileExtension string, inputStruct convertDocumentToMarkdownInput) (MarkdownTransformer, error) { + return FakeMarkdownTransformer{}, nil +} + +type FakeMarkdownTransformer struct { + Base64EncodedText string + FileExtension string + DisplayImageTag bool +} + +func (f FakeMarkdownTransformer) Transform() (string, error) { + return "This is test file", nil +} diff --git a/operator/document/v0/convert_pdf_to_markdown.go b/operator/document/v0/convert_pdf_to_markdown.go deleted file mode 100644 index a8621f49..00000000 --- a/operator/document/v0/convert_pdf_to_markdown.go +++ /dev/null @@ -1,68 +0,0 @@ -package document - -import ( - "encoding/json" - "io" - - "github.com/instill-ai/component/base" -) - -type commandRunner interface { - CombinedOutput() ([]byte, error) - StdinPipe() (io.WriteCloser, error) -} - -type convertPDFToMarkdownInput struct { - PDF string `json:"pdf"` - DisplayImageTag bool `json:"display-image-tag"` -} - -type convertPDFToMarkdownOutput struct { - Body string `json:"body"` -} - -func convertPDFToMarkdown(input convertPDFToMarkdownInput, cmdRunner commandRunner) (convertPDFToMarkdownOutput, error) { - - paramsJSON, err := json.Marshal(map[string]interface{}{ - "PDF": base.TrimBase64Mime(input.PDF), - "display-image-tag": input.DisplayImageTag, - }) - - if err != nil { - return convertPDFToMarkdownOutput{}, err - } - - stdin, err := cmdRunner.StdinPipe() - if err != nil { - return convertPDFToMarkdownOutput{}, err - } - errChan := make(chan error, 1) - - go 
func() { - defer stdin.Close() - _, err := stdin.Write(paramsJSON) - if err != nil { - errChan <- err - return - } - errChan <- nil - }() - - outputBytes, err := cmdRunner.CombinedOutput() - if err != nil { - return convertPDFToMarkdownOutput{}, err - } - - writeErr := <-errChan - if writeErr != nil { - return convertPDFToMarkdownOutput{}, writeErr - } - - var output convertPDFToMarkdownOutput - err = json.Unmarshal(outputBytes, &output) - if err != nil { - return convertPDFToMarkdownOutput{}, err - } - - return output, nil -} diff --git a/operator/document/v0/convert_pdf_to_markdown_test.go b/operator/document/v0/convert_pdf_to_markdown_test.go deleted file mode 100644 index 6e5832c2..00000000 --- a/operator/document/v0/convert_pdf_to_markdown_test.go +++ /dev/null @@ -1,60 +0,0 @@ -package document - -import ( - "encoding/base64" - "encoding/json" - "testing" - - qt "github.com/frankban/quicktest" - "github.com/gojuno/minimock/v3" - "github.com/instill-ai/component/base" - "github.com/instill-ai/component/internal/mock" -) - -func TestConvertPDFToText(t *testing.T) { - c := qt.New(t) - - c.Run("Convert PDF file", func(c *qt.C) { - - fakePDF := "# Test\n\nThis is a test document.\n\n" - b, err := json.Marshal(fakePDF) - c.Assert(err, qt.IsNil) - - encoded := base64.StdEncoding.EncodeToString(b) - - input := convertPDFToMarkdownInput{ - PDF: encoded, - DisplayImageTag: false, - } - - mc := minimock.NewController(t) - mockRunner := mock.NewCommandRunnerMock(mc) - mockIoWriteCloser := mock.NewWriteCloserMock(mc) - mockIoWriteCloser.CloseMock.Expect().Return(nil) - - fakeParams, err := json.Marshal(map[string]interface{}{ - "PDF": base.TrimBase64Mime(encoded), - "display-image-tag": false, - }) - - c.Assert(err, qt.IsNil) - - mockIoWriteCloser.WriteMock.Expect(fakeParams).Return(len(fakePDF), nil) - mockRunner.StdinPipeMock.Expect().Return(mockIoWriteCloser, nil) - - mockOutput := convertPDFToMarkdownOutput{ - Body: "# Test\n\nThis is a test document.\n\n", - } - 
mockOutputBytes, err := json.Marshal(mockOutput) - c.Assert(err, qt.IsNil) - - mockRunner.CombinedOutputMock.Expect().Return(mockOutputBytes, nil) - - output, err := convertPDFToMarkdown(input, mockRunner) - c.Assert(err, qt.IsNil) - - c.Assert(output.Body, qt.Equals, "# Test\n\nThis is a test document.\n\n") - - }) - -} diff --git a/operator/document/v0/main.go b/operator/document/v0/main.go index e17db050..c8d42040 100644 --- a/operator/document/v0/main.go +++ b/operator/document/v0/main.go @@ -4,7 +4,6 @@ package document import ( "context" "fmt" - "os/exec" "sync" _ "embed" @@ -35,6 +34,8 @@ type component struct { type execution struct { base.ComponentExecution + execute func(*structpb.Struct) (*structpb.Struct, error) + getMarkdownTransformer func(fileExtension string, inputStruct convertDocumentToMarkdownInput) (MarkdownTransformer, error) } func Init(bc base.Component) *component { @@ -49,37 +50,31 @@ func Init(bc base.Component) *component { } func (c *component) CreateExecution(sysVars map[string]any, setup *structpb.Struct, task string) (*base.ExecutionWrapper, error) { - return &base.ExecutionWrapper{Execution: &execution{ - ComponentExecution: base.ComponentExecution{Component: c, SystemVariables: sysVars, Task: task}, - }}, nil -} + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: c, SystemVariables: sysVars, Setup: setup, Task: task}, + getMarkdownTransformer: getMarkdownTransformer, + } -func (e *execution) Execute(_ context.Context, inputs []*structpb.Struct) ([]*structpb.Struct, error) { - outputs := []*structpb.Struct{} + switch task { + case taskConvertToMarkdown: + e.execute = e.convertDocumentToMarkdown + default: + return nil, fmt.Errorf(fmt.Sprintf("%s task is not supported.", task)) + } - for _, input := range inputs { - switch e.Task { - case taskConvertToMarkdown: - inputStruct := convertPDFToMarkdownInput{} - err := base.ConvertFromStructpb(input, &inputStruct) - if err != nil { - return nil, err - } + return 
&base.ExecutionWrapper{Execution: e}, nil +} - cmd := exec.Command(pythonInterpreter, "-c", pythonCode) +func (e *execution) Execute(_ context.Context, inputs []*structpb.Struct) ([]*structpb.Struct, error) { + outputs := make([]*structpb.Struct, len(inputs)) - outputStruct, err := convertPDFToMarkdown(inputStruct, cmd) - if err != nil { - return nil, err - } - output, err := base.ConvertToStructpb(outputStruct) - if err != nil { - return nil, err - } - outputs = append(outputs, output) - default: - return nil, fmt.Errorf("not supported task: %s", e.Task) + for i, input := range inputs { + output, err := e.execute(input) + if err != nil { + return nil, err } + + outputs[i] = output } return outputs, nil } diff --git a/operator/document/v0/markdown_transformer.go b/operator/document/v0/markdown_transformer.go new file mode 100644 index 00000000..d55a81ba --- /dev/null +++ b/operator/document/v0/markdown_transformer.go @@ -0,0 +1,210 @@ +package document + +import ( + "encoding/base64" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + md "github.com/JohannesKaufmann/html-to-markdown" + "github.com/instill-ai/component/base" +) + +type MarkdownTransformer interface { + Transform() (string, error) +} + +type PDFToMarkdownTransformer struct { + Base64EncodedText string + FileExtension string + DisplayImageTag bool +} + +func (t PDFToMarkdownTransformer) Transform() (string, error) { + return extractPDFTextInMarkdownFormat(t.Base64EncodedText, t.DisplayImageTag) +} + +type DocxDocToMarkdownTransformer struct { + Base64EncodedText string + FileExtension string + DisplayImageTag bool +} + +func (t DocxDocToMarkdownTransformer) Transform() (string, error) { + + tempDoc, err := os.CreateTemp("", "temp_document.*."+t.FileExtension) + if err != nil { + return "", fmt.Errorf("failed to create temporary document: %w", err) + } + inputTempDecodeFileName := tempDoc.Name() + defer os.Remove(inputTempDecodeFileName) + + err = 
writeDecodeToFile(t.Base64EncodedText, tempDoc) + if err != nil { + return "", fmt.Errorf("failed to decode base64 to file: %w", err) + } + + tempPDFName, err := convertToPDF(inputTempDecodeFileName) + if err != nil { + return "", fmt.Errorf("failed to convert file to PDF: %w", err) + } + defer os.Remove(tempPDFName) + + base64PDF, err := encodeFileToBase64(tempPDFName) + + if err != nil { + return "", fmt.Errorf("failed to encode file to base64: %w", err) + } + + return extractPDFTextInMarkdownFormat(base64PDF, t.DisplayImageTag) +} + +type PptPptxToMarkdownTransformer struct { + Base64EncodedText string + FileExtension string + DisplayImageTag bool +} + +func (t PptPptxToMarkdownTransformer) Transform() (string, error) { + tempPpt, err := os.CreateTemp("", "temp_document.*."+t.FileExtension) + if err != nil { + return "", fmt.Errorf("failed to create temporary document: %w", err) + } + inputTempDecodeFileName := tempPpt.Name() + defer os.Remove(inputTempDecodeFileName) + + err = writeDecodeToFile(t.Base64EncodedText, tempPpt) + if err != nil { + return "", fmt.Errorf("failed to decode base64 to file: %w", err) + } + + tempPDFName, err := convertToPDF(inputTempDecodeFileName) + if err != nil { + return "", fmt.Errorf("failed to convert file to PDF: %w", err) + } + defer os.Remove(tempPDFName) + + base64PDF, err := encodeFileToBase64(tempPDFName) + + if err != nil { + return "", fmt.Errorf("failed to encode file to base64: %w", err) + } + + return extractPDFTextInMarkdownFormat(base64PDF, t.DisplayImageTag) +} + +type HTMLToMarkdownTransformer struct { + Base64EncodedText string + FileExtension string + DisplayImageTag bool +} + +func (t HTMLToMarkdownTransformer) Transform() (string, error) { + + data, err := base64.StdEncoding.DecodeString(base.TrimBase64Mime(t.Base64EncodedText)) + if err != nil { + return "", fmt.Errorf("failed to decode base64 to file: %w", err) + } + + converter := md.NewConverter("", true, nil) + + html := string(data) + markdown, err := 
converter.ConvertString(html) + if err != nil { + return "", fmt.Errorf("failed to convert HTML to markdown: %w", err) + } + + return markdown, nil +} + +type pythonRunnerOutput struct { + Body string `json:"body"` +} + +func extractPDFTextInMarkdownFormat(base64Text string, displayImageTag bool) (string, error) { + + paramsJSON, err := json.Marshal(map[string]interface{}{ + "PDF": base.TrimBase64Mime(base64Text), + "display-image-tag": displayImageTag, + }) + + if err != nil { + return "", err + } + + cmdRunner := exec.Command(pythonInterpreter, "-c", pythonCode) + stdin, err := cmdRunner.StdinPipe() + + if err != nil { + return "", err + } + errChan := make(chan error, 1) + go func() { + defer stdin.Close() + _, err := stdin.Write(paramsJSON) + if err != nil { + errChan <- err + return + } + errChan <- nil + }() + + outputBytes, err := cmdRunner.CombinedOutput() + if err != nil { + return "", err + } + + writeErr := <-errChan + if writeErr != nil { + return "", writeErr + } + + var output pythonRunnerOutput + err = json.Unmarshal(outputBytes, &output) + if err != nil { + return "", err + } + return output.Body, nil +} + +func writeDecodeToFile(base64Str string, file *os.File) error { + data, err := base64.StdEncoding.DecodeString(base.TrimBase64Mime(base64Str)) + if err != nil { + return err + } + _, err = file.Write(data) + return err +} + +func encodeFileToBase64(inputPath string) (string, error) { + data, err := os.ReadFile(inputPath) + if err != nil { + return "", err + } + return base64.StdEncoding.EncodeToString(data), nil +} + +func convertToPDF(inputFileName string) (fileName string, err error) { + tempDir, err := os.MkdirTemp("", "libreoffice") + if err != nil { + return "", fmt.Errorf("failed to create temporary directory: " + err.Error()) + } + defer os.RemoveAll(tempDir) + + cmd := exec.Command("libreoffice", "--headless", "--convert-to", "pdf", inputFileName) + cmd.Env = append(os.Environ(), "HOME="+tempDir) + + if err := cmd.Run(); err != nil { + 
return "", fmt.Errorf("failed to execute LibreOffice command: " + err.Error()) + } + + // LibreOffice is not executed in temp directory like inputFileName. + // The generated PDF is not in temp directory. + // So, we need to remove the path and keep only the file name. + noPathFileName := filepath.Base(inputFileName) + generatedPDF := strings.TrimSuffix(noPathFileName, filepath.Ext(inputFileName)) + ".pdf" + return generatedPDF, nil +} diff --git a/operator/document/v0/testdata/test.docx b/operator/document/v0/testdata/test.docx new file mode 100644 index 00000000..953fb02f Binary files /dev/null and b/operator/document/v0/testdata/test.docx differ diff --git a/operator/document/v0/testdata/test.html b/operator/document/v0/testdata/test.html new file mode 100644 index 00000000..5b217181 --- /dev/null +++ b/operator/document/v0/testdata/test.html @@ -0,0 +1,2 @@ + +

This is test file

diff --git a/operator/document/v0/testdata/test.pdf b/operator/document/v0/testdata/test.pdf new file mode 100644 index 00000000..8d2aac6e Binary files /dev/null and b/operator/document/v0/testdata/test.pdf differ diff --git a/operator/document/v0/testdata/test.pptx b/operator/document/v0/testdata/test.pptx new file mode 100644 index 00000000..f19a8c43 Binary files /dev/null and b/operator/document/v0/testdata/test.pptx differ diff --git a/operator/text/v0/README.mdx b/operator/text/v0/README.mdx index 83db949d..ca19f088 100644 --- a/operator/text/v0/README.mdx +++ b/operator/text/v0/README.mdx @@ -68,9 +68,10 @@ Chunk text with different strategies | Output | ID | Type | Description | | :--- | :--- | :--- | :--- | -| Token Count (optional) | `token-count` | integer | Total count of tokens in the input text | +| Token Count | `token-count` | integer | Total count of tokens in the original input text | | Text Chunks | `text-chunks` | array[object] | Text chunks after splitting | | Number of Text Chunks | `chunk-num` | integer | Total number of output text chunks | +| Token Count Chunks | `chunks-token-count` | integer | Total count of tokens in the output text chunks | ### Chunking Strategy diff --git a/operator/text/v0/chunk_text.go b/operator/text/v0/chunk_text.go index 17c94e60..99530805 100644 --- a/operator/text/v0/chunk_text.go +++ b/operator/text/v0/chunk_text.go @@ -33,15 +33,17 @@ type Setting struct { } type ChunkTextOutput struct { - ChunkNum int `json:"chunk-num"` - TextChunks []TextChunk `json:"text-chunks"` - TokenCount int `json:"token-count,omitempty"` + ChunkNum int `json:"chunk-num"` + TextChunks []TextChunk `json:"text-chunks"` + TokenCount int `json:"token-count"` + ChunksTokenCount int `json:"chunks-token-count"` } type TextChunk struct { Text string `json:"text"` StartPosition int `json:"start-position"` EndPosition int `json:"end-position"` + TokenCount int `json:"token-count"` } func (s *Setting) SetDefault() { @@ -110,19 +112,18 @@ func 
chunkText(input ChunkTextInput) (ChunkTextOutput, error) { ) } - tkm, err := tiktoken.EncodingForModel(setting.ModelName) + chunks, err := split.SplitText(input.Text) if err != nil { return output, err } - token := tkm.Encode(input.Text, setting.AllowedSpecial, setting.DisallowedSpecial) - output.TokenCount = len(token) + output.ChunkNum = len(chunks) - chunks, err := split.SplitText(input.Text) + tkm, err := tiktoken.EncodingForModel(setting.ModelName) if err != nil { return output, err } - output.ChunkNum = len(chunks) + totalTokenCount := 0 startScanPosition := 0 rawRunes := []rune(input.Text) for i, chunk := range chunks { @@ -139,22 +140,35 @@ func chunkText(input ChunkTextInput) (ChunkTextOutput, error) { continue } + token := tkm.Encode(chunk, setting.AllowedSpecial, setting.DisallowedSpecial) + output.TextChunks = append(output.TextChunks, TextChunk{ Text: chunk, StartPosition: startPosition, EndPosition: endPosition, + TokenCount: len(token), }) + totalTokenCount += len(token) startScanPosition = startPosition + 1 } if len(output.TextChunks) == 0 { + token := tkm.Encode(input.Text, setting.AllowedSpecial, setting.DisallowedSpecial) + output.TextChunks = append(output.TextChunks, TextChunk{ Text: input.Text, StartPosition: 0, EndPosition: len(rawRunes) - 1, + TokenCount: len(token), }) output.ChunkNum = 1 + totalTokenCount = len(token) } + + originalTextToken := tkm.Encode(input.Text, setting.AllowedSpecial, setting.DisallowedSpecial) + output.TokenCount = len(originalTextToken) + output.ChunksTokenCount = totalTokenCount + return output, nil } diff --git a/operator/text/v0/chunk_text_test.go b/operator/text/v0/chunk_text_test.go index f52242c4..fb128fe8 100644 --- a/operator/text/v0/chunk_text_test.go +++ b/operator/text/v0/chunk_text_test.go @@ -22,9 +22,9 @@ func TestChunkText(t *testing.T) { Text: "Hello world.", Strategy: Strategy{ Setting: Setting{ - ChunkMethod: "Token", - ChunkSize: 512, - ModelName: "gpt-3.5-turbo", + ChunkMethod: "Token", + 
ChunkSize: 512, + ModelName: "gpt-3.5-turbo", }, }, }, @@ -34,10 +34,12 @@ func TestChunkText(t *testing.T) { Text: "Hello world.", StartPosition: 0, EndPosition: 11, + TokenCount: 3, }, }, - ChunkNum: 1, - TokenCount: 3, + ChunkNum: 1, + TokenCount: 3, + ChunksTokenCount: 3, }, }, { @@ -46,9 +48,9 @@ func TestChunkText(t *testing.T) { Text: "Hello world.", Strategy: Strategy{ Setting: Setting{ - ChunkMethod: "Markdown", - ModelName: "gpt-3.5-turbo", - ChunkSize: 5, + ChunkMethod: "Markdown", + ModelName: "gpt-3.5-turbo", + ChunkSize: 5, }, }, }, @@ -58,15 +60,18 @@ func TestChunkText(t *testing.T) { Text: "Hello", StartPosition: 0, EndPosition: 4, + TokenCount: 1, }, { Text: "world.", StartPosition: 6, EndPosition: 11, + TokenCount: 2, }, }, - ChunkNum: 2, - TokenCount: 3, + ChunkNum: 2, + TokenCount: 3, + ChunksTokenCount: 3, }, }, { @@ -75,10 +80,10 @@ func TestChunkText(t *testing.T) { Text: "Hello world.", Strategy: Strategy{ Setting: Setting{ - ChunkMethod: "Recursive", - ModelName: "gpt-3.5-turbo", - ChunkSize: 5, - Separators: []string{" ", "."}, + ChunkMethod: "Recursive", + ModelName: "gpt-3.5-turbo", + ChunkSize: 5, + Separators: []string{" ", "."}, }, }, }, @@ -88,15 +93,18 @@ func TestChunkText(t *testing.T) { Text: "Hello", StartPosition: 0, EndPosition: 4, + TokenCount: 1, }, { Text: "world", StartPosition: 6, EndPosition: 10, + TokenCount: 1, }, }, - ChunkNum: 2, - TokenCount: 3, + ChunkNum: 2, + TokenCount: 3, + ChunksTokenCount: 2, }, }, } diff --git a/operator/text/v0/config/tasks.json b/operator/text/v0/config/tasks.json index 148f7999..9a370c8d 100644 --- a/operator/text/v0/config/tasks.json +++ b/operator/text/v0/config/tasks.json @@ -418,10 +418,19 @@ "description": "The ending position of the chunk in the original text", "instillFormat": "integer", "type": "integer" + }, + "token-count": { + "title": "Token Count", + "description": "Count of tokens in a chunk", + "instillFormat": "integer", + "type": "integer" } }, "required": [ - "text" + 
"text", + "start-position", + "end-position", + "token-count" ], "instillUIMultiline": true, "type": "object" @@ -430,16 +439,25 @@ "type": "array" }, "token-count": { - "description": "Total count of tokens in the input text", + "description": "Total count of tokens in the original input text", "instillUIOrder": 0, "instillFormat": "integer", "title": "Token Count", "type": "integer" + }, + "chunks-token-count": { + "description": "Total count of tokens in the output text chunks", + "instillUIOrder": 3, + "instillFormat": "integer", + "title": "Token Count Chunks", + "type": "integer" } }, "required": [ "text-chunks", - "chunk-num" + "chunk-num", + "token-count", + "chunks-token-count" ], "title": "Output", "type": "object" diff --git a/operator/text/v0/convert.go b/operator/text/v0/convert.go index 28695c1d..27649bf7 100644 --- a/operator/text/v0/convert.go +++ b/operator/text/v0/convert.go @@ -3,7 +3,6 @@ package text import ( "bytes" "fmt" - "strings" "time" "unicode/utf8" @@ -12,6 +11,7 @@ import ( "code.sajari.com/docconv" "github.com/instill-ai/component/base" + "github.com/instill-ai/component/internal/util" ) var ( @@ -58,19 +58,6 @@ type ConvertToTextOutput struct { Error string `json:"error"` } -func getContentTypeFromBase64(base64String string) (string, error) { - // Remove the "data:" prefix and split at the first semicolon - contentType := strings.TrimPrefix(base64String, "data:") - - parts := strings.SplitN(contentType, ";", 2) - if len(parts) != 2 { - return "", fmt.Errorf("invalid format") - } - - // The first part is the content type - return parts[0], nil -} - type converter interface { convert(contentType string, b []byte) (ConvertToTextOutput, error) } @@ -122,7 +109,7 @@ func isSupportedByDocconvConvert(contentType string) bool { func convertToText(input ConvertToTextInput) (ConvertToTextOutput, error) { - contentType, err := getContentTypeFromBase64(input.Doc) + contentType, err := util.GetContentTypeFromBase64(input.Doc) if err != nil { 
return ConvertToTextOutput{}, err } diff --git a/operator/video/v0/README.mdx b/operator/video/v0/README.mdx new file mode 100644 index 00000000..4da39b65 --- /dev/null +++ b/operator/video/v0/README.mdx @@ -0,0 +1,80 @@ +--- +title: "Video" +lang: "en-US" +draft: false +description: "Learn about how to set up a VDP Video component https://github.com/instill-ai/instill-core" +--- + +The Video component is an operator component that allows users to extract and manipulate video from different sources. +It can carry out the following tasks: + +- [Subsample Video](#subsample-video) +- [Subsample Video Frames](#subsample-video-frames) + + + +## Release Stage + +`Alpha` + + + +## Configuration + +The component configuration is defined and maintained [here](https://github.com/instill-ai/component/blob/main/operator/video/v0/config/definition.json). + + + + + +## Supported Tasks + +### Subsample Video + +Subsample video into a new video + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_SUBSAMPLE_VIDEO` | +| Video (required) | `video` | string | Base64 encoded video | +| FPS (required) | `fps` | number | Frames per second | +| Start time | `start-time` | string | Start time in seconds, format is hh:mm:ss | +| Duration | `duration` | string | Duration in seconds, format is hh:mm:ss | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Video | `video` | string | Base64 encoded sub-sampled video | + + + + + + +### Subsample Video Frames + +Subsample video into frames + + +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_SUBSAMPLE_VIDEO_FRAMES` | +| Video (required) | `video` | string | Base64 encoded video | +| FPS (required) | `fps` | number | Frames per second | +| Start time | `start-time` | string | Start time in seconds, format is hh:mm:ss | +| Duration | `duration` | string | Duration in seconds, format is 
hh:mm:ss | + + + +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Frames | `frames` | array[string] | Base64 encoded sub-sampled frames | + + + + + + + diff --git a/operator/video/v0/assets/video.svg b/operator/video/v0/assets/video.svg new file mode 100644 index 00000000..41935a26 --- /dev/null +++ b/operator/video/v0/assets/video.svg @@ -0,0 +1,3 @@ + + + diff --git a/operator/video/v0/config/definition.json b/operator/video/v0/config/definition.json new file mode 100644 index 00000000..3840ba17 --- /dev/null +++ b/operator/video/v0/config/definition.json @@ -0,0 +1,18 @@ +{ + "availableTasks": [ + "TASK_SUBSAMPLE_VIDEO", + "TASK_SUBSAMPLE_VIDEO_FRAMES" + ], + "documentationUrl": "https://www.instill.tech/docs/component/operator/video", + "icon": "assets/video.svg", + "id": "video", + "public": true, + "spec": {}, + "title": "Video", + "type": "COMPONENT_TYPE_OPERATOR", + "uid": "f0be2fd3-7266-4eeb-88eb-3bbbcc2a6b32", + "version": "0.1.0", + "sourceUrl": "https://github.com/instill-ai/component/blob/main/operator/video/v0", + "description": "Extract and manipulate video from different sources", + "releaseStage": "RELEASE_STAGE_ALPHA" +} diff --git a/operator/video/v0/config/tasks.json b/operator/video/v0/config/tasks.json new file mode 100644 index 00000000..e1f8b9ce --- /dev/null +++ b/operator/video/v0/config/tasks.json @@ -0,0 +1,182 @@ +{ + "TASK_SUBSAMPLE_VIDEO": { + "instillShortDescription": "Subsample video into a new video", + "input": { + "description": "Factor to be used for sub-sampling the video", + "instillEditOnNodeFields": [ + "video", + "fps" + ], + "instillUIOrder": 0, + "properties": { + "video": { + "description": "Base64 encoded video", + "instillAcceptFormats": [ + "video/*" + ], + "instillUIOrder": 0, + "instillUpstreamTypes": [ + "reference" + ], + "title": "Video", + "type": "string" + }, + "fps": { + "description": "Frames per second", + "instillAcceptFormats": [ + "number", + "integer" + ], + "instillUIOrder": 1, 
+ "instillUpstreamTypes": [ + "reference", + "value" + ], + "title": "FPS", + "type": "number" + }, + "start-time": { + "description": "Start time in seconds, format is hh:mm:ss", + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 2, + "instillUpstreamTypes": [ + "reference", + "value" + ], + "title": "Start time", + "type": "string" + }, + "duration": { + "description": "Duration in seconds, format is hh:mm:ss", + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 3, + "instillUpstreamTypes": [ + "reference", + "value" + ], + "title": "Duration", + "type": "string" + } + }, + "required": [ + "video", + "fps" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 1, + "properties": { + "video": { + "description": "Base64 encoded sub-sampled video", + "instillFormat": "video/*", + "instillUIOrder": 0, + "title": "Video", + "type": "string" + } + }, + "required": [ + "video" + ], + "title": "Output", + "type": "object" + } + }, + "TASK_SUBSAMPLE_VIDEO_FRAMES": { + "instillShortDescription": "Subsample video into frames", + "input": { + "description": "Factor to be used for sub-sampling the video into frame", + "instillEditOnNodeFields": [ + "video", + "fps" + ], + "instillUIOrder": 0, + "properties": { + "video": { + "description": "Base64 encoded video", + "instillAcceptFormats": [ + "video/*" + ], + "instillUIOrder": 0, + "instillUpstreamTypes": [ + "reference" + ], + "title": "Video", + "type": "string" + }, + "fps": { + "description": "Frames per second", + "instillAcceptFormats": [ + "number", + "integer" + ], + "instillUIOrder": 1, + "instillUpstreamTypes": [ + "reference", + "value" + ], + "title": "FPS", + "type": "number" + }, + "start-time": { + "description": "Start time in seconds, format is hh:mm:ss", + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 2, + "instillUpstreamTypes": [ + "reference", + "value" + ], + "title": "Start time", + "type": "string" + }, + "duration": { + 
"description": "Duration in seconds, format is hh:mm:ss", + "instillAcceptFormats": [ + "string" + ], + "instillUIOrder": 3, + "instillUpstreamTypes": [ + "reference", + "value" + ], + "title": "Duration", + "type": "string" + } + }, + "required": [ + "video", + "fps" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 1, + "properties": { + "frames": { + "description": "Base64 encoded sub-sampled frames", + "instillFormat": "image/*", + "instillUIOrder": 0, + "items": { + "type": "string", + "title": "Frame" + }, + "title": "Frames", + "type": "array" + } + }, + "required": [ + "frames" + ], + "title": "Output", + "type": "object" + } + } +} diff --git a/operator/video/v0/main.go b/operator/video/v0/main.go new file mode 100644 index 00000000..b9b42c77 --- /dev/null +++ b/operator/video/v0/main.go @@ -0,0 +1,79 @@ +//go:generate compogen readme ./config ./README.mdx +package video + +import ( + "context" + _ "embed" + "fmt" + "sync" + + "github.com/instill-ai/component/base" + "google.golang.org/protobuf/types/known/structpb" +) + +const ( + taskSubsampleVideo string = "TASK_SUBSAMPLE_VIDEO" + taskSubsampleVideoFrames string = "TASK_SUBSAMPLE_VIDEO_FRAMES" +) + +var ( + //go:embed config/definition.json + definitionJSON []byte + //go:embed config/tasks.json + tasksJSON []byte + once sync.Once + comp *component +) + +type component struct { + base.Component +} + +type execution struct { + base.ComponentExecution + + execute func(*structpb.Struct) (*structpb.Struct, error) +} + +func Init(bc base.Component) *component { + once.Do(func() { + comp = &component{Component: bc} + err := comp.LoadDefinition(definitionJSON, nil, tasksJSON, nil) + if err != nil { + panic(err) + } + }) + return comp +} + +func (c *component) CreateExecution(sysVars map[string]any, setup *structpb.Struct, task string) (*base.ExecutionWrapper, error) { + e := &execution{ + ComponentExecution: base.ComponentExecution{Component: c, SystemVariables: sysVars, Task: 
task}, + } + + switch task { + case taskSubsampleVideo: + e.execute = subsampleVideo + case taskSubsampleVideoFrames: + e.execute = subsampleVideoFrames + default: + return nil, fmt.Errorf(task + " task is not supported.") + } + + return &base.ExecutionWrapper{Execution: e}, nil +} + +func (e *execution) Execute(_ context.Context, inputs []*structpb.Struct) ([]*structpb.Struct, error) { + outputs := make([]*structpb.Struct, len(inputs)) + + for i, input := range inputs { + output, err := e.execute(input) + if err != nil { + return nil, err + } + + outputs[i] = output + } + + return outputs, nil +} diff --git a/operator/video/v0/main_test.go b/operator/video/v0/main_test.go new file mode 100644 index 00000000..152fc8c1 --- /dev/null +++ b/operator/video/v0/main_test.go @@ -0,0 +1,4 @@ +package video + +// TODO chuang8511 Investigate how to run test case with installing ffmpeg in test env +// It will be arranged according to the product schedule diff --git a/operator/video/v0/video_operation.go b/operator/video/v0/video_operation.go new file mode 100644 index 00000000..19aa36e1 --- /dev/null +++ b/operator/video/v0/video_operation.go @@ -0,0 +1,201 @@ +package video + +import ( + "encoding/base64" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/google/uuid" + "github.com/instill-ai/component/base" + ffmpeg "github.com/u2takey/ffmpeg-go" + "google.golang.org/protobuf/types/known/structpb" +) + +type SubsampleVideoInput struct { + Video Video `json:"video"` + Fps int `json:"fps"` + StartTime string `json:"start-time"` + Duration string `json:"duration"` +} + +type SubsampleVideoOutput struct { + Video Video `json:"video"` +} + +type SubsampleVideoFramesInput struct { + Video Video `json:"video"` + Fps int `json:"fps"` + StartTime string `json:"start-time"` + Duration string `json:"duration"` +} + +type SubsampleVideoFramesOutput struct { + Frames []Frame `json:"frames"` +} + +// Base64 encoded video +type Video string + +// Base64 encoded frame 
+type Frame string + +func subsampleVideo(input *structpb.Struct) (*structpb.Struct, error) { + + inputStruct := SubsampleVideoInput{} + + err := base.ConvertFromStructpb(input, &inputStruct) + if err != nil { + return nil, fmt.Errorf("error converting input to struct: %v", err) + } + + base64Video := string(inputStruct.Video) + + videoBytes, err := base64.StdEncoding.DecodeString(base.TrimBase64Mime(base64Video)) + + if err != nil { + return nil, fmt.Errorf("error in decoding for inner: %s", err) + } + + videoPrefix := strings.Split(base64Video, ",")[0] + + // TODO: chuang8511 map the file extension to the correct format + tempInputFile, err := os.CreateTemp("", "temp.*.mp4") + + if err != nil { + return nil, fmt.Errorf("error in creating temp input file: %s", err) + } + + tempInputFileName := tempInputFile.Name() + defer os.Remove(tempInputFileName) + + if _, err := tempInputFile.Write(videoBytes); err != nil { + return nil, fmt.Errorf("error in writing file: %s", err) + } + + split := ffmpeg.Input(tempInputFileName) + + tempOutputFile, err := os.CreateTemp("", "temp_out.*.mp4") + if err != nil { + return nil, fmt.Errorf("error in creating temp output file: %s", err) + } + tempOutputFileName := tempOutputFile.Name() + defer os.Remove(tempOutputFileName) + + split = split.OverWriteOutput() + err = split.Output(tempOutputFileName, getKwArgs(inputStruct)).Run() + + if err != nil { + return nil, fmt.Errorf("error in running ffmpeg: %s", err) + } + + byOut, _ := os.ReadFile(tempOutputFileName) + base64Subsample := videoPrefix + "," + base64.StdEncoding.EncodeToString(byOut) + + output := SubsampleVideoOutput{ + Video: Video(base64Subsample), + } + + return base.ConvertToStructpb(output) +} + +func getKwArgs(inputStruct SubsampleVideoInput) ffmpeg.KwArgs { + kwArgs := ffmpeg.KwArgs{"r": inputStruct.Fps} + if inputStruct.StartTime != "" { + kwArgs["ss"] = inputStruct.StartTime + } + if inputStruct.Duration != "" { + kwArgs["t"] = inputStruct.Duration + } + return kwArgs 
+} + +func subsampleVideoFrames(input *structpb.Struct) (*structpb.Struct, error) { + inputStruct := SubsampleVideoFramesInput{} + + err := base.ConvertFromStructpb(input, &inputStruct) + if err != nil { + return nil, fmt.Errorf("error converting input to struct: %v", err) + } + + base64Video := string(inputStruct.Video) + + videoBytes, err := base64.StdEncoding.DecodeString(base.TrimBase64Mime(base64Video)) + + if err != nil { + return nil, fmt.Errorf("error in decoding for inner: %s", err) + } + + tempInputFile, err := os.CreateTemp("", "temp.*.mp4") + if err != nil { + return nil, fmt.Errorf("error in creating temp input file: %s", err) + } + tempInputFileName := tempInputFile.Name() + defer os.Remove(tempInputFileName) + + if _, err := tempInputFile.Write(videoBytes); err != nil { + return nil, fmt.Errorf("error in writing file: %s", err) + } + + random := uuid.New().String() + // TODO: chuang8511 confirm the reasonable numbers for outputPattern. + // In the future, we will support bigger size of video, so we set the frame number to 8 digits. + // Because the sequence is important, we need to use pattern + // with frame number rather than uuid as suffix. + outputPattern := random + "_frame_%08d.jpeg" + + err = ffmpeg.Input(tempInputFileName). + Output(outputPattern, + getFramesKwArgs(inputStruct), + ). 
+ Run() + + if err != nil { + return nil, fmt.Errorf("error in running ffmpeg: %s", err) + } + + files, err := filepath.Glob(random + "_frame_*.jpeg") + if err != nil { + return nil, fmt.Errorf("error listing frames: %s", err) + } + defer removeFiles(files) + + sort.Strings(files) + jpegPrefix := "data:image/jpeg;base64," + var frames []Frame + for _, file := range files { + data, err := os.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("error reading file %s: %v", file, err) + } + + encoded := base64.StdEncoding.EncodeToString(data) + + frames = append(frames, Frame(jpegPrefix+encoded)) + } + + output := SubsampleVideoFramesOutput{ + Frames: frames, + } + + return base.ConvertToStructpb(output) +} + +func getFramesKwArgs(inputStruct SubsampleVideoFramesInput) ffmpeg.KwArgs { + kwArgs := ffmpeg.KwArgs{"vf": "fps=" + fmt.Sprintf("%d", inputStruct.Fps)} + if inputStruct.StartTime != "" { + kwArgs["ss"] = inputStruct.StartTime + } + if inputStruct.Duration != "" { + kwArgs["t"] = inputStruct.Duration + } + return kwArgs +} + +func removeFiles(files []string) { + for _, file := range files { + os.Remove(file) + } +} diff --git a/store/store.go b/store/store.go index fd68ebff..47f5ece0 100644 --- a/store/store.go +++ b/store/store.go @@ -14,12 +14,14 @@ import ( "github.com/instill-ai/component/ai/huggingface/v0" "github.com/instill-ai/component/ai/instill/v0" "github.com/instill-ai/component/ai/mistralai/v0" + "github.com/instill-ai/component/ai/ollama/v0" "github.com/instill-ai/component/ai/openai/v0" "github.com/instill-ai/component/ai/stabilityai/v0" "github.com/instill-ai/component/application/email/v0" "github.com/instill-ai/component/application/github/v0" "github.com/instill-ai/component/application/googlesearch/v0" "github.com/instill-ai/component/application/jira/v0" + "github.com/instill-ai/component/application/hubspot/v0" "github.com/instill-ai/component/application/numbers/v0" "github.com/instill-ai/component/application/restapi/v0" @@ -30,11 
+32,14 @@ import ( "github.com/instill-ai/component/data/googlecloudstorage/v0" "github.com/instill-ai/component/data/pinecone/v0" "github.com/instill-ai/component/data/redis/v0" + "github.com/instill-ai/component/data/sql/v0" + "github.com/instill-ai/component/operator/audio/v0" "github.com/instill-ai/component/operator/base64/v0" "github.com/instill-ai/component/operator/document/v0" "github.com/instill-ai/component/operator/image/v0" "github.com/instill-ai/component/operator/json/v0" "github.com/instill-ai/component/operator/text/v0" + "github.com/instill-ai/component/operator/video/v0" pb "github.com/instill-ai/protogen-go/vdp/pipeline/v1beta" ) @@ -83,6 +88,8 @@ func Init( compStore.Import(image.Init(baseComp)) compStore.Import(text.Init(baseComp)) compStore.Import(document.Init(baseComp)) + compStore.Import(audio.Init(baseComp)) + compStore.Import(video.Init(baseComp)) compStore.Import(github.Init(baseComp)) { @@ -129,11 +136,15 @@ func Init( compStore.Import(googlesearch.Init(baseComp)) compStore.Import(pinecone.Init(baseComp)) compStore.Import(redis.Init(baseComp)) + compStore.Import(sql.Init(baseComp)) compStore.Import(restapi.Init(baseComp)) compStore.Import(website.Init(baseComp)) compStore.Import(slack.Init(baseComp)) compStore.Import(email.Init(baseComp)) compStore.Import(jira.Init(baseComp)) + compStore.Import(ollama.Init(baseComp)) + compStore.Import(hubspot.Init(baseComp)) + }) return compStore }