From 739cc045d427fb1c5f37f2be85bf84492567d503 Mon Sep 17 00:00:00 2001 From: "Chang, Hui-Tang" Date: Wed, 4 Sep 2024 22:54:19 +0800 Subject: [PATCH] feat(instill): adopt latest Instill Model protobufs --- ai/instill/v0/README.mdx | 39 +- ai/instill/v0/client.go | 3 +- ai/instill/v0/config/definition.json | 3 +- ai/instill/v0/config/tasks.json | 432 ++---- ai/instill/v0/image_classification.go | 64 - ai/instill/v0/image_to_image.go | 90 -- ai/instill/v0/instance_segmentation.go | 74 - ai/instill/v0/keypoint_detection.go | 75 - ai/instill/v0/llm_utils.go | 127 -- ai/instill/v0/main.go | 37 +- ai/instill/v0/object_detection.go | 73 - ai/instill/v0/ocr.go | 67 - ai/instill/v0/semantic_segmentation.go | 63 - ai/instill/v0/struct.go | 6 + ai/instill/v0/text_generation.go | 87 +- ai/instill/v0/text_generation_chat.go | 150 +- ai/instill/v0/text_to_image.go | 97 +- ai/instill/v0/unspecified.go | 23 - ai/instill/v0/vision.go | 53 + ai/instill/v0/visual_question_answering.go | 69 - ai/instill/v0/xxx.json | 1424 ++++++++++++++++++ ai/openai/v0/main.go | 6 +- ai/openai/v0/text_generation.go | 14 +- application/numbers/v0/main.go | 2 +- data/artifact/v0/main.go | 2 +- data/googlecloudstorage/v0/upload.go | 2 +- data/milvus/v0/config/setup.json | 13 +- data/zilliz/v0/config/setup.json | 8 +- go.mod | 2 +- go.sum | 4 +- operator/audio/v0/main.go | 2 +- operator/document/v0/main.go | 2 +- operator/document/v0/markdown_transformer.go | 4 +- operator/video/v0/main.go | 2 +- operator/web/v0/main.go | 2 +- operator/web/v0/scrape_webpage.go | 2 +- 36 files changed, 1873 insertions(+), 1250 deletions(-) delete mode 100644 ai/instill/v0/image_classification.go delete mode 100644 ai/instill/v0/image_to_image.go delete mode 100644 ai/instill/v0/instance_segmentation.go delete mode 100644 ai/instill/v0/keypoint_detection.go delete mode 100644 ai/instill/v0/llm_utils.go delete mode 100644 ai/instill/v0/object_detection.go delete mode 100644 ai/instill/v0/ocr.go delete mode 100644 
ai/instill/v0/semantic_segmentation.go create mode 100644 ai/instill/v0/struct.go delete mode 100644 ai/instill/v0/unspecified.go create mode 100644 ai/instill/v0/vision.go delete mode 100644 ai/instill/v0/visual_question_answering.go create mode 100644 ai/instill/v0/xxx.json diff --git a/ai/instill/v0/README.mdx b/ai/instill/v0/README.mdx index d01d5873..073f4c74 100644 --- a/ai/instill/v0/README.mdx +++ b/ai/instill/v0/README.mdx @@ -18,7 +18,6 @@ It can carry out the following tasks: - [Text Generation Chat](#text-generation-chat) - [Text To Image](#text-to-image) - [Visual Question Answering](#visual-question-answering) -- [Image To Image](#image-to-image) @@ -182,11 +181,8 @@ Generate texts from input text prompts. | Model Name (required) | `model-name` | string | The Instill Model model to be used. | | Prompt (required) | `prompt` | string | The prompt text | | System message | `system-message` | string | The system message helps set the behavior of the assistant. For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. By default, the model’s behavior is using a generic message as "You are a helpful assistant." | -| Prompt Images | `prompt-images` | array[string] | The prompt images | -| Chat history | `chat-history` | array[object] | Incorporate external chat history, specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: : \{"role": "The message role, i.e. 'system', 'user' or 'assistant'", "content": "message content"\}. 
| | Seed | `seed` | integer | The seed | | Temperature | `temperature` | number | The temperature for sampling | -| Top K | `top-k` | integer | Top k for sampling | | Max new tokens | `max-new-tokens` | integer | The maximum number of tokens for model to generate | @@ -215,7 +211,6 @@ Generate texts from input text prompts and chat history. | Chat history | `chat-history` | array[object] | Incorporate external chat history, specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: : \{"role": "The message role, i.e. 'system', 'user' or 'assistant'", "content": "message content"\}. | | Seed | `seed` | integer | The seed | | Temperature | `temperature` | number | The temperature for sampling | -| Top K | `top-k` | integer | Top k for sampling | | Max new tokens | `max-new-tokens` | integer | The maximum number of tokens for model to generate | @@ -239,10 +234,10 @@ Generate images from input text prompts. | Task ID (required) | `task` | string | `TASK_TEXT_TO_IMAGE` | | Model Name (required) | `model-name` | string | The Instill Model model to be used. | | Prompt (required) | `prompt` | string | The prompt text | -| CFG Scale | `cfg-scale` | number | The guidance scale, default is 7.5 | | Samples | `samples` | integer | The number of generated samples, default is 1 | | Seed | `seed` | integer | The seed, default is 0 | -| Steps | `steps` | integer | The steps, default is 5 | +| Aspect ratio | `negative-prompt` | string | Keywords of what you do not wish to see in the output image. | +| Aspect ratio | `aspect-ratio` | string | Controls the aspect ratio of the generated image. Defaults to 1:1. | @@ -266,11 +261,10 @@ Answer questions based on a prompt and an image. | Model Name (required) | `model-name` | string | The Instill Model model to be used. 
| | Prompt (required) | `prompt` | string | The prompt text | | System message | `system-message` | string | The system message helps set the behavior of the assistant. For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. By default, the model’s behavior is using a generic message as "You are a helpful assistant." | -| Prompt Images (required) | `prompt-images` | array[string] | The prompt images | +| Prompt Images | `prompt-images` | array[string] | The prompt images | | Chat history | `chat-history` | array[object] | Incorporate external chat history, specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: : \{"role": "The message role, i.e. 'system', 'user' or 'assistant'", "content": "message content"\}. | | Seed | `seed` | integer | The seed | | Temperature | `temperature` | number | The temperature for sampling | -| Top K | `top-k` | integer | Top k for sampling | | Max new tokens | `max-new-tokens` | integer | The maximum number of tokens for model to generate | @@ -284,31 +278,4 @@ Answer questions based on a prompt and an image. -### Image To Image - -Generate image from input text prompt and image. - - -| Input | ID | Type | Description | -| :--- | :--- | :--- | :--- | -| Task ID (required) | `task` | string | `TASK_IMAGE_TO_IMAGE` | -| Model Name (required) | `model-name` | string | The Instill Model model to be used. 
| -| Prompt (required) | `prompt` | string | The prompt text | -| Prompt Image (required) | `image-base64` | string | The prompt image | -| CFG Scale | `cfg-scale` | number | The guidance scale, default is 7.5 | -| Seed | `seed` | integer | The seed | -| Samples | `samples` | integer | The number of generated samples, default is 1 | -| Top K | `top-k` | integer | Top k for sampling | - - - -| Output | ID | Type | Description | -| :--- | :--- | :--- | :--- | -| Images | `images` | array[string] | Images | - - - - - - diff --git a/ai/instill/v0/client.go b/ai/instill/v0/client.go index a2b24ea6..b2c781c5 100644 --- a/ai/instill/v0/client.go +++ b/ai/instill/v0/client.go @@ -10,6 +10,7 @@ import ( "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/metadata" + "google.golang.org/protobuf/types/known/structpb" "github.com/instill-ai/component/internal/util" modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" @@ -36,7 +37,7 @@ func initModelPublicServiceClient(serverURL string) (modelPB.ModelPublicServiceC return modelPB.NewModelPublicServiceClient(clientConn), clientConn } -func trigger(gRPCClient modelPB.ModelPublicServiceClient, vars map[string]any, nsID string, modelID string, version string, taskInputs []*modelPB.TaskInput) ([]*modelPB.TaskOutput, error) { +func trigger(gRPCClient modelPB.ModelPublicServiceClient, vars map[string]any, nsID string, modelID string, version string, taskInputs []*structpb.Struct) ([]*structpb.Struct, error) { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) defer cancel() diff --git a/ai/instill/v0/config/definition.json b/ai/instill/v0/config/definition.json index 6afffc85..cb1fbf11 100644 --- a/ai/instill/v0/config/definition.json +++ b/ai/instill/v0/config/definition.json @@ -9,8 +9,7 @@ "TASK_TEXT_GENERATION", "TASK_TEXT_GENERATION_CHAT", "TASK_TEXT_TO_IMAGE", - "TASK_VISUAL_QUESTION_ANSWERING", - "TASK_IMAGE_TO_IMAGE" + 
"TASK_VISUAL_QUESTION_ANSWERING" ], "custom": false, "documentationUrl": "https://www.instill.tech/docs/component/ai/instill", diff --git a/ai/instill/v0/config/tasks.json b/ai/instill/v0/config/tasks.json index 9c19c25e..baa57dbf 100644 --- a/ai/instill/v0/config/tasks.json +++ b/ai/instill/v0/config/tasks.json @@ -94,148 +94,6 @@ "type": "object" } }, - "TASK_IMAGE_TO_IMAGE": { - "instillShortDescription": "Generate image from input text prompt and image.", - "input": { - "description": "Input", - "instillEditOnNodeFields": [ - "prompt", - "image-base64", - "model-name" - ], - "instillUIOrder": 0, - "properties": { - "cfg-scale": { - "description": "The guidance scale, default is 7.5", - "instillAcceptFormats": [ - "number", - "integer" - ], - "instillUIOrder": 4, - "instillUpstreamTypes": [ - "value", - "reference" - ], - "title": "CFG Scale", - "type": "number" - }, - "image-base64": { - "description": "The prompt image", - "instillAcceptFormats": [ - "image/*" - ], - "instillUIOrder": 3, - "instillUpstreamTypes": [ - "reference" - ], - "title": "Prompt Image", - "type": "string" - }, - "model-name": { - "description": "The Instill Model model to be used.", - "instillAcceptFormats": [ - "string" - ], - "instillUIOrder": 0, - "instillUpstreamTypes": [ - "value", - "reference", - "template" - ], - "title": "Model Name", - "type": "string" - }, - "prompt": { - "description": "The prompt text", - "instillAcceptFormats": [ - "string" - ], - "instillUIMultiline": true, - "instillUIOrder": 2, - "instillUpstreamTypes": [ - "value", - "reference", - "template" - ], - "title": "Prompt", - "type": "string" - }, - "samples": { - "description": "The number of generated samples, default is 1", - "instillAcceptFormats": [ - "integer" - ], - "instillUIOrder": 5, - "instillUpstreamTypes": [ - "value", - "reference" - ], - "title": "Samples", - "type": "integer" - }, - "seed": { - "description": "The seed", - "instillAcceptFormats": [ - "integer" - ], - "instillUIOrder": 4, - 
"instillUpstreamTypes": [ - "value", - "reference" - ], - "title": "Seed", - "type": "integer" - }, - "top-k": { - "default": 10, - "description": "Top k for sampling", - "instillAcceptFormats": [ - "integer" - ], - "instillUIOrder": 5, - "instillUpstreamTypes": [ - "value", - "reference" - ], - "title": "Top K", - "type": "integer" - } - }, - "required": [ - "prompt", - "image-base64", - "model-name" - ], - "title": "Input", - "type": "object" - }, - "output": { - "description": "Output", - "instillEditOnNodeFields": [ - "images" - ], - "instillUIOrder": 0, - "properties": { - "images": { - "description": "Images", - "instillUIOrder": 0, - "instillFormat": "array:image/jpeg", - "items": { - "instillFormat": "image/jpeg", - "title": "Image", - "type": "string" - }, - "title": "Images", - "type": "array" - } - }, - "required": [ - "images" - ], - "title": "Output", - "type": "object" - } - }, "TASK_INSTANCE_SEGMENTATION": { "instillShortDescription": "Detect, localize and delineate multiple objects in images.", "input": { @@ -302,22 +160,6 @@ ], "instillUIOrder": 0, "properties": { - "chat-history": { - "description": "Incorporate external chat history, specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: : {\"role\": \"The message role, i.e. 'system', 'user' or 'assistant'\", \"content\": \"message content\"}.", - "instillAcceptFormats": [ - "structured/chat-messages" - ], - "instillShortDescription": "Incorporate external chat history, specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: : {\"role\": \"The message role, i.e. 
'system', 'user' or 'assistant'\", \"content\": \"message content\"}.", - "instillUIOrder": 4, - "instillUpstreamTypes": [ - "reference" - ], - "items": { - "$ref": "#/$defs/chat-message" - }, - "title": "Chat history", - "type": "array" - }, "max-new-tokens": { "default": 50, "description": "The maximum number of tokens for model to generate", @@ -361,21 +203,6 @@ "title": "Prompt", "type": "string" }, - "prompt-images": { - "description": "The prompt images", - "instillAcceptFormats": [ - "array:image/*" - ], - "instillUIOrder": 3, - "instillUpstreamTypes": [ - "reference" - ], - "items": { - "type": "string" - }, - "title": "Prompt Images", - "type": "array" - }, "seed": { "description": "The seed", "instillAcceptFormats": [ @@ -419,20 +246,6 @@ ], "title": "Temperature", "type": "number" - }, - "top-k": { - "default": 10, - "description": "Top k for sampling", - "instillAcceptFormats": [ - "integer" - ], - "instillUIOrder": 5, - "instillUpstreamTypes": [ - "value", - "reference" - ], - "title": "Top K", - "type": "integer" } }, "required": [ @@ -467,10 +280,6 @@ }, "TASK_TEXT_GENERATION_CHAT": { "instillShortDescription": "Generate texts from input text prompts and chat history.", - "$ref": "#/TASK_TEXT_GENERATION" - }, - "TASK_TEXT_TO_IMAGE": { - "instillShortDescription": "Generate images from input text prompts.", "input": { "description": "Input", "instillEditOnNodeFields": [ @@ -479,19 +288,35 @@ ], "instillUIOrder": 0, "properties": { - "cfg-scale": { - "description": "The guidance scale, default is 7.5", + "chat-history": { + "description": "Incorporate external chat history, specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: : {\"role\": \"The message role, i.e. 
'system', 'user' or 'assistant'\", \"content\": \"message content\"}.", "instillAcceptFormats": [ - "number", - "integer" + "structured/chat-messages" ], + "instillShortDescription": "Incorporate external chat history, specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: : {\"role\": \"The message role, i.e. 'system', 'user' or 'assistant'\", \"content\": \"message content\"}.", "instillUIOrder": 4, + "instillUpstreamTypes": [ + "reference" + ], + "items": { + "$ref": "#/$defs/chat-message" + }, + "title": "Chat history", + "type": "array" + }, + "max-new-tokens": { + "default": 50, + "description": "The maximum number of tokens for model to generate", + "instillAcceptFormats": [ + "integer" + ], + "instillUIOrder": 6, "instillUpstreamTypes": [ "value", "reference" ], - "title": "CFG Scale", - "type": "number" + "title": "Max new tokens", + "type": "integer" }, "model-name": { "description": "The Instill Model model to be used.", @@ -522,25 +347,27 @@ "title": "Prompt", "type": "string" }, - "samples": { - "description": "The number of generated samples, default is 1", + "prompt-images": { + "description": "The prompt images", "instillAcceptFormats": [ - "integer" + "array:image/*" ], - "instillUIOrder": 5, + "instillUIOrder": 3, "instillUpstreamTypes": [ - "value", "reference" ], - "title": "Samples", - "type": "integer" + "items": { + "type": "string" + }, + "title": "Prompt Images", + "type": "array" }, "seed": { - "description": "The seed, default is 0", + "description": "The seed", "instillAcceptFormats": [ "integer" ], - "instillUIOrder": 6, + "instillUIOrder": 4, "instillUpstreamTypes": [ "value", "reference" @@ -548,18 +375,36 @@ "title": "Seed", "type": "integer" }, - "steps": { - "description": "The steps, default is 5", + "system-message": { + "default": "You are a helpful assistant.", + "description": 
"The system message helps set the behavior of the assistant. For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. By default, the model\u2019s behavior is using a generic message as \"You are a helpful assistant.\"", "instillAcceptFormats": [ - "integer" + "string" + ], + "instillShortDescription": "The system message helps set the behavior of the assistant", + "instillUIMultiline": true, + "instillUIOrder": 2, + "instillUpstreamTypes": [ + "value", + "reference", + "template" + ], + "title": "System message", + "type": "string" + }, + "temperature": { + "default": 0.7, + "description": "The temperature for sampling", + "instillAcceptFormats": [ + "number" ], - "instillUIOrder": 7, + "instillUIOrder": 5, "instillUpstreamTypes": [ "value", "reference" ], - "title": "Steps", - "type": "integer" + "title": "Temperature", + "type": "number" } }, "required": [ @@ -572,71 +417,36 @@ "output": { "description": "Output", "instillEditOnNodeFields": [ - "images" + "text" ], "instillUIOrder": 0, "properties": { - "images": { - "description": "Images", + "text": { + "description": "Text", + "instillFormat": "string", + "instillUIMultiline": true, "instillUIOrder": 0, - "instillFormat": "array:image/jpeg", - "items": { - "instillFormat": "image/jpeg", - "title": "Image", - "type": "string" - }, - "title": "Images", - "type": "array" + "title": "Text", + "type": "string" } }, "required": [ - "images" + "text" ], "title": "Output", "type": "object" } }, - "TASK_VISUAL_QUESTION_ANSWERING": { - "instillShortDescription": "Answer questions based on a prompt and an image.", + "TASK_TEXT_TO_IMAGE": { + "instillShortDescription": "Generate images from input text prompts.", "input": { "description": "Input", "instillEditOnNodeFields": [ "prompt", - "prompt-images", "model-name" ], "instillUIOrder": 0, "properties": { - "chat-history": { - "description": "Incorporate external chat history, 
specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: : {\"role\": \"The message role, i.e. 'system', 'user' or 'assistant'\", \"content\": \"message content\"}.", - "instillAcceptFormats": [ - "structured/chat-messages" - ], - "instillShortDescription": "Incorporate external chat history, specifically previous messages within the conversation. Please note that System Message will be ignored and will not have any effect when this field is populated. Each message should adhere to the format: : {\"role\": \"The message role, i.e. 'system', 'user' or 'assistant'\", \"content\": \"message content\"}.", - "instillUIOrder": 4, - "instillUpstreamTypes": [ - "reference" - ], - "items": { - "$ref": "#/$defs/chat-message" - }, - "title": "Chat history", - "type": "array" - }, - "max-new-tokens": { - "default": 50, - "description": "The maximum number of tokens for model to generate", - "instillAcceptFormats": [ - "integer" - ], - "instillUIOrder": 6, - "instillUpstreamTypes": [ - "value", - "reference" - ], - "title": "Max new tokens", - "type": "integer" - }, "model-name": { "description": "The Instill Model model to be used.", "instillAcceptFormats": [ @@ -666,27 +476,25 @@ "title": "Prompt", "type": "string" }, - "prompt-images": { - "description": "The prompt images", + "samples": { + "description": "The number of generated samples, default is 1", "instillAcceptFormats": [ - "array:image/*" + "integer" ], - "instillUIOrder": 3, + "instillUIOrder": 5, "instillUpstreamTypes": [ + "value", "reference" ], - "items": { - "type": "string" - }, - "title": "Prompt Images", - "type": "array" + "title": "Samples", + "type": "integer" }, "seed": { - "description": "The seed", + "description": "The seed, default is 0", "instillAcceptFormats": [ "integer" ], - "instillUIOrder": 4, + "instillUIOrder": 6, "instillUpstreamTypes": [ 
"value", "reference" @@ -694,55 +502,41 @@ "title": "Seed", "type": "integer" }, - "system-message": { - "default": "You are a helpful assistant.", - "description": "The system message helps set the behavior of the assistant. For example, you can modify the personality of the assistant or provide specific instructions about how it should behave throughout the conversation. By default, the model\u2019s behavior is using a generic message as \"You are a helpful assistant.\"", + "negative-prompt": { + "title": "Aspect ratio", + "type": "string", + "description": "Keywords of what you do not wish to see in the output image.", + "instillShortDescription": "Keywords of what you do not wish to see in the output image.", "instillAcceptFormats": [ "string" ], - "instillShortDescription": "The system message helps set the behavior of the assistant", - "instillUIMultiline": true, - "instillUIOrder": 2, - "instillUpstreamTypes": [ - "value", - "reference", - "template" - ], - "title": "System message", - "type": "string" - }, - "temperature": { - "default": 0.7, - "description": "The temperature for sampling", - "instillAcceptFormats": [ - "number" - ], - "instillUIOrder": 5, - "instillUpstreamTypes": [ - "value", - "reference" - ], - "title": "Temperature", - "type": "number" + "instillUIOrder": 7 }, - "top-k": { - "default": 10, - "description": "Top k for sampling", + "aspect-ratio": { + "title": "Aspect ratio", + "type": "string", + "description": "Controls the aspect ratio of the generated image. Defaults to 1:1.", + "instillShortDescription": "Controls the aspect ratio of the generated image. 
Defaults to 1:1.", "instillAcceptFormats": [ - "integer" - ], - "instillUIOrder": 5, - "instillUpstreamTypes": [ - "value", - "reference" + "string" ], - "title": "Top K", - "type": "integer" + "instillUIOrder": 8, + "default": "1:1", + "enum": [ + "16:9", + "1:1", + "21:9", + "2:3", + "3:2", + "4:5", + "5:4", + "9:16", + "9:21" + ] } }, "required": [ "prompt", - "prompt-images", "model-name" ], "title": "Input", @@ -751,24 +545,32 @@ "output": { "description": "Output", "instillEditOnNodeFields": [ - "text" + "images" ], "instillUIOrder": 0, "properties": { - "text": { - "description": "Text", - "instillFormat": "string", - "instillUIMultiline": true, + "images": { + "description": "Images", "instillUIOrder": 0, - "title": "Text", - "type": "string" + "instillFormat": "array:image/jpeg", + "items": { + "instillFormat": "image/jpeg", + "title": "Image", + "type": "string" + }, + "title": "Images", + "type": "array" } }, "required": [ - "text" + "images" ], "title": "Output", "type": "object" } + }, + "TASK_VISUAL_QUESTION_ANSWERING": { + "instillShortDescription": "Answer questions based on a prompt and an image.", + "$ref": "#/TASK_TEXT_GENERATION_CHAT" } } diff --git a/ai/instill/v0/image_classification.go b/ai/instill/v0/image_classification.go deleted file mode 100644 index baf80901..00000000 --- a/ai/instill/v0/image_classification.go +++ /dev/null @@ -1,64 +0,0 @@ -package instill - -import ( - "fmt" - - "google.golang.org/protobuf/encoding/protojson" - "google.golang.org/protobuf/types/known/structpb" - - "github.com/instill-ai/component/base" - modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" -) - -func (e *execution) executeImageClassification(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { - if len(inputs) <= 0 { - return nil, fmt.Errorf("invalid input: %v for model: %s/%s/%s", inputs, nsID, modelID, version) - } - - if grpcClient == nil { - 
return nil, fmt.Errorf("uninitialized client") - } - - taskInputs := []*modelPB.TaskInput{} - for _, input := range inputs { - - classificationInput := &modelPB.ClassificationInput{} - classificationInput.Type = &modelPB.ClassificationInput_ImageBase64{ - ImageBase64: base.TrimBase64Mime(input.Fields["image-base64"].GetStringValue()), - } - - taskInput := &modelPB.TaskInput_Classification{ - Classification: classificationInput, - } - taskInputs = append(taskInputs, &modelPB.TaskInput{Input: taskInput}) - } - - taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, taskInputs) - if err != nil { - return nil, err - } - if len(taskOutputs) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) - } - outputs := []*structpb.Struct{} - for idx := range inputs { - imgClassificationOp := taskOutputs[idx].GetClassification() - if imgClassificationOp == nil { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", imgClassificationOp, nsID, modelID, version) - } - outputJSON, err := protojson.MarshalOptions{ - UseProtoNames: true, - EmitUnpopulated: true, - }.Marshal(imgClassificationOp) - if err != nil { - return nil, err - } - output := &structpb.Struct{} - err = protojson.Unmarshal(outputJSON, output) - if err != nil { - return nil, err - } - outputs = append(outputs, output) - } - return outputs, nil -} diff --git a/ai/instill/v0/image_to_image.go b/ai/instill/v0/image_to_image.go deleted file mode 100644 index 84e11a98..00000000 --- a/ai/instill/v0/image_to_image.go +++ /dev/null @@ -1,90 +0,0 @@ -package instill - -import ( - "fmt" - - "google.golang.org/protobuf/encoding/protojson" - "google.golang.org/protobuf/types/known/structpb" - - "github.com/instill-ai/component/base" - modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" -) - -func (e *execution) executeImageToImage(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, 
inputs []*structpb.Struct) ([]*structpb.Struct, error) { - if len(inputs) <= 0 { - return nil, fmt.Errorf("invalid input: %v for model: %s/%s/%s", inputs, nsID, modelID, version) - } - - if grpcClient == nil { - return nil, fmt.Errorf("uninitialized client") - } - - outputs := []*structpb.Struct{} - - for _, input := range inputs { - - prompt := input.GetFields()["prompt"].GetStringValue() - imageToImageInput := &modelPB.ImageToImageInput{ - Prompt: &prompt, - } - if _, ok := input.GetFields()["steps"]; ok { - v := int32(input.GetFields()["steps"].GetNumberValue()) - imageToImageInput.Steps = &v - } - if _, ok := input.GetFields()["image-base64"]; ok { - imageToImageInput.Type = &modelPB.ImageToImageInput_PromptImageBase64{ - PromptImageBase64: base.TrimBase64Mime(input.GetFields()["image-base64"].GetStringValue()), - } - } - if _, ok := input.GetFields()["temperature"]; ok { - v := int32(input.GetFields()["temperature"].GetNumberValue()) - imageToImageInput.Seed = &v - } - if _, ok := input.GetFields()["cfg-scale"]; ok { - v := float32(input.GetFields()["cfg-scale"].GetNumberValue()) - imageToImageInput.CfgScale = &v - } - - if _, ok := input.GetFields()["seed"]; ok { - v := int32(input.GetFields()["seed"].GetNumberValue()) - imageToImageInput.Seed = &v - } - - taskInput := &modelPB.TaskInput_ImageToImage{ - ImageToImage: imageToImageInput, - } - - // only support batch 1 - taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, []*modelPB.TaskInput{{Input: taskInput}}) - if err != nil { - return nil, err - } - if len(taskOutputs) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) - } - - imageToImageOutput := taskOutputs[0].GetImageToImage() - if imageToImageOutput == nil { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", imageToImageOutput, nsID, modelID, version) - } - for imageIdx := range imageToImageOutput.Images { - imageToImageOutput.Images[imageIdx] = 
fmt.Sprintf("data:image/jpeg;base64,%s", imageToImageOutput.Images[imageIdx]) - } - - outputJSON, err := protojson.MarshalOptions{ - UseProtoNames: true, - EmitUnpopulated: true, - }.Marshal(imageToImageOutput) - if err != nil { - return nil, err - } - output := &structpb.Struct{} - err = protojson.Unmarshal(outputJSON, output) - if err != nil { - return nil, err - } - outputs = append(outputs, output) - - } - return outputs, nil -} diff --git a/ai/instill/v0/instance_segmentation.go b/ai/instill/v0/instance_segmentation.go deleted file mode 100644 index fc850423..00000000 --- a/ai/instill/v0/instance_segmentation.go +++ /dev/null @@ -1,74 +0,0 @@ -package instill - -import ( - "fmt" - - "google.golang.org/protobuf/types/known/structpb" - - "github.com/instill-ai/component/base" - modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" -) - -func (e *execution) executeInstanceSegmentation(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { - if len(inputs) <= 0 { - return nil, fmt.Errorf("invalid input: %v for model: %s/%s/%s", inputs, nsID, modelID, version) - } - - if grpcClient == nil { - return nil, fmt.Errorf("uninitialized client") - } - - taskInputs := []*modelPB.TaskInput{} - for _, input := range inputs { - segmentationInput := &modelPB.InstanceSegmentationInput{} - segmentationInput.Type = &modelPB.InstanceSegmentationInput_ImageBase64{ - ImageBase64: base.TrimBase64Mime(input.Fields["image-base64"].GetStringValue()), - } - - taskInput := &modelPB.TaskInput_InstanceSegmentation{ - InstanceSegmentation: segmentationInput, - } - taskInputs = append(taskInputs, &modelPB.TaskInput{Input: taskInput}) - } - taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, taskInputs) - if err != nil { - return nil, err - } - if len(taskOutputs) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, 
modelID, version) - } - - outputs := []*structpb.Struct{} - for idx := range inputs { - instanceSegmentationOp := taskOutputs[idx].GetInstanceSegmentation() - if instanceSegmentationOp == nil { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", instanceSegmentationOp, nsID, modelID, version) - } - - objects := make([]any, len(instanceSegmentationOp.Objects)) - - for i := range instanceSegmentationOp.Objects { - objects[i] = map[string]any{ - "category": instanceSegmentationOp.Objects[i].Category, - "rle": instanceSegmentationOp.Objects[i].Rle, - "score": instanceSegmentationOp.Objects[i].Score, - "bounding-box": map[string]any{ - "top": instanceSegmentationOp.Objects[i].BoundingBox.Top, - "left": instanceSegmentationOp.Objects[i].BoundingBox.Left, - "width": instanceSegmentationOp.Objects[i].BoundingBox.Width, - "height": instanceSegmentationOp.Objects[i].BoundingBox.Height, - }, - } - } - output, err := structpb.NewStruct(map[string]any{ - "objects": objects, - }) - if err != nil { - return nil, err - } - outputs = append(outputs, output) - - } - - return outputs, nil -} diff --git a/ai/instill/v0/keypoint_detection.go b/ai/instill/v0/keypoint_detection.go deleted file mode 100644 index 80408c41..00000000 --- a/ai/instill/v0/keypoint_detection.go +++ /dev/null @@ -1,75 +0,0 @@ -package instill - -import ( - "fmt" - - "google.golang.org/protobuf/types/known/structpb" - - "github.com/instill-ai/component/base" - modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" -) - -func (e *execution) executeKeyPointDetection(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { - if len(inputs) <= 0 { - return nil, fmt.Errorf("invalid input: %v for model: %s/%s/%s", inputs, nsID, modelID, version) - } - taskInputs := []*modelPB.TaskInput{} - for _, input := range inputs { - keypointInput := &modelPB.KeypointInput{} - keypointInput.Type = 
&modelPB.KeypointInput_ImageBase64{ - ImageBase64: base.TrimBase64Mime(input.Fields["image-base64"].GetStringValue()), - } - - taskInput := &modelPB.TaskInput_Keypoint{ - Keypoint: keypointInput, - } - taskInputs = append(taskInputs, &modelPB.TaskInput{Input: taskInput}) - } - - taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, taskInputs) - if err != nil { - return nil, err - } - if len(taskOutputs) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) - } - outputs := []*structpb.Struct{} - for idx := range inputs { - keyPointOutput := taskOutputs[idx].GetKeypoint() - if keyPointOutput == nil { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", keyPointOutput, nsID, modelID, version) - } - objects := make([]any, len(keyPointOutput.Objects)) - - for i := range keyPointOutput.Objects { - keypoints := make([]any, len(keyPointOutput.Objects[i].Keypoints)) - for j := range keyPointOutput.Objects[i].Keypoints { - keypoints[j] = map[string]any{ - "x": keyPointOutput.Objects[i].Keypoints[j].X, - "y": keyPointOutput.Objects[i].Keypoints[j].Y, - "v": keyPointOutput.Objects[i].Keypoints[j].V, - } - } - objects[i] = map[string]any{ - "score": keyPointOutput.Objects[i].Score, - "bounding-box": map[string]any{ - "top": keyPointOutput.Objects[i].BoundingBox.Top, - "left": keyPointOutput.Objects[i].BoundingBox.Left, - "width": keyPointOutput.Objects[i].BoundingBox.Width, - "height": keyPointOutput.Objects[i].BoundingBox.Height, - }, - "keypoints": keypoints, - } - } - output, err := structpb.NewStruct(map[string]any{ - "objects": objects, - }) - if err != nil { - return nil, err - } - outputs = append(outputs, output) - - } - - return outputs, nil -} diff --git a/ai/instill/v0/llm_utils.go b/ai/instill/v0/llm_utils.go deleted file mode 100644 index dce994c7..00000000 --- a/ai/instill/v0/llm_utils.go +++ /dev/null @@ -1,127 +0,0 @@ -package instill - -import ( - 
"google.golang.org/protobuf/types/known/structpb" - - "github.com/instill-ai/component/base" - modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" -) - -type LLMInput struct { - - // The prompt text - Prompt string - // The prompt images - PromptImages []*modelPB.PromptImage - // The chat history - ChatHistory []*modelPB.Message - // The system message - SystemMessage *string - // The maximum number of tokens for model to generate - MaxNewTokens *int32 - // The temperature for sampling - Temperature *float32 - // Top k for sampling - TopK *int32 - // The seed - Seed *int32 - // The extra parameters - ExtraParams *structpb.Struct -} - -func (e *execution) convertLLMInput(input *structpb.Struct) *LLMInput { - llmInput := &LLMInput{ - Prompt: input.GetFields()["prompt"].GetStringValue(), - } - - if _, ok := input.GetFields()["system-message"]; ok { - v := input.GetFields()["system-message"].GetStringValue() - llmInput.SystemMessage = &v - } - - if _, ok := input.GetFields()["prompt-images"]; ok { - promptImages := []*modelPB.PromptImage{} - for _, item := range input.GetFields()["prompt-images"].GetListValue().GetValues() { - image := &modelPB.PromptImage{} - image.Type = &modelPB.PromptImage_PromptImageBase64{ - PromptImageBase64: base.TrimBase64Mime(item.GetStringValue()), - } - promptImages = append(promptImages, image) - } - llmInput.PromptImages = promptImages - } - - if _, ok := input.GetFields()["chat-history"]; ok { - history := []*modelPB.Message{} - for _, item := range input.GetFields()["chat-history"].GetListValue().GetValues() { - contents := []*modelPB.MessageContent{} - for _, contentItem := range item.GetStructValue().Fields["content"].GetListValue().GetValues() { - t := contentItem.GetStructValue().Fields["type"].GetStringValue() - content := &modelPB.MessageContent{ - Type: t, - } - if t == "text" { - content.Content = &modelPB.MessageContent_Text{ - Text: contentItem.GetStructValue().Fields["text"].GetStringValue(), - } - } else { - 
image := &modelPB.PromptImage{} - image.Type = &modelPB.PromptImage_PromptImageBase64{ - PromptImageBase64: contentItem.GetStructValue().Fields["image-url"].GetStructValue().Fields["url"].GetStringValue(), - } - content.Content = &modelPB.MessageContent_ImageUrl{ - ImageUrl: &modelPB.ImageContent{ - ImageUrl: image, - }, - } - } - contents = append(contents, content) - } - // Note: Instill Model require the order of chat_history be [user, assistant, user, assistant...] - if len(history) == 0 && item.GetStructValue().Fields["role"].GetStringValue() != "user" { - continue - } - if len(history) > 0 && history[len(history)-1].Role == item.GetStructValue().Fields["role"].GetStringValue() { - for _, content := range contents { - if content.Type == "text" { - for cIdx := range history[len(history)-1].Content { - if history[len(history)-1].Content[cIdx].Type == "text" { - history[len(history)-1].Content[cIdx].Content = &modelPB.MessageContent_Text{ - Text: history[len(history)-1].Content[cIdx].GetText() + "\n" + content.GetText(), - } - } - } - } else { - history[len(history)-1].Content = append(history[len(history)-1].Content, content) - } - } - - } else { - history = append(history, &modelPB.Message{ - Role: item.GetStructValue().Fields["role"].GetStringValue(), - Content: contents, - }) - } - } - llmInput.ChatHistory = history - } - - if _, ok := input.GetFields()["max-new-tokens"]; ok { - v := int32(input.GetFields()["max-new-tokens"].GetNumberValue()) - llmInput.MaxNewTokens = &v - } - if _, ok := input.GetFields()["temperature"]; ok { - v := float32(input.GetFields()["temperature"].GetNumberValue()) - llmInput.Temperature = &v - } - if _, ok := input.GetFields()["top-k"]; ok { - v := int32(input.GetFields()["top-k"].GetNumberValue()) - llmInput.TopK = &v - } - if _, ok := input.GetFields()["seed"]; ok { - v := int32(input.GetFields()["seed"].GetNumberValue()) - llmInput.Seed = &v - } - return llmInput - -} diff --git a/ai/instill/v0/main.go b/ai/instill/v0/main.go 
index 4002653c..0b3b89ba 100644 --- a/ai/instill/v0/main.go +++ b/ai/instill/v0/main.go @@ -16,7 +16,6 @@ import ( "github.com/instill-ai/component/base" "github.com/instill-ai/component/internal/util" - commonPB "github.com/instill-ai/protogen-go/common/task/v1alpha" modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" pb "github.com/instill-ai/protogen-go/vdp/pipeline/v1beta" ) @@ -102,30 +101,24 @@ func (e *execution) Execute(ctx context.Context, in base.InputReader, out base.O version := modelNameSplits[2] var result []*structpb.Struct switch e.Task { - case commonPB.Task_TASK_UNSPECIFIED.String(): - result, err = e.executeUnspecified(gRPCClient, nsID, modelID, version, inputs) - case commonPB.Task_TASK_CLASSIFICATION.String(): - result, err = e.executeImageClassification(gRPCClient, nsID, modelID, version, inputs) - case commonPB.Task_TASK_DETECTION.String(): - result, err = e.executeObjectDetection(gRPCClient, nsID, modelID, version, inputs) - case commonPB.Task_TASK_KEYPOINT.String(): - result, err = e.executeKeyPointDetection(gRPCClient, nsID, modelID, version, inputs) - case commonPB.Task_TASK_OCR.String(): - result, err = e.executeOCR(gRPCClient, nsID, modelID, version, inputs) - case commonPB.Task_TASK_INSTANCE_SEGMENTATION.String(): - result, err = e.executeInstanceSegmentation(gRPCClient, nsID, modelID, version, inputs) - case commonPB.Task_TASK_SEMANTIC_SEGMENTATION.String(): - result, err = e.executeSemanticSegmentation(gRPCClient, nsID, modelID, version, inputs) - case commonPB.Task_TASK_TEXT_TO_IMAGE.String(): + case "TASK_CLASSIFICATION": + result, err = e.executeVisionTask(gRPCClient, nsID, modelID, version, inputs) + case "TASK_DETECTION": + result, err = e.executeVisionTask(gRPCClient, nsID, modelID, version, inputs) + case "TASK_KEYPOINT": + result, err = e.executeVisionTask(gRPCClient, nsID, modelID, version, inputs) + case "TASK_OCR": + result, err = e.executeVisionTask(gRPCClient, nsID, modelID, version, inputs) + case 
"TASK_INSTANCE_SEGMENTATION": + result, err = e.executeVisionTask(gRPCClient, nsID, modelID, version, inputs) + case "TASK_SEMANTIC_SEGMENTATION": + result, err = e.executeVisionTask(gRPCClient, nsID, modelID, version, inputs) + case "TASK_TEXT_TO_IMAGE": result, err = e.executeTextToImage(gRPCClient, nsID, modelID, version, inputs) - case commonPB.Task_TASK_TEXT_GENERATION.String(): + case "TASK_TEXT_GENERATION": result, err = e.executeTextGeneration(gRPCClient, nsID, modelID, version, inputs) - case commonPB.Task_TASK_TEXT_GENERATION_CHAT.String(): + case "TASK_TEXT_GENERATION_CHAT", "TASK_VISUAL_QUESTION_ANSWERING": result, err = e.executeTextGenerationChat(gRPCClient, nsID, modelID, version, inputs) - case commonPB.Task_TASK_VISUAL_QUESTION_ANSWERING.String(): - result, err = e.executeVisualQuestionAnswering(gRPCClient, nsID, modelID, version, inputs) - case commonPB.Task_TASK_IMAGE_TO_IMAGE.String(): - result, err = e.executeImageToImage(gRPCClient, nsID, modelID, version, inputs) default: return fmt.Errorf("unsupported task: %s", e.Task) } diff --git a/ai/instill/v0/object_detection.go b/ai/instill/v0/object_detection.go deleted file mode 100644 index 8b5fc81f..00000000 --- a/ai/instill/v0/object_detection.go +++ /dev/null @@ -1,73 +0,0 @@ -package instill - -import ( - "fmt" - - "google.golang.org/protobuf/types/known/structpb" - - "github.com/instill-ai/component/base" - modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" -) - -func (e *execution) executeObjectDetection(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { - if len(inputs) <= 0 { - return nil, fmt.Errorf("invalid input: %v for model: %s/%s/%s", inputs, nsID, modelID, version) - } - - if grpcClient == nil { - return nil, fmt.Errorf("uninitialized client") - } - - taskInputs := []*modelPB.TaskInput{} - for _, input := range inputs { - detectionInput := &modelPB.DetectionInput{} - 
detectionInput.Type = &modelPB.DetectionInput_ImageBase64{ - ImageBase64: base.TrimBase64Mime(input.Fields["image-base64"].GetStringValue()), - } - - modelInput := &modelPB.TaskInput_Detection{ - Detection: detectionInput, - } - taskInputs = append(taskInputs, &modelPB.TaskInput{Input: modelInput}) - } - - taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, taskInputs) - if err != nil { - return nil, err - } - if len(taskOutputs) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) - } - - outputs := []*structpb.Struct{} - for idx := range inputs { - objDetectionOutput := taskOutputs[idx].GetDetection() - if objDetectionOutput == nil { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", objDetectionOutput, nsID, modelID, version) - } - objects := make([]any, len(objDetectionOutput.Objects)) - - for i := range objDetectionOutput.Objects { - objects[i] = map[string]any{ - "category": objDetectionOutput.Objects[i].Category, - "score": objDetectionOutput.Objects[i].Score, - "bounding-box": map[string]any{ - "top": objDetectionOutput.Objects[i].BoundingBox.Top, - "left": objDetectionOutput.Objects[i].BoundingBox.Left, - "width": objDetectionOutput.Objects[i].BoundingBox.Width, - "height": objDetectionOutput.Objects[i].BoundingBox.Height, - }, - } - } - output, err := structpb.NewStruct(map[string]any{ - "objects": objects, - }) - if err != nil { - return nil, err - } - outputs = append(outputs, output) - - } - - return outputs, nil -} diff --git a/ai/instill/v0/ocr.go b/ai/instill/v0/ocr.go deleted file mode 100644 index 2bf565bc..00000000 --- a/ai/instill/v0/ocr.go +++ /dev/null @@ -1,67 +0,0 @@ -package instill - -import ( - "fmt" - - "google.golang.org/protobuf/types/known/structpb" - - "github.com/instill-ai/component/base" - modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" -) - -func (e *execution) executeOCR(grpcClient 
modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { - if len(inputs) <= 0 { - return nil, fmt.Errorf("invalid input: %v for model: %s/%s/%s", inputs, nsID, modelID, version) - } - - if grpcClient == nil { - return nil, fmt.Errorf("uninitialized client") - } - - outputs := []*structpb.Struct{} - for _, input := range inputs { - taskInput := &modelPB.TaskInput_Ocr{ - Ocr: &modelPB.OcrInput{ - Type: &modelPB.OcrInput_ImageBase64{ - ImageBase64: base.TrimBase64Mime(input.Fields["image-base64"].GetStringValue()), - }, - }, - } - - // only support batch 1 - taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, []*modelPB.TaskInput{{Input: taskInput}}) - if err != nil { - return nil, err - } - if len(taskOutputs) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) - } - - ocrOutput := taskOutputs[0].GetOcr() - if ocrOutput == nil { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", ocrOutput, nsID, modelID, version) - } - objects := make([]any, len(ocrOutput.Objects)) - - for i := range ocrOutput.Objects { - objects[i] = map[string]any{ - "text": ocrOutput.Objects[i].Text, - "score": ocrOutput.Objects[i].Score, - "bounding-box": map[string]any{ - "top": ocrOutput.Objects[i].BoundingBox.Top, - "left": ocrOutput.Objects[i].BoundingBox.Left, - "width": ocrOutput.Objects[i].BoundingBox.Width, - "height": ocrOutput.Objects[i].BoundingBox.Height, - }, - } - } - output, err := structpb.NewStruct(map[string]any{ - "objects": objects, - }) - if err != nil { - return nil, err - } - outputs = append(outputs, output) - } - return outputs, nil -} diff --git a/ai/instill/v0/semantic_segmentation.go b/ai/instill/v0/semantic_segmentation.go deleted file mode 100644 index 0be5fd79..00000000 --- a/ai/instill/v0/semantic_segmentation.go +++ /dev/null @@ -1,63 +0,0 @@ -package instill - -import ( - "fmt" - 
- "google.golang.org/protobuf/encoding/protojson" - "google.golang.org/protobuf/types/known/structpb" - - "github.com/instill-ai/component/base" - modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" -) - -func (e *execution) executeSemanticSegmentation(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { - if len(inputs) <= 0 { - return nil, fmt.Errorf("invalid input: %v for model: %s/%s/%s", inputs, nsID, modelID, version) - } - if grpcClient == nil { - return nil, fmt.Errorf("uninitialized client") - } - taskInputs := []*modelPB.TaskInput{} - for _, input := range inputs { - semanticSegmentationInput := &modelPB.SemanticSegmentationInput{} - semanticSegmentationInput.Type = &modelPB.SemanticSegmentationInput_ImageBase64{ - ImageBase64: base.TrimBase64Mime(input.Fields["image-base64"].GetStringValue()), - } - - taskInput := &modelPB.TaskInput_SemanticSegmentation{ - SemanticSegmentation: semanticSegmentationInput, - } - taskInputs = append(taskInputs, &modelPB.TaskInput{Input: taskInput}) - - } - - taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, taskInputs) - if err != nil { - return nil, err - } - if len(taskOutputs) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) - } - - outputs := []*structpb.Struct{} - for idx := range inputs { - semanticSegmentationOp := taskOutputs[idx].GetSemanticSegmentation() - if semanticSegmentationOp == nil { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", semanticSegmentationOp, nsID, modelID, version) - } - outputJSON, err := protojson.MarshalOptions{ - UseProtoNames: true, - EmitUnpopulated: true, - }.Marshal(semanticSegmentationOp) - if err != nil { - return nil, err - } - output := &structpb.Struct{} - err = protojson.Unmarshal(outputJSON, output) - if err != nil { - return nil, err - } - outputs = 
append(outputs, output) - } - return outputs, nil -} diff --git a/ai/instill/v0/struct.go b/ai/instill/v0/struct.go new file mode 100644 index 00000000..3ea73123 --- /dev/null +++ b/ai/instill/v0/struct.go @@ -0,0 +1,6 @@ +package instill + +type RequestWrapper struct { + Data any `json:"data,omitempty"` + Parameter any `json:"parameter,omitempty"` +} diff --git a/ai/instill/v0/text_generation.go b/ai/instill/v0/text_generation.go index cfc87d88..1f888263 100644 --- a/ai/instill/v0/text_generation.go +++ b/ai/instill/v0/text_generation.go @@ -3,12 +3,25 @@ package instill import ( "fmt" - "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/types/known/structpb" + "github.com/instill-ai/component/base" modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" ) +type TextCompletionRequestData struct { + Prompt string `json:"prompt"` + SystemMessage string `json:"system-message,omitempty"` +} + +type TextCompletionRequestParameter struct { + MaxTokens int `json:"max-tokens,omitempty"` + Seed int `json:"seed,omitempty"` + N int `json:"n,omitempty"` + Temperature float32 `json:"temperature,omitempty"` + TopP int `json:"top-p,omitempty"` +} + func (e *execution) executeTextGeneration(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { if len(inputs) <= 0 { return nil, fmt.Errorf("invalid input: %v for model: %s/%s/%s", inputs, nsID, modelID, version) @@ -18,52 +31,54 @@ func (e *execution) executeTextGeneration(grpcClient modelPB.ModelPublicServiceC return nil, fmt.Errorf("uninitialized client") } - outputs := []*structpb.Struct{} - + taskInputs := []*structpb.Struct{} for _, input := range inputs { - - llmInput := e.convertLLMInput(input) - taskInput := &modelPB.TaskInput_TextGeneration{ - TextGeneration: &modelPB.TextGenerationInput{ - Prompt: llmInput.Prompt, - PromptImages: llmInput.PromptImages, - ChatHistory: llmInput.ChatHistory, - 
SystemMessage: llmInput.SystemMessage, - MaxNewTokens: llmInput.MaxNewTokens, - Temperature: llmInput.Temperature, - TopK: llmInput.TopK, - Seed: llmInput.Seed, - ExtraParams: llmInput.ExtraParams, + i := &RequestWrapper{ + Data: &TextCompletionRequestData{ + Prompt: input.GetFields()["prompt"].GetStringValue(), + }, + Parameter: &TextCompletionRequestParameter{ + N: 1, }, } - - // only support batch 1 - taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, []*modelPB.TaskInput{{Input: taskInput}}) - if err != nil { - return nil, err + if _, ok := input.GetFields()["system-message"]; ok { + v := input.GetFields()["system-message"].GetStringValue() + i.Data.(*TextCompletionRequestData).SystemMessage = v } - if len(taskOutputs) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) + if _, ok := input.GetFields()["seed"]; ok { + v := int(input.GetFields()["seed"].GetNumberValue()) + i.Parameter.(*TextCompletionRequestParameter).Seed = v } - - textGenOutput := taskOutputs[0].GetTextGeneration() - if textGenOutput == nil { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", textGenOutput, nsID, modelID, version) + if _, ok := input.GetFields()["max-new-tokens"]; ok { + v := int(input.GetFields()["max-new-tokens"].GetNumberValue()) + i.Parameter.(*TextCompletionRequestParameter).MaxTokens = v } - outputJSON, err := protojson.MarshalOptions{ - UseProtoNames: true, - EmitUnpopulated: true, - }.Marshal(textGenOutput) - if err != nil { - return nil, err + if _, ok := input.GetFields()["temperature"]; ok { + v := float32(input.GetFields()["temperature"].GetNumberValue()) + i.Parameter.(*TextCompletionRequestParameter).Temperature = v } - output := &structpb.Struct{} - err = protojson.Unmarshal(outputJSON, output) + taskInput, err := base.ConvertToStructpb(i) if err != nil { return nil, err } - outputs = append(outputs, output) + taskInputs = append(taskInputs, taskInput) + } + 
taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, taskInputs) + if err != nil { + return nil, err } + if len(taskOutputs) <= 0 { + return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) + } + + outputs := []*structpb.Struct{} + for idx := range inputs { + choices := taskOutputs[idx].Fields["data"].GetStructValue().Fields["choices"].GetListValue() + output := structpb.Struct{Fields: make(map[string]*structpb.Value)} + output.Fields["text"] = structpb.NewStringValue(choices.GetValues()[0].GetStructValue().Fields["content"].GetStringValue()) + outputs = append(outputs, &output) + } + return outputs, nil } diff --git a/ai/instill/v0/text_generation_chat.go b/ai/instill/v0/text_generation_chat.go index 2bea8ae1..c18356e4 100644 --- a/ai/instill/v0/text_generation_chat.go +++ b/ai/instill/v0/text_generation_chat.go @@ -1,14 +1,51 @@ package instill import ( + "encoding/base64" "fmt" - "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/types/known/structpb" + "github.com/gabriel-vasile/mimetype" + "github.com/instill-ai/component/ai/openai/v0" + "github.com/instill-ai/component/base" + modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" ) +type TextGenerationInput struct { + Prompt string `json:"prompt"` + SystemMessage *string `json:"system-message,omitempty"` + PromptImages []string `json:"prompt-images,omitempty"` + + // Note: We're currently sharing the same struct in the OpenAI component, + // but this will be moved to the standardized format later. 
+ ChatHistory []*openai.TextMessage `json:"chat-history,omitempty"` +} + +type ChatRequestData struct { + Messages []Message `json:"messages,omitempty"` +} + +type Message struct { + Content []Content `json:"content,omitempty"` + Role string `json:"role,omitempty"` +} + +type Content struct { + Text string `json:"text,omitempty"` + ImageBase64 string `json:"image-base64,omitempty"` + Type string `json:"type,omitempty"` +} + +type ChatParameter struct { + MaxTokens int `json:"max-tokens,omitempty"` + Seed int `json:"seed,omitempty"` + N int `json:"n,omitempty"` + Temperature float32 `json:"temperature,omitempty"` + TopP int `json:"top-p,omitempty"` +} + func (e *execution) executeTextGenerationChat(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { if len(inputs) <= 0 { @@ -19,51 +56,96 @@ func (e *execution) executeTextGenerationChat(grpcClient modelPB.ModelPublicServ return nil, fmt.Errorf("uninitialized client") } - outputs := []*structpb.Struct{} - + taskInputs := []*structpb.Struct{} for _, input := range inputs { - llmInput := e.convertLLMInput(input) - taskInput := &modelPB.TaskInput_TextGenerationChat{ - TextGenerationChat: &modelPB.TextGenerationChatInput{ - Prompt: llmInput.Prompt, - PromptImages: llmInput.PromptImages, - ChatHistory: llmInput.ChatHistory, - SystemMessage: llmInput.SystemMessage, - MaxNewTokens: llmInput.MaxNewTokens, - Temperature: llmInput.Temperature, - TopK: llmInput.TopK, - Seed: llmInput.Seed, - ExtraParams: llmInput.ExtraParams, - }, - } - - // only support batch 1 - taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, []*modelPB.TaskInput{{Input: taskInput}}) + inputStruct := TextGenerationInput{} + err := base.ConvertFromStructpb(input, &inputStruct) if err != nil { return nil, err } - if len(taskOutputs) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, 
version) + messages := []Message{} + + // If chat history is provided, add it to the messages, and ignore the system message + if inputStruct.ChatHistory != nil { + for _, chat := range inputStruct.ChatHistory { + contents := make([]Content, len(chat.Content)) + for i, c := range chat.Content { + if c.Type == "text" { + contents[i] = Content{ + Text: *c.Text, + Type: "text", + } + } else { + contents[i] = Content{ + ImageBase64: c.ImageURL.URL, + Type: "image-base64", + } + } + } + messages = append(messages, Message{Role: chat.Role, Content: contents}) + } + } else if inputStruct.SystemMessage != nil { + contents := make([]Content, 1) + contents[0] = Content{Text: *inputStruct.SystemMessage, Type: "text"} + // If chat history is not provided, add the system message to the messages + messages = append(messages, Message{Role: "system", Content: contents}) + } + userContents := []Content{} + userContents = append(userContents, Content{Type: "text", Text: inputStruct.Prompt}) + for _, image := range inputStruct.PromptImages { + b, err := base64.StdEncoding.DecodeString(base.TrimBase64Mime(image)) + if err != nil { + return nil, err + } + url := fmt.Sprintf("data:%s;base64,%s", mimetype.Detect(b).String(), base.TrimBase64Mime(image)) + userContents = append(userContents, Content{Type: "image_url", ImageBase64: url}) } + messages = append(messages, Message{Role: "user", Content: userContents}) - textGenChatOutput := taskOutputs[0].GetTextGenerationChat() - if textGenChatOutput == nil { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", textGenChatOutput, nsID, modelID, version) + i := &RequestWrapper{ + Data: &ChatRequestData{ + Messages: messages, + }, + Parameter: &ChatParameter{ + N: 1, + }, } - outputJSON, err := protojson.MarshalOptions{ - UseProtoNames: true, - EmitUnpopulated: true, - }.Marshal(textGenChatOutput) - if err != nil { - return nil, err + if _, ok := input.GetFields()["seed"]; ok { + v := int(input.GetFields()["seed"].GetNumberValue()) 
+ i.Parameter.(*ChatParameter).Seed = v } - output := &structpb.Struct{} - err = protojson.Unmarshal(outputJSON, output) + if _, ok := input.GetFields()["max-new-tokens"]; ok { + v := int(input.GetFields()["max-new-tokens"].GetNumberValue()) + i.Parameter.(*ChatParameter).MaxTokens = v + } + if _, ok := input.GetFields()["temperature"]; ok { + v := float32(input.GetFields()["temperature"].GetNumberValue()) + i.Parameter.(*ChatParameter).Temperature = v + } + taskInput, err := base.ConvertToStructpb(i) if err != nil { return nil, err } - outputs = append(outputs, output) + taskInputs = append(taskInputs, taskInput) + } + taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, taskInputs) + if err != nil { + return nil, err + } + if len(taskOutputs) <= 0 { + return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) + } + + outputs := []*structpb.Struct{} + for idx := range inputs { + choices := taskOutputs[idx].Fields["data"].GetStructValue().Fields["choices"].GetListValue() + output := structpb.Struct{Fields: make(map[string]*structpb.Value)} + output.Fields["text"] = structpb.NewStringValue( + choices.GetValues()[0].GetStructValue(). + Fields["message"].GetStructValue(). 
+ Fields["content"].GetStringValue()) + outputs = append(outputs, &output) } return outputs, nil } diff --git a/ai/instill/v0/text_to_image.go b/ai/instill/v0/text_to_image.go index f8d128ad..fc0a13e8 100644 --- a/ai/instill/v0/text_to_image.go +++ b/ai/instill/v0/text_to_image.go @@ -3,13 +3,23 @@ package instill import ( "fmt" - "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/types/known/structpb" "github.com/instill-ai/component/base" modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" ) +type TextToImageRequestData struct { + Prompt string `json:"prompt"` +} + +type TextToImageRequestParameter struct { + AspectRatio string `json:"aspect-ratio,omitempty"` + NegativePrompt string `json:"negative-prompt,omitempty"` + N int `json:"n,omitempty"` + Seed int `json:"seed,omitempty"` +} + func (e *execution) executeTextToImage(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { if len(inputs) <= 0 { return nil, fmt.Errorf("invalid input: %v for model: %s/%s/%s", inputs, nsID, modelID, version) @@ -19,68 +29,59 @@ func (e *execution) executeTextToImage(grpcClient modelPB.ModelPublicServiceClie return nil, fmt.Errorf("uninitialized client") } - outputs := []*structpb.Struct{} + taskInputs := []*structpb.Struct{} + for _, input := range inputs { - textToImageInput := &modelPB.TextToImageInput{ - Prompt: input.GetFields()["prompt"].GetStringValue(), - } - if _, ok := input.GetFields()["steps"]; ok { - v := int32(input.GetFields()["steps"].GetNumberValue()) - textToImageInput.Steps = &v - } - if _, ok := input.GetFields()["image-base64"]; ok { - textToImageInput.Type = &modelPB.TextToImageInput_PromptImageBase64{ - PromptImageBase64: base.TrimBase64Mime(input.GetFields()["image-base64"].GetStringValue()), - } - } - if _, ok := input.GetFields()["cfg-scale"]; ok { - v := float32(input.GetFields()["cfg-scale"].GetNumberValue()) - 
textToImageInput.CfgScale = &v + i := &RequestWrapper{ + Data: &TextToImageRequestData{ + Prompt: input.GetFields()["prompt"].GetStringValue(), + }, + Parameter: &TextToImageRequestParameter{}, } if _, ok := input.GetFields()["samples"]; ok { - v := int32(input.GetFields()["samples"].GetNumberValue()) - textToImageInput.Samples = &v + v := int(input.GetFields()["samples"].GetNumberValue()) + i.Parameter.(*TextToImageRequestParameter).N = v } if _, ok := input.GetFields()["seed"]; ok { - v := int32(input.GetFields()["seed"].GetNumberValue()) - textToImageInput.Seed = &v + v := int(input.GetFields()["seed"].GetNumberValue()) + i.Parameter.(*TextToImageRequestParameter).Seed = v } - taskInput := &modelPB.TaskInput_TextToImage{ - TextToImage: textToImageInput, + if _, ok := input.GetFields()["aspect-ratio"]; ok { + v := input.GetFields()["aspect-ratio"].GetStringValue() + i.Parameter.(*TextToImageRequestParameter).AspectRatio = v } - - // only support batch 1 - taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, []*modelPB.TaskInput{{Input: taskInput}}) + if _, ok := input.GetFields()["negative-prompt"]; ok { + v := input.GetFields()["negative-prompt"].GetStringValue() + i.Parameter.(*TextToImageRequestParameter).NegativePrompt = v + } + taskInput, err := base.ConvertToStructpb(i) if err != nil { return nil, err } - if len(taskOutputs) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) - } + taskInputs = append(taskInputs, taskInput) + } - textToImgOutput := taskOutputs[0].GetTextToImage() + taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, taskInputs) + if err != nil { + return nil, err + } + if len(taskOutputs) <= 0 { + return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) + } + outputs := []*structpb.Struct{} + for idx := range inputs { + choices := 
taskOutputs[idx].Fields["data"].GetStructValue().Fields["choices"].GetListValue() - for imageIdx := range textToImgOutput.Images { - textToImgOutput.Images[imageIdx] = fmt.Sprintf("data:image/jpeg;base64,%s", textToImgOutput.Images[imageIdx]) + images := make([]*structpb.Value, len(choices.Values)) + for i, c := range choices.Values { + images[i] = c.GetStructValue().Fields["image"] } - if textToImgOutput == nil || len(textToImgOutput.Images) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", textToImgOutput, nsID, modelID, version) - } + output := structpb.Struct{Fields: make(map[string]*structpb.Value)} - outputJSON, err := protojson.MarshalOptions{ - UseProtoNames: true, - EmitUnpopulated: true, - }.Marshal(textToImgOutput) - if err != nil { - return nil, err - } - output := &structpb.Struct{} - err = protojson.Unmarshal(outputJSON, output) - if err != nil { - return nil, err - } - outputs = append(outputs, output) + output.Fields["images"] = structpb.NewListValue(&structpb.ListValue{Values: images}) + outputs = append(outputs, &output) } + return outputs, nil } diff --git a/ai/instill/v0/unspecified.go b/ai/instill/v0/unspecified.go deleted file mode 100644 index 6fe66405..00000000 --- a/ai/instill/v0/unspecified.go +++ /dev/null @@ -1,23 +0,0 @@ -package instill - -import ( - "fmt" - - modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" - "google.golang.org/protobuf/types/known/structpb" -) - -func (e *execution) executeUnspecified(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { - if len(inputs) <= 0 { - return nil, fmt.Errorf("invalid input: %v for model: %s/%s", inputs, nsID, modelID) - } - //TODO: figure out what to do here? 
- /* - modelInput := &modelPB.TaskInput_Unspecified{ - Unspecified: &modelPB.UnspecifiedInput{ - RawInputs: []*structpb.Struct{}, - }, - } - */ - return inputs, nil -} diff --git a/ai/instill/v0/vision.go b/ai/instill/v0/vision.go new file mode 100644 index 00000000..02f38647 --- /dev/null +++ b/ai/instill/v0/vision.go @@ -0,0 +1,53 @@ +package instill + +import ( + "fmt" + + "google.golang.org/protobuf/types/known/structpb" + + "github.com/instill-ai/component/base" + modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" +) + +type VisionRequestData struct { + ImageBase64 string `json:"image-base64"` + Type string `json:"type"` +} + +func (e *execution) executeVisionTask(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { + if len(inputs) <= 0 { + return nil, fmt.Errorf("invalid input: %v for model: %s/%s/%s", inputs, nsID, modelID, version) + } + + if grpcClient == nil { + return nil, fmt.Errorf("uninitialized client") + } + + taskInputs := []*structpb.Struct{} + for _, input := range inputs { + i := &RequestWrapper{ + Data: &VisionRequestData{ + ImageBase64: base.TrimBase64Mime(input.Fields["image-base64"].GetStringValue()), + Type: "image-base64", + }, + } + taskInput, err := base.ConvertToStructpb(i) + if err != nil { + return nil, err + } + taskInputs = append(taskInputs, taskInput) + } + + taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, taskInputs) + if err != nil { + return nil, err + } + if len(taskOutputs) <= 0 { + return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) + } + outputs := []*structpb.Struct{} + for idx := range inputs { + outputs = append(outputs, taskOutputs[idx].Fields["data"].GetStructValue()) + } + return outputs, nil +} diff --git a/ai/instill/v0/visual_question_answering.go b/ai/instill/v0/visual_question_answering.go deleted file mode 100644 index 
3a699e30..00000000 --- a/ai/instill/v0/visual_question_answering.go +++ /dev/null @@ -1,69 +0,0 @@ -package instill - -import ( - "fmt" - - "google.golang.org/protobuf/encoding/protojson" - "google.golang.org/protobuf/types/known/structpb" - - modelPB "github.com/instill-ai/protogen-go/model/model/v1alpha" -) - -func (e *execution) executeVisualQuestionAnswering(grpcClient modelPB.ModelPublicServiceClient, nsID string, modelID string, version string, inputs []*structpb.Struct) ([]*structpb.Struct, error) { - if len(inputs) <= 0 { - return nil, fmt.Errorf("invalid input: %v for model: %s/%s/%s", inputs, nsID, modelID, version) - } - - if grpcClient == nil { - return nil, fmt.Errorf("uninitialized client") - } - - outputs := []*structpb.Struct{} - - for _, input := range inputs { - - llmInput := e.convertLLMInput(input) - taskInput := &modelPB.TaskInput_VisualQuestionAnswering{ - VisualQuestionAnswering: &modelPB.VisualQuestionAnsweringInput{ - Prompt: llmInput.Prompt, - PromptImages: llmInput.PromptImages, - ChatHistory: llmInput.ChatHistory, - SystemMessage: llmInput.SystemMessage, - MaxNewTokens: llmInput.MaxNewTokens, - Temperature: llmInput.Temperature, - TopK: llmInput.TopK, - Seed: llmInput.Seed, - ExtraParams: llmInput.ExtraParams, - }, - } - - // only support batch 1 - taskOutputs, err := trigger(grpcClient, e.SystemVariables, nsID, modelID, version, []*modelPB.TaskInput{{Input: taskInput}}) - if err != nil { - return nil, err - } - if len(taskOutputs) <= 0 { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", taskOutputs, nsID, modelID, version) - } - - visualQuestionAnsweringOutput := taskOutputs[0].GetVisualQuestionAnswering() - if visualQuestionAnsweringOutput == nil { - return nil, fmt.Errorf("invalid output: %v for model: %s/%s/%s", visualQuestionAnsweringOutput, nsID, modelID, version) - } - outputJSON, err := protojson.MarshalOptions{ - UseProtoNames: true, - EmitUnpopulated: true, - }.Marshal(visualQuestionAnsweringOutput) - if err != 
nil { - return nil, err - } - output := &structpb.Struct{} - err = protojson.Unmarshal(outputJSON, output) - if err != nil { - return nil, err - } - outputs = append(outputs, output) - - } - return outputs, nil -} diff --git a/ai/instill/v0/xxx.json b/ai/instill/v0/xxx.json new file mode 100644 index 00000000..d50cc806 --- /dev/null +++ b/ai/instill/v0/xxx.json @@ -0,0 +1,1424 @@ +{ + "TASK_EMBEDDING": { + "title": "Embedding", + "instillShortDescription": "Generate embeddings based on input data", + "input": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Embedding input", + "description": "Input schema of the embedding task", + "instillShortDescription": "Input schema of the embedding task", + "type": "object", + "properties": { + "data": { + "description": "Input data", + "instillShortDescription": "Input data", + "type": "object", + "properties": { + "model": { + "description": "The model to be used for generating embeddings.", + "instillShortDescription": "The model to be used.", + "instillAcceptFormats": [ + "string" + ], + "title": "Model Name", + "type": "string" + }, + "input": { + "title": "Embedding Input", + "type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "description": "The content to be embedded.", + "instillShortDescription": "The content to be embedded.", + "title": "Content", + "type": "array", + "items": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "text": { + "title": "Text content", + "description": "Text content to be embedded", + "instillShortDescription": "Text content", + "instillAcceptFormats": [ + "string" + ], + "type": "string" + }, + "type": { + "title": "Content type", + "description": "Input content type.", + "instillShortDescription": "Input content type.", + "instillAcceptFormats": [ + "string" + ], + "type": "string", + "const": "text" + } + }, + "required": [ + "text", + "type" + ] + }, + { + "type": "object", + "properties": { + 
"image-url": { + "title": "Image url", + "description": "Image content with URL.", + "instillShortDescription": "Image content url.", + "instillAcceptFormats": [ + "string" + ], + "type": "string" + }, + "type": { + "title": "Content type", + "description": "Input content type", + "instillShortDescription": "Input content type", + "instillAcceptFormats": [ + "string" + ], + "type": "string", + "const": "image-url" + } + }, + "required": [ + "image-url", + "type" + ] + }, + { + "type": "object", + "properties": { + "image-base64": { + "title": "Image base64", + "description": "Image content with base64 encoded string.", + "instillShortDescription": "Image content with base64 encoded string.", + "instillAcceptFormats": [ + "image/*" + ], + "type": "string" + }, + "type": { + "title": "Content type", + "description": "Input content type", + "instillShortDescription": "Input content type", + "instillAcceptFormats": [ + "string" + ], + "type": "string", + "const": "image-base64" + } + }, + "required": [ + "image-base64", + "type" + ] + } + ] + } + } + }, + "required": [ + "content" + ] + } + } + }, + "required": [ + "model", + "input" + ] + }, + "parameter": { + "description": "Input parameter", + "instillShortDescription": "Input parameter", + "type": "object", + "properties": { + "format": { + "title": "Data format", + "type": "string", + "description": "The data format of the embeddings. 
Defaults to float.", + "instillShortDescription": "Data format", + "instillAcceptFormats": [ + "string" + ], + "enum": [ + "float", + "base64" + ], + "default": "float" + }, + "dimensions": { + "title": "Dimensions", + "type": "integer", + "description": "Number of dimensions in the output embedding vectors.", + "instillShortDescription": "Number of dimensions", + "instillAcceptFormats": [ + "integer" + ], + "default": 512 + }, + "input-type": { + "title": "Input type", + "type": "string", + "description": "The type of input data to be embedded (e.g., query, document).", + "instillShortDescription": "Type of input data", + "instillAcceptFormats": [ + "string" + ] + }, + "truncate": { + "title": "Truncate", + "type": "string", + "description": "How to handle inputs longer than the max token length. Defaults to 'End'.", + "instillShortDescription": "Truncation handling", + "instillAcceptFormats": [ + "string" + ], + "enum": [ + "None", + "End", + "Start" + ], + "default": "End" + } + } + } + }, + "required": [ + "data" + ] + }, + "output": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Embedding output", + "description": "Output schema of the embedding task", + "instillShortDescription": "Output schema of the embedding task", + "type": "object", + "properties": { + "data": { + "description": "Output data", + "instillShortDescription": "Output data", + "type": "object", + "properties": { + "embeddings": { + "title": "Embeddings", + "type": "array", + "description": "List of generated embeddings.", + "instillShortDescription": "List of embeddings", + "instillFormat": "array", + "items": { + "type": "object", + "properties": { + "index": { + "title": "Index", + "type": "integer", + "description": "The index of the embedding vector in the array.", + "instillShortDescription": "Index in the array", + "instillFormat": "integer" + }, + "vector": { + "title": "Embedding Vector", + "type": "array", + "description": "The embedding vector.", + 
"instillShortDescription": "Embedding vector", + "instillFormat": "array" + }, + "created": { + "title": "Created", + "type": "integer", + "description": "The Unix timestamp (in seconds) of when the embedding was created.", + "instillShortDescription": "Timestamp of creation", + "instillFormat": "integer" + } + }, + "required": [ + "index", + "vector", + "created" + ] + } + } + }, + "required": [ + "embeddings" + ] + } + } + } + }, + "TASK_CHAT": { + "title": "Chat", + "instillShortDescription": "Generate response base on conversation input", + "input": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Chat input", + "description": "Input schema of the chat task", + "instillShortDescription": "Input schema of the chat task", + "type": "object", + "properties": { + "data": { + "description": "Input data", + "instillShortDescription": "Input data", + "type": "object", + "properties": { + "model": { + "description": "The model to be used.", + "instillShortDescription": "The model to be used.", + "instillAcceptFormats": [ + "string" + ], + "title": "Model Name", + "type": "string" + }, + "messages": { + "title": "Chat Messages", + "type": "array", + "items": { + "type": "object", + "properties": { + "content": { + "description": "The message content", + "instillShortDescription": "The message content", + "title": "Content", + "type": "array", + "items": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "text": { + "title": "Text message", + "description": "Text message.", + "instillShortDescription": "Text message.", + "instillAcceptFormats": [ + "string" + ], + "type": "string" + }, + "type": { + "title": "Content type", + "description": "Input content type.", + "instillShortDescription": "Input content type.", + "instillAcceptFormats": [ + "string" + ], + "type": "string", + "const": "text" + } + }, + "required": [ + "text", + "type" + ] + }, + { + "type": "object", + "properties": { + "image-url": { + "title": "Image 
url", + "description": "Image message url.", + "instillShortDescription": "Image message url.", + "instillAcceptFormats": [ + "string" + ], + "type": "string" + }, + "type": { + "title": "Content type", + "description": "Input content type", + "instillShortDescription": "Input content type", + "instillAcceptFormats": [ + "string" + ], + "type": "string", + "const": "image-url" + } + }, + "required": [ + "image-url", + "type" + ] + }, + { + "type": "object", + "properties": { + "image-base64": { + "title": "Image base64", + "description": "Image base64 encoded string.", + "instillShortDescription": "Image base64 encoded string.", + "instillAcceptFormats": [ + "image/*" + ], + "type": "string" + }, + "type": { + "title": "Content type", + "description": "Input content type", + "instillShortDescription": "Input content type", + "instillAcceptFormats": [ + "string" + ], + "type": "string", + "const": "image-base64" + } + }, + "required": [ + "image-base64", + "type" + ] + } + ] + } + }, + "role": { + "description": "The message role, i.e. 'system', 'user' or 'assistant'", + "instillShortDescription": "The message role, i.e. 'system', 'user' or 'assistant'", + "instillAcceptFormats": [ + "string" + ], + "title": "Role", + "type": "string", + "enum": [ + "system", + "user", + "assistant" + ] + }, + "name": { + "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role.", + "instillShortDescription": "An optional name for the participant. 
Provides the model information to differentiate between participants of the same role.", + "instillAcceptFormats": [ + "string" + ], + "title": "Name", + "type": "string" + } + }, + "required": [ + "content", + "role" + ] + } + } + }, + "required": [ + "model", + "messages" + ] + }, + "parameter": { + "description": "Input parameter", + "instillShortDescription": "Input parameter", + "type": "object", + "properties": { + "max-tokens": { + "title": "Max new tokens", + "type": "integer", + "description": "The maximum number of tokens for model to generate", + "instillShortDescription": "The maximum number of tokens for model to generate", + "instillAcceptFormats": [ + "integer" + ], + "default": 50 + }, + "seed": { + "title": "Seed", + "type": "integer", + "description": "The seed, default is 0", + "instillShortDescription": "The seed, default is 0", + "instillAcceptFormats": [ + "integer" + ], + "default": 0 + }, + "n": { + "title": "Number of choices", + "type": "integer", + "description": "How many chat completion choices to generate for each input message.", + "instillShortDescription": "How many chat completion choices to generate for each input message.", + "instillAcceptFormats": [ + "integer" + ], + "default": 1 + }, + "temperature": { + "title": "Temperature", + "type": "number", + "description": "The temperature for sampling", + "instillShortDescription": "The temperature for sampling", + "instillAcceptFormats": [ + "number" + ], + "default": 0.7 + }, + "top-p": { + "title": "Top P", + "type": "number", + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. 
We generally recommend altering this or temperature but not both.", + "instillShortDescription": "Nucleus sampling", + "instillAcceptFormats": [ + "number" + ], + "default": 1 + }, + "stream": { + "title": "Stream", + "type": "boolean", + "description": "If set, partial message deltas will be sent. Tokens will be sent as data-only server-sent events as they become available.", + "instillShortDescription": "If set, partial message deltas will be sent", + "instillAcceptFormats": [ + "boolean" + ], + "default": false + } + } + } + }, + "required": [ + "data" + ] + }, + "output": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Chat output", + "description": "Output schema of the chat task", + "instillShortDescription": "Output schema of the chat task", + "type": "object", + "properties": { + "data": { + "description": "Output data", + "instillShortDescription": "Output data", + "type": "object", + "properties": { + "choices": { + "title": "Choices", + "type": "array", + "description": "List of chat completion choices", + "instillShortDescription": "List of chat completion choices", + "instillFormat": "array", + "items": { + "type": "object", + "properties": { + "finish-reason": { + "title": "Finish reason", + "type": "string", + "description": "The reason the model stopped generating tokens.", + "instillShortDescription": "The reason the model stopped generating tokens.", + "instillFormat": "string", + "enum": [ + "stop", + "length" + ] + }, + "index": { + "title": "Index", + "type": "integer", + "description": "The index of the choice in the list of choices.", + "instillShortDescription": "The index of the choice in the list of choices.", + "instillFormat": "integer" + }, + "message": { + "title": "Message", + "type": "object", + "description": "A chat message generated by the model.", + "instillShortDescription": "A chat message generated by the model.", + "properties": { + "content": { + "title": "Content", + "type": "string", + "description": 
"The contents of the message.", + "instillShortDescription": "The contents of the message.", + "instillFormat": "string" + }, + "role": { + "title": "Role", + "type": "string", + "description": "The role of the author of this message.", + "instillShortDescription": "The role of the author of this message.", + "instillFormat": "string" + } + } + }, + "created": { + "title": "Created", + "type": "integer", + "description": "The Unix timestamp (in seconds) of when the chat completion was created.", + "instillShortDescription": "The Unix timestamp (in seconds) of when the chat completion was created.", + "instillFormat": "integer" + } + }, + "required": [ + "finish-reason", + "index", + "message", + "created" + ] + } + } + }, + "required": [ + "choices" + ] + }, + "metadata": { + "description": "Output metadata", + "instillShortDescription": "Output metadata", + "type": "object", + "properties": { + "usage": { + "description": "Usage statistics for the request.", + "instillShortDescription": "Usage statistics for the request.", + "type": "object", + "properties": { + "completion-tokens": { + "title": "Completion tokens", + "type": "integer", + "description": "Number of tokens in the generated response.", + "instillShortDescription": "Number of tokens in the generated response.", + "instillFormat": "integer" + }, + "prompt-tokens": { + "title": "Prompt tokens", + "type": "integer", + "description": "Number of tokens in the prompt.", + "instillShortDescription": "Number of tokens in the prompt.", + "instillFormat": "integer" + }, + "total-tokens": { + "title": "Total tokens", + "type": "integer", + "description": "Total number of tokens used in the request (prompt + completion).", + "instillShortDescription": "Total number of tokens used in the request (prompt + completion).", + "instillFormat": "integer" + } + }, + "required": [ + "completion-tokens", + "prompt-tokens", + "total-tokens" + ] + } + } + } + }, + "required": [ + "data" + ] + } + }, + "TASK_COMPLETION": { + 
"title": "Completion", + "instillShortDescription": "Generate text response base on input", + "input": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Completion input", + "description": "Input schema of the completion task", + "instillShortDescription": "Input schema of the completion task", + "type": "object", + "properties": { + "data": { + "description": "Input data", + "instillShortDescription": "Input data", + "type": "object", + "properties": { + "model": { + "description": "The model to be used.", + "instillShortDescription": "The model to be used.", + "instillAcceptFormats": [ + "string" + ], + "title": "Model Name", + "type": "string" + }, + "system-message": { + "title": "System message", + "type": "string", + "description": "The contents of the system message.", + "instillShortDescription": "The contents of the system message.", + "instillAcceptFormats": [ + "string" + ] + }, + "prompt": { + "title": "Input prompt", + "type": "string", + "description": "The input prompt to generate text on.", + "instillShortDescription": "The input prompt to generate text on.", + "instillAcceptFormats": [ + "string" + ] + } + }, + "required": [ + "model", + "prompt" + ] + }, + "parameter": { + "description": "Input parameter", + "instillShortDescription": "Input parameter", + "type": "object", + "properties": { + "max-tokens": { + "title": "Max new tokens", + "type": "integer", + "description": "The maximum number of tokens for model to generate", + "instillShortDescription": "The maximum number of tokens for model to generate", + "instillAcceptFormats": [ + "integer" + ], + "default": 50 + }, + "seed": { + "title": "Seed", + "type": "integer", + "description": "The seed, default is 0", + "instillShortDescription": "The seed, default is 0", + "instillAcceptFormats": [ + "integer" + ], + "default": 0 + }, + "n": { + "title": "Number of choices", + "type": "integer", + "description": "How many chat completion choices to generate for each input 
message.", + "instillShortDescription": "How many chat completion choices to generate for each input message.", + "instillAcceptFormats": [ + "integer" + ], + "default": 1 + }, + "temperature": { + "title": "Temperature", + "type": "number", + "description": "The temperature for sampling", + "instillShortDescription": "The temperature for sampling", + "instillAcceptFormats": [ + "number" + ], + "default": 0.7 + }, + "top-p": { + "title": "Top P", + "type": "number", + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.", + "instillShortDescription": "Nucleus sampling", + "instillAcceptFormats": [ + "number" + ], + "default": 1 + }, + "stream": { + "title": "Stream", + "type": "boolean", + "description": "If set, partial message deltas will be sent. 
Tokens will be sent as data-only server-sent events as they become available.", + "instillShortDescription": "If set, partial message deltas will be sent", + "instillAcceptFormats": [ + "boolean" + ], + "default": false + } + } + } + }, + "required": [ + "data" + ] + }, + "output": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Completion output", + "description": "Output schema of the completion task", + "instillShortDescription": "Output schema of the completion task", + "type": "object", + "properties": { + "data": { + "description": "Output data", + "instillShortDescription": "Output data", + "type": "object", + "properties": { + "choices": { + "title": "Choices", + "type": "array", + "description": "List of chat completion choices", + "instillShortDescription": "List of chat completion choices", + "instillFormat": "array", + "items": { + "type": "object", + "properties": { + "finish-reason": { + "title": "Finish reason", + "type": "string", + "description": "The reason the model stopped generating tokens.", + "instillShortDescription": "The reason the model stopped generating tokens.", + "instillFormat": "string", + "enum": [ + "stop", + "length" + ] + }, + "index": { + "title": "Index", + "type": "integer", + "description": "The index of the choice in the list of choices.", + "instillShortDescription": "The index of the choice in the list of choices.", + "instillFormat": "integer" + }, + "content": { + "title": "Content", + "type": "string", + "description": "The contents generated by the model.", + "instillShortDescription": "The contents generated by the model.", + "instillFormat": "string" + }, + "created": { + "title": "Created", + "type": "integer", + "description": "The Unix timestamp (in seconds) of when the chat completion was created.", + "instillShortDescription": "The Unix timestamp (in seconds) of when the chat completion was created.", + "instillFormat": "integer" + } + }, + "required": [ + "finish-reason", + "index", + "content", + 
"created" + ] + } + } + }, + "required": [ + "choices" + ] + }, + "metadata": { + "description": "Output metadata", + "instillShortDescription": "Output metadata", + "type": "object", + "properties": { + "usage": { + "description": "Usage statistics for the request.", + "instillShortDescription": "Usage statistics for the request.", + "type": "object", + "properties": { + "completion-tokens": { + "title": "Completion tokens", + "type": "integer", + "description": "Number of tokens in the generated response.", + "instillShortDescription": "Number of tokens in the generated response.", + "instillFormat": "integer" + }, + "prompt-tokens": { + "title": "Prompt tokens", + "type": "integer", + "description": "Number of tokens in the prompt.", + "instillShortDescription": "Number of tokens in the prompt.", + "instillFormat": "integer" + }, + "total-tokens": { + "title": "Total tokens", + "type": "integer", + "description": "Total number of tokens used in the request (prompt + completion).", + "instillShortDescription": "Total number of tokens used in the request (prompt + completion).", + "instillFormat": "integer" + } + }, + "required": [ + "completion-tokens", + "prompt-tokens", + "total-tokens" + ] + } + } + } + }, + "required": [ + "data" + ] + } + }, + "TASK_TEXT_TO_IMAGE": { + "title": "Completion", + "instillShortDescription": "Generate text response base on input", + "input": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Completion input", + "description": "Input schema of the completion task", + "instillShortDescription": "Input schema of the completion task", + "type": "object", + "properties": { + "data": { + "description": "Input data", + "instillShortDescription": "Input data", + "type": "object", + "properties": { + "model": { + "description": "The model to be used.", + "instillShortDescription": "The model to be used.", + "instillAcceptFormats": [ + "string" + ], + "title": "Model Name", + "type": "string" + }, + "prompt": { + 
"title": "Input prompt", + "type": "string", + "description": "What you wish to see in the output image. A strong, descriptive prompt that clearly defines elements, colors, and subjects will lead to better results.", + "instillShortDescription": "Descriptive prompt.", + "instillAcceptFormats": [ + "string" + ] + } + }, + "required": [ + "model", + "prompt" + ] + }, + "parameter": { + "description": "Input parameter", + "instillShortDescription": "Input parameter", + "type": "object", + "properties": { + "aspect-ratio": { + "title": "Aspect ratio", + "type": "string", + "description": "Controls the aspect ratio of the generated image. Defaults to 1:1.", + "instillShortDescription": "Controls the aspect ratio of the generated image. Defaults to 1:1.", + "instillAcceptFormats": [ + "string" + ], + "default": "1:1", + "enum": [ + "16:9", + "1:1", + "21:9", + "2:3", + "3:2", + "4:5", + "5:4", + "9:16", + "9:21" + ] + }, + "negative-prompt": { + "title": "Aspect ratio", + "type": "string", + "description": "Keywords of what you do not wish to see in the output image.", + "instillShortDescription": "Keywords of what you do not wish to see in the output image.", + "instillAcceptFormats": [ + "string" + ], + "default": "1:1" + }, + "n": { + "title": "Number of choices", + "type": "integer", + "description": "How many samples to generate for each input prompt.", + "instillShortDescription": "How many samples to generate for each input prompt.", + "instillAcceptFormats": [ + "integer" + ], + "default": 1 + }, + "seed": { + "title": "Seed", + "type": "integer", + "description": "The seed, default is 0", + "instillShortDescription": "The seed, default is 0", + "instillAcceptFormats": [ + "integer" + ], + "default": 0 + } + } + } + }, + "required": [ + "data" + ] + }, + "output": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Chat output", + "description": "Output schema of the chat task", + "instillShortDescription": "Output schema of the chat task", + 
"type": "object", + "properties": { + "data": { + "description": "Output data", + "instillShortDescription": "Output data", + "type": "object", + "properties": { + "choices": { + "title": "Choices", + "type": "array", + "description": "List of generated sample images", + "instillShortDescription": "List of generated sample images", + "instillFormat": "array", + "items": { + "type": "object", + "properties": { + "finish-reason": { + "title": "Finish reason", + "type": "string", + "description": "The reason the model stopped generating tokens.", + "instillShortDescription": "The reason the model stopped generating tokens.", + "instillFormat": "string", + "enum": [ + "content_filtered", + "success" + ] + }, + "image": { + "title": "Image", + "type": "string", + "description": "The generated image, encoded to base64.", + "instillShortDescription": "The generated image, encoded to base64.", + "instillFormat": "string" + } + }, + "required": [ + "finish-reason", + "image" + ] + } + } + }, + "required": [ + "choices" + ] + }, + "metadata": { + "description": "Output metadata", + "instillShortDescription": "Output metadata", + "type": "object", + "properties": { + "usage": { + "description": "Usage statistics for the request.", + "instillShortDescription": "Usage statistics for the request.", + "type": "object", + "properties": {}, + "required": [] + } + } + } + }, + "required": [ + "data" + ] + } + }, + "TASK_CLASSIFICATION": { + "title": "Classification", + "instillShortDescription": "Classify images into predefined categories.", + "input": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Classification input", + "description": "Input schema of the classification task", + "instillShortDescription": "Input schema of the classification task", + "type": "object", + "properties": { + "data": { + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/vision-input", + "type": "object" + }, + 
"parameter": { + "description": "Input parameter", + "instillShortDescription": "Input parameter", + "type": "object", + "properties": {} + } + }, + "required": [ + "data" + ] + }, + "output": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Classification output", + "description": "Output schema of the classification task", + "instillShortDescription": "Output schema of the classification task", + "type": "object", + "properties": { + "data": { + "description": "Output data", + "instillShortDescription": "Output data", + "type": "object", + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/classification" + }, + "metadata": { + "description": "Output metadata", + "instillShortDescription": "Output metadata", + "type": "object", + "properties": { + "usage": { + "description": "Usage statistics for the request.", + "instillShortDescription": "Usage statistics for the request.", + "type": "object", + "properties": {}, + "required": [] + } + } + } + }, + "required": [ + "data" + ] + } + }, + "TASK_DETECTION": { + "title": "Detection", + "instillShortDescription": "Detect and localize multiple objects in images.", + "input": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Detection input", + "description": "Input schema of the detection task", + "instillShortDescription": "Input schema of the detection task", + "type": "object", + "properties": { + "data": { + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/vision-input", + "type": "object" + }, + "parameter": { + "description": "Input parameter", + "instillShortDescription": "Input parameter", + "type": "object", + "properties": {} + } + }, + "required": [ + "data" + ] + }, + "output": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Detection output", + "description": "Output schema of the detection task", + 
"instillShortDescription": "Output schema of the detection task", + "type": "object", + "properties": { + "data": { + "description": "Output data", + "instillShortDescription": "Output data", + "type": "object", + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/detection" + }, + "metadata": { + "description": "Output metadata", + "instillShortDescription": "Output metadata", + "type": "object", + "properties": { + "usage": { + "description": "Usage statistics for the request.", + "instillShortDescription": "Usage statistics for the request.", + "type": "object", + "properties": {}, + "required": [] + } + } + } + }, + "required": [ + "data" + ] + } + }, + "TASK_KEYPOINT": { + "title": "Keypoint", + "instillShortDescription": "Detect and localize multiple keypoints of objects in images.", + "input": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Keypoint input", + "description": "Input schema of the keypoint task", + "instillShortDescription": "Input schema of the keypoint task", + "type": "object", + "properties": { + "data": { + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/vision-input", + "type": "object" + }, + "parameter": { + "description": "Input parameter", + "instillShortDescription": "Input parameter", + "type": "object", + "properties": {} + } + }, + "required": [ + "data" + ] + }, + "output": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Keypoint output", + "description": "Output schema of the keypoint task", + "instillShortDescription": "Output schema of the keypoint task", + "type": "object", + "properties": { + "data": { + "description": "Output data", + "instillShortDescription": "Output data", + "type": "object", + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/keypoint" + }, + "metadata": { + 
"description": "Output metadata", + "instillShortDescription": "Output metadata", + "type": "object", + "properties": { + "usage": { + "description": "Usage statistics for the request.", + "instillShortDescription": "Usage statistics for the request.", + "type": "object", + "properties": {}, + "required": [] + } + } + } + }, + "required": [ + "data" + ] + } + }, + "TASK_OCR": { + "title": "OCR", + "instillShortDescription": "Detect and recognize text in images.", + "input": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OCR input", + "description": "Input schema of the OCR task", + "instillShortDescription": "Input schema of the OCR task", + "type": "object", + "properties": { + "data": { + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/vision-input", + "type": "object" + }, + "parameter": { + "description": "Input parameter", + "instillShortDescription": "Input parameter", + "type": "object", + "properties": {} + } + }, + "required": [ + "data" + ] + }, + "output": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OCR output", + "description": "Output schema of the OCR task", + "instillShortDescription": "Output schema of the OCR task", + "type": "object", + "properties": { + "data": { + "description": "Output data", + "instillShortDescription": "Output data", + "type": "object", + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/ocr" + }, + "metadata": { + "description": "Output metadata", + "instillShortDescription": "Output metadata", + "type": "object", + "properties": { + "usage": { + "description": "Usage statistics for the request.", + "instillShortDescription": "Usage statistics for the request.", + "type": "object", + "properties": {}, + "required": [] + } + } + } + }, + "required": [ + "data" + ] + } + }, + "TASK_SEMANTIC_SEGMENTATION": { + "title": "Semantic Segmentation", + 
"instillShortDescription": "Classify image pixels into predefined categories.", + "input": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Semantic segmentation input", + "description": "Input schema of the semantic segmentation task", + "instillShortDescription": "Input schema of the semantic segmentation task", + "type": "object", + "properties": { + "data": { + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/vision-input", + "type": "object" + }, + "parameter": { + "description": "Input parameter", + "instillShortDescription": "Input parameter", + "type": "object", + "properties": {} + } + }, + "required": [ + "data" + ] + }, + "output": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Semantic segmentation output", + "description": "Output schema of the semantic segmentation task", + "instillShortDescription": "Output schema of the semantic segmentation task", + "type": "object", + "properties": { + "data": { + "description": "Output data", + "instillShortDescription": "Output data", + "type": "object", + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/semantic-segmentation" + }, + "metadata": { + "description": "Output metadata", + "instillShortDescription": "Output metadata", + "type": "object", + "properties": { + "usage": { + "description": "Usage statistics for the request.", + "instillShortDescription": "Usage statistics for the request.", + "type": "object", + "properties": {}, + "required": [] + } + } + } + }, + "required": [ + "data" + ] + } + }, + "TASK_INSTANCE_SEGMENTATION": { + "title": "Instance Segmentation", + "instillShortDescription": "Detect, localize and delineate multiple objects in images.", + "input": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Instance segmentation input", + "description": "Input schema of the instance segmentation task", 
+ "instillShortDescription": "Input schema of the instance segmentation task", + "type": "object", + "properties": { + "data": { + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/vision-input", + "type": "object" + }, + "parameter": { + "description": "Input parameter", + "instillShortDescription": "Input parameter", + "type": "object", + "properties": {} + } + }, + "required": [ + "data" + ] + }, + "output": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Instance segmentation output", + "description": "Output schema of the Instance segmentation task", + "instillShortDescription": "Output schema of the Instance segmentation task", + "type": "object", + "properties": { + "data": { + "description": "Output data", + "instillShortDescription": "Output data", + "type": "object", + "$ref": "https://raw.githubusercontent.com/instill-ai/instill-core/62743c4/schema/schema.json#/$defs/instill-types/instance-segmentation" + }, + "metadata": { + "description": "Output metadata", + "instillShortDescription": "Output metadata", + "type": "object", + "properties": { + "usage": { + "description": "Usage statistics for the request.", + "instillShortDescription": "Usage statistics for the request.", + "type": "object", + "properties": {}, + "required": [] + } + } + } + }, + "required": [ + "data" + ] + } + }, + "TASK_CUSTOM": { + "title": "Custom", + "instillShortDescription": "Custom, with arbitrary input/output data", + "input": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Custom input", + "description": "Input schema of the custom task", + "instillShortDescription": "Input schema of the custom task", + "type": "object", + "properties": { + "data": { + "description": "Input data", + "instillShortDescription": "Input data", + "type": "object", + "properties": {} + }, + "parameter": { + "description": "Input parameter", + "instillShortDescription": "Input parameter", + 
"type": "object", + "properties": {} + } + }, + "required": [ + "data" + ] + }, + "output": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Custom output", + "description": "Output schema of the custom task", + "instillShortDescription": "Output schema of the custom task", + "type": "object", + "properties": { + "data": { + "description": "Output data", + "instillShortDescription": "Output data", + "type": "object", + "properties": {} + }, + "metadata": { + "description": "Output metadata", + "instillShortDescription": "Output metadata", + "type": "object", + "properties": {} + } + }, + "required": [ + "data" + ] + } + } +} diff --git a/ai/openai/v0/main.go b/ai/openai/v0/main.go index f67fcf4f..24d1051b 100644 --- a/ai/openai/v0/main.go +++ b/ai/openai/v0/main.go @@ -169,8 +169,8 @@ func (e *execution) worker(ctx context.Context, client *httpclient.Client, batch // If chat history is not provided, add the system message to the messages messages = append(messages, message{Role: "system", Content: *inputStruct.SystemMessage}) } - userContents := []content{} - userContents = append(userContents, content{Type: "text", Text: &inputStruct.Prompt}) + userContents := []Content{} + userContents = append(userContents, Content{Type: "text", Text: &inputStruct.Prompt}) for _, image := range inputStruct.Images { b, err := base64.StdEncoding.DecodeString(base.TrimBase64Mime(image)) if err != nil { @@ -178,7 +178,7 @@ func (e *execution) worker(ctx context.Context, client *httpclient.Client, batch return } url := fmt.Sprintf("data:%s;base64,%s", mimetype.Detect(b).String(), base.TrimBase64Mime(image)) - userContents = append(userContents, content{Type: "image_url", ImageURL: &imageURL{URL: url}}) + userContents = append(userContents, Content{Type: "image_url", ImageURL: &ImageURL{URL: url}}) } messages = append(messages, multiModalMessage{Role: "user", Content: userContents}) diff --git a/ai/openai/v0/text_generation.go b/ai/openai/v0/text_generation.go 
index e028af24..c87bb333 100644 --- a/ai/openai/v0/text_generation.go +++ b/ai/openai/v0/text_generation.go @@ -4,15 +4,15 @@ const ( completionsPath = "/v1/chat/completions" ) -type textMessage struct { +type TextMessage struct { Role string `json:"role"` - Content []content `json:"content"` + Content []Content `json:"content"` } type TextCompletionInput struct { Prompt string `json:"prompt"` Images []string `json:"images"` - ChatHistory []*textMessage `json:"chat-history,omitempty"` + ChatHistory []*TextMessage `json:"chat-history,omitempty"` Model string `json:"model"` SystemMessage *string `json:"system-message,omitempty"` Temperature *float32 `json:"temperature,omitempty"` @@ -61,7 +61,7 @@ type responseFormatReqStruct struct { type multiModalMessage struct { Role string `json:"role"` - Content []content `json:"content"` + Content []Content `json:"content"` } type message struct { @@ -69,14 +69,14 @@ type message struct { Content string `json:"content"` } -type imageURL struct { +type ImageURL struct { URL string `json:"url"` } -type content struct { +type Content struct { Type string `json:"type"` Text *string `json:"text,omitempty"` - ImageURL *imageURL `json:"image_url,omitempty"` + ImageURL *ImageURL `json:"image_url,omitempty"` } type textCompletionResp struct { diff --git a/application/numbers/v0/main.go b/application/numbers/v0/main.go index 69be81ec..4ff7e3f8 100644 --- a/application/numbers/v0/main.go +++ b/application/numbers/v0/main.go @@ -201,7 +201,7 @@ func (e *execution) registerAsset(data []byte, reg Register) (string, error) { if err != nil { return "", err } - return "", fmt.Errorf(string(bodyBytes)) + return "", fmt.Errorf("error: %s", string(bodyBytes)) } } diff --git a/data/artifact/v0/main.go b/data/artifact/v0/main.go index e5f79509..a3996c6c 100644 --- a/data/artifact/v0/main.go +++ b/data/artifact/v0/main.go @@ -85,7 +85,7 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, case taskQuery: e.execute = 
e.query default: - return nil, fmt.Errorf(x.Task + " task is not supported.") + return nil, fmt.Errorf("%s task is not supported", x.Task) } return e, nil diff --git a/data/googlecloudstorage/v0/upload.go b/data/googlecloudstorage/v0/upload.go index 68063189..fb8528e3 100644 --- a/data/googlecloudstorage/v0/upload.go +++ b/data/googlecloudstorage/v0/upload.go @@ -14,7 +14,7 @@ import ( func uploadToGCS(client *storage.Client, bucketName, objectName, data string) error { wc := client.Bucket(bucketName).Object(objectName).NewWriter(context.Background()) b, _ := base64.StdEncoding.DecodeString(base.TrimBase64Mime(data)) - if _, err := io.WriteString(wc, string(b)); err != nil { + if _, err := io.Writer.Write(wc, b); err != nil { return err } return wc.Close() diff --git a/data/milvus/v0/config/setup.json b/data/milvus/v0/config/setup.json index 561c753b..0590325c 100644 --- a/data/milvus/v0/config/setup.json +++ b/data/milvus/v0/config/setup.json @@ -5,7 +5,8 @@ "url": { "description": "Fill in your Milvus public URL endpoint with port number, e.g http://3.25.202.142:19530", "instillUpstreamTypes": [ - "value","reference" + "value", + "reference" ], "instillAcceptFormats": [ "string" @@ -15,10 +16,11 @@ "title": "Milvus URL Endpoint", "type": "string" }, - "username":{ + "username": { "description": "Fill in your Milvus username", "instillUpstreamTypes": [ - "value","reference" + "value", + "reference" ], "instillAcceptFormats": [ "string" @@ -28,10 +30,11 @@ "title": "Milvus Username", "type": "string" }, - "password":{ + "password": { "description": "Fill in your Milvus password", "instillUpstreamTypes": [ - "value","reference" + "value", + "reference" ], "instillAcceptFormats": [ "string" diff --git a/data/zilliz/v0/config/setup.json b/data/zilliz/v0/config/setup.json index 20b62097..0f3678ee 100644 --- a/data/zilliz/v0/config/setup.json +++ b/data/zilliz/v0/config/setup.json @@ -5,7 +5,8 @@ "url": { "description": "Fill in your Zilliz public URL endpoint", 
"instillUpstreamTypes": [ - "value","reference" + "value", + "reference" ], "instillAcceptFormats": [ "string" @@ -15,10 +16,11 @@ "title": "Zilliz URL Endpoint", "type": "string" }, - "api-key":{ + "api-key": { "description": "Fill in your Zilliz API key", "instillUpstreamTypes": [ - "value","reference" + "value", + "reference" ], "instillAcceptFormats": [ "string" diff --git a/go.mod b/go.mod index 5d47c719..ca7ef50e 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( github.com/google/uuid v1.6.0 github.com/h2non/filetype v1.1.3 github.com/iFaceless/godub v0.0.0-20200728093528-a30bb4d1a0f1 - github.com/instill-ai/protogen-go v0.3.3-alpha.0.20240823161910-354761b16f15 + github.com/instill-ai/protogen-go v0.3.3-alpha.0.20240902123216-b1e82befa60a github.com/instill-ai/x v0.4.0-alpha github.com/itchyny/gojq v0.12.14 github.com/jmoiron/sqlx v1.4.0 diff --git a/go.sum b/go.sum index ac1f787d..d450557c 100644 --- a/go.sum +++ b/go.sum @@ -289,8 +289,8 @@ github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uG github.com/iFaceless/godub v0.0.0-20200728093528-a30bb4d1a0f1 h1:oqeURuHQrImMykykqJgFbStlaDXyY7JpXXrwXyjr9ls= github.com/iFaceless/godub v0.0.0-20200728093528-a30bb4d1a0f1/go.mod h1:tKRg0K9YmfD3eD6KFos+YHIVMouKMzxDSK5XpdxdCUI= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/instill-ai/protogen-go v0.3.3-alpha.0.20240823161910-354761b16f15 h1:4nFVI3TCq8Iu/lDz2YOAM/koNdjUktXUG16YEEpnXBo= -github.com/instill-ai/protogen-go v0.3.3-alpha.0.20240823161910-354761b16f15/go.mod h1:2blmpUwiTwxIDnrjIqT6FhR5ewshZZF554wzjXFvKpQ= +github.com/instill-ai/protogen-go v0.3.3-alpha.0.20240902123216-b1e82befa60a h1:HFcG4wsROTfj6dq/17jX9nxUjp2M8zYm7/BPXGIQ3kI= +github.com/instill-ai/protogen-go v0.3.3-alpha.0.20240902123216-b1e82befa60a/go.mod h1:2blmpUwiTwxIDnrjIqT6FhR5ewshZZF554wzjXFvKpQ= github.com/instill-ai/x v0.4.0-alpha h1:zQV2VLbSHjMv6gyBN/2mwwrvWk0/mJM6ZKS12AzjfQg= 
github.com/instill-ai/x v0.4.0-alpha/go.mod h1:L6jmDPrUou6XskaLXZuK/gDeitdoPa9yE8ONKt1ZwCw= github.com/itchyny/gojq v0.12.14 h1:6k8vVtsrhQSYgSGg827AD+PVVaB1NLXEdX+dda2oZCc= diff --git a/operator/audio/v0/main.go b/operator/audio/v0/main.go index a44ca109..3b726f05 100644 --- a/operator/audio/v0/main.go +++ b/operator/audio/v0/main.go @@ -57,7 +57,7 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, case taskSliceAudio: e.execute = sliceAudio default: - return nil, fmt.Errorf(x.Task + " task is not supported.") + return nil, fmt.Errorf("%s task is not supported", x.Task) } return e, nil diff --git a/operator/document/v0/main.go b/operator/document/v0/main.go index 6ee727cc..24a00a67 100644 --- a/operator/document/v0/main.go +++ b/operator/document/v0/main.go @@ -87,7 +87,7 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, case taskConvertToImages: e.execute = e.convertPDFToImages default: - return nil, fmt.Errorf(fmt.Sprintf("%s task is not supported.", x.Task)) + return nil, fmt.Errorf("%s task is not supported", x.Task) } return e, nil diff --git a/operator/document/v0/markdown_transformer.go b/operator/document/v0/markdown_transformer.go index 21d4d51a..4ad5db0e 100644 --- a/operator/document/v0/markdown_transformer.go +++ b/operator/document/v0/markdown_transformer.go @@ -267,7 +267,7 @@ func ConvertToPDF(base64Encoded, fileExtension string) (string, error) { tempDir, err := os.MkdirTemp("", "libreoffice") if err != nil { - return "", fmt.Errorf("failed to create temporary directory: " + err.Error()) + return "", fmt.Errorf("failed to create temporary directory: %s", err.Error()) } defer os.RemoveAll(tempDir) @@ -275,7 +275,7 @@ func ConvertToPDF(base64Encoded, fileExtension string) (string, error) { cmd.Env = append(os.Environ(), "HOME="+tempDir) if err := cmd.Run(); err != nil { - return "", fmt.Errorf("failed to execute LibreOffice command: " + err.Error()) + return "", fmt.Errorf("failed to 
execute LibreOffice command: %s", err.Error()) } // LibreOffice is not executed in temp directory like inputFileName. diff --git a/operator/video/v0/main.go b/operator/video/v0/main.go index 3e8867a8..d3e1b2b1 100644 --- a/operator/video/v0/main.go +++ b/operator/video/v0/main.go @@ -57,7 +57,7 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, case taskSubsampleVideoFrames: e.execute = subsampleVideoFrames default: - return nil, fmt.Errorf(x.Task + " task is not supported.") + return nil, fmt.Errorf("%s task is not supported", x.Task) } return e, nil diff --git a/operator/web/v0/main.go b/operator/web/v0/main.go index da0548a7..e88f1b13 100644 --- a/operator/web/v0/main.go +++ b/operator/web/v0/main.go @@ -68,7 +68,7 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, e.getDocAfterRequestURL = getDocAfterRequestURL e.execute = e.ScrapeWebpage default: - return nil, fmt.Errorf(x.Task + " task is not supported.") + return nil, fmt.Errorf("%s task is not supported", x.Task) } return e, nil diff --git a/operator/web/v0/scrape_webpage.go b/operator/web/v0/scrape_webpage.go index 3c05fd9f..2d4d6a84 100644 --- a/operator/web/v0/scrape_webpage.go +++ b/operator/web/v0/scrape_webpage.go @@ -138,7 +138,7 @@ func getRemovedTagsHTML(doc *goquery.Document, input ScrapeWebpageInput) string } } - if input.OnlyIncludeTags == nil || len(input.OnlyIncludeTags) == 0 { + if len(input.OnlyIncludeTags) == 0 { html, err := doc.Html() if err != nil { log.Println("error getting HTML: ", err)