feat(instillmodel): implement instill model embedding (#727)

Because: - we need embeddings from the Instill model. This commit: - adds the standardised input & output for embedding - adds embedding to the Instill model - supports oneOf for the array items
instill-ai · Oct 18, 2024 · 17d88bc · 17d88bc
1 parent 700805f
commit 17d88bc
Show file tree

Hide file tree

Showing 10 changed files with 571 additions and 15 deletions.
diff --git a/pkg/component/ai/embedding.go b/pkg/component/ai/embedding.go
@@ -0,0 +1,63 @@
+package ai
+
+// EmbeddingInput is the standardized input for the embedding model.
+type EmbeddingInput struct {
+	// Data is the standardized input data for the embedding model.
+	Data      EmbeddingInputData `json:"data"`
+	// Parameter is the standardized parameter set for the embedding model.
+	Parameter EmbeddingParameter `json:"parameter"`
+}
+
+// EmbeddingInputData is the standardized input data for the embedding model.
+type EmbeddingInputData struct {
+	// Model is the name of the model used to generate the embeddings.
+	Model      string           `json:"model"`
+	// Embeddings is the list of input data items to be embedded.
+	Embeddings []InputEmbedding `json:"embeddings"`
+}
+
+// InputEmbedding is a single standardized input item to be embedded.
+type InputEmbedding struct {
+	// Type is the type of the input data: one of "text", "image-url", or "image-base64".
+	Type        string `json:"type"`
+	// Text is the text to be embedded (for Type "text").
+	Text        string `json:"text"`
+	// ImageURL is the URL of the image to be embedded (for Type "image-url").
+	ImageURL    string `json:"image-url"`
+	// ImageBase64 is the base64-encoded image to be embedded (for Type "image-base64").
+	ImageBase64 string `json:"image-base64"`
+}
+
+// EmbeddingParameter is the standardized parameter set for the embedding model.
+type EmbeddingParameter struct {
+	// Format is the data format of the output embeddings: "float" (the default) or "base64".
+	Format     string `json:"format"`
+	// Dimensions is the number of dimensions of the output embeddings.
+	Dimensions int    `json:"dimensions"`
+	// InputType is the type of the input data. It can be "query" or "data".
+	InputType  string `json:"input-type"`
+	// Truncate is how to handle inputs longer than the max token length: "End" (the default), "Start", or "None".
+	Truncate   string `json:"truncate"`
+}
+
+// EmbeddingOutput is the standardized output for the embedding model.
+type EmbeddingOutput struct {
+	// Data holds the standardized output data of the embedding model.
+	Data EmbeddingOutputData `json:"data"`
+}
+
+// EmbeddingOutputData is the standardized output data for the embedding model.
+type EmbeddingOutputData struct {
+	// Embeddings is the list of generated embeddings.
+	Embeddings []OutputEmbedding `json:"embeddings"`
+}
+
+// OutputEmbedding is a single standardized output embedding.
+type OutputEmbedding struct {
+	// Index is the index of the embedding vector in the output array.
+	Index   int   `json:"index"`
+	// Vector is the embedding vector.
+	Vector  []any `json:"vector"`
+	// Created is the Unix timestamp (in seconds) of when the embedding was created.
+	Created int   `json:"created"`
+}
diff --git a/pkg/component/ai/instill/v0/README.mdx b/pkg/component/ai/instill/v0/README.mdx
@@ -18,6 +18,7 @@ It can carry out the following tasks:
 - [Text to Image](#text-to-image)
 - [Visual Question Answering](#visual-question-answering)
 - [Chat](#chat)
+- [Embedding](#embedding)
 
 
 
@@ -582,4 +583,116 @@ The image URL
 | Text | `text` | string | Text |
 </div>
 
+### Embedding
+
+This task refers to the process of generating vector embeddings from input data, which can be text, images, or other formats. This transformation converts the data into a dense, fixed-length numerical representation that captures the essential features of the original input. These embeddings are typically used in machine learning tasks to represent complex data in a more structured, simplified form.
+
+<div class="markdown-col-no-wrap" data-col-1 data-col-2>
+
+| Input | ID | Type | Description |
+| :--- | :--- | :--- | :--- |
+| Task ID (required) | `task` | string | `TASK_EMBEDDING` |
+| [Data](#embedding-data) (required) | `data` | object | Input data. |
+| [Parameter](#embedding-parameter) | `parameter` | object | Input parameter. |
+</div>
+
+
+<details>
+<summary> Input Objects in Embedding</summary>
+
+<h4 id="embedding-data">Data</h4>
+
+Input data.
+
+<div class="markdown-col-no-wrap" data-col-1 data-col-2>
+
+| Field | Field ID | Type | Note |
+| :--- | :--- | :--- | :--- |
+| [Embeddings](#embedding-embeddings) | `embeddings` | array | List of input data to be embedded.  |
+| Model | `model` | string | The model to be used for generating embeddings. It should be `namespace/model-name/version`, e.g. `abrc/yolov7-stomata/v0.1.0`. You can see the version in the Versions tab of the Model page.  |
+</div>
+<h4 id="embedding-parameter">Parameter</h4>
+
+Input parameter.
+
+<div class="markdown-col-no-wrap" data-col-1 data-col-2>
+
+| Field | Field ID | Type | Note |
+| :--- | :--- | :--- | :--- |
+| Dimensions | `dimensions` | integer | Number of dimensions in the output embedding vectors.  |
+| Data Format | `format` | string | The data format of the embeddings. Defaults to float.  <br/><details><summary><strong>Enum values</strong></summary><ul><li>`float`</li><li>`base64`</li></ul></details>  |
+| Input Type | `input-type` | string | The type of input data to be embedded (e.g., query, document).  |
+| Truncate | `truncate` | string | How to handle inputs longer than the max token length. Defaults to 'End'.  <br/><details><summary><strong>Enum values</strong></summary><ul><li>`None`</li><li>`End`</li><li>`Start`</li></ul></details>  |
+</div>
+</details>
+
+<details>
+<summary>The <code>embeddings</code> Object </summary>
+
+<h4 id="embedding-embeddings">Embeddings</h4>
+
+Each item in `embeddings` must fulfill one of the following schemas:
+
+<h5 id="embedding-text"><code>Text</code></h5>
+
+<div class="markdown-col-no-wrap" data-col-1 data-col-2>
+
+| Field | Field ID | Type | Note |
+| :--- | :--- | :--- | :--- |
+| Text Content | `text` | string |  When the input is text, the raw text is tokenized and processed into a dense, fixed-length vector that captures semantic information such as word meanings and relationships. These text embeddings enable tasks like sentiment analysis, search, or classification.  |
+| Text | `type` | string |  Must be `"text"`   |
+</div>
+
+<h5 id="embedding-image-url"><code>Image URL</code></h5>
+
+<div class="markdown-col-no-wrap" data-col-1 data-col-2>
+
+| Field | Field ID | Type | Note |
+| :--- | :--- | :--- | :--- |
+| Image URL | `image-url` | string |  When the input is an image from a URL, the image is first fetched from the URL and then decoded into its original format. It is then processed into a fixed-length vector representing essential visual features like shapes and colors. These image embeddings are useful for tasks like image classification or similarity search, providing structured numerical data for complex visual inputs.  |
+| Image URL | `type` | string |  Must be `"image-url"`   |
+</div>
+
+<h5 id="embedding-image-base64"><code>Image Base64</code></h5>
+
+<div class="markdown-col-no-wrap" data-col-1 data-col-2>
+
+| Field | Field ID | Type | Note |
+| :--- | :--- | :--- | :--- |
+| Image File | `image-base64` | string |  When the input is an image in base64 format, the base64-encoded data is first decoded into its original image form. The image is then processed and transformed into a dense, fixed-length numerical vector, capturing key visual features like shapes, colors, or textures.  |
+| Image File | `type` | string |  Must be `"image-base64"`   |
+</div>
+</details>
+
+<div class="markdown-col-no-wrap" data-col-1 data-col-2>
+
+| Output | ID | Type | Description |
+| :--- | :--- | :--- | :--- |
+| [Data](#embedding-output-data) | `data` | object | Output data. |
+</div>
+
+<details>
+<summary> Output Objects in Embedding</summary>
+
+<h4 id="embedding-output-data">Data</h4>
+
+<div class="markdown-col-no-wrap" data-col-1 data-col-2>
+
+| Field | Field ID | Type | Note |
+| :--- | :--- | :--- | :--- |
+| [Embeddings](#embedding-output-embeddings) | `embeddings` | array | List of generated embeddings. |
+</div>
+
+<h4 id="embedding-output-embeddings">Embeddings</h4>
+
+<div class="markdown-col-no-wrap" data-col-1 data-col-2>
+
+| Field | Field ID | Type | Note |
+| :--- | :--- | :--- | :--- |
+| Created | `created` | integer | The Unix timestamp (in seconds) of when the embedding was created. |
+| Index | `index` | integer | The index of the embedding vector in the array. |
+| Embedding Vector | `vector` | array | The embedding vector. |
+</div>
+</details>
+
 
diff --git a/pkg/component/ai/instill/v0/config/definition.json b/pkg/component/ai/instill/v0/config/definition.json
@@ -10,7 +10,8 @@
     "TASK_TEXT_GENERATION_CHAT",
     "TASK_TEXT_TO_IMAGE",
     "TASK_VISUAL_QUESTION_ANSWERING",
-    "TASK_CHAT"
+    "TASK_CHAT",
+    "TASK_EMBEDDING"
   ],
   "custom": false,
   "documentationUrl": "https://www.instill.tech/docs/component/ai/instill-model",