-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(instillmodel): implement instill model embedding (#727)
Because - we need embedding from instill model This commit - add the standardised input & output for embedding - add embedding to instill model - support oneOf for the array items
- Loading branch information
1 parent
700805f
commit 17d88bc
Showing
10 changed files
with
571 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
package ai | ||
|
||
// EmbeddingInput is the standardized input for the embedding model. | ||
// It pairs the data to be embedded with the parameters for the request and | ||
// is serialized to JSON under the keys "data" and "parameter". | ||
type EmbeddingInput struct { | ||
// Data is the standardized input data for the embedding model. | ||
Data EmbeddingInputData `json:"data"` | ||
// Parameter is the standardized parameter for the embedding model. | ||
Parameter EmbeddingParameter `json:"parameter"` | ||
} | ||
|
||
// EmbeddingInputData is the standardized input data for the embedding model. | ||
// It is serialized to JSON under the keys "model" and "embeddings". | ||
type EmbeddingInputData struct { | ||
// Model is the model name. | ||
Model string `json:"model"` | ||
// Embeddings is the list of data to be embedded; each element is one | ||
// InputEmbedding item. | ||
Embeddings []InputEmbedding `json:"embeddings"` | ||
} | ||
|
||
// InputEmbedding is the standardized input data to be embedded. | ||
// Type names the kind of payload, and Text, ImageURL and ImageBase64 carry it. | ||
// None of the fields use omitempty, so all four keys are always present in the | ||
// marshaled JSON, with unused ones encoded as empty strings. | ||
// NOTE(review): presumably only the field matching Type is populated; confirm | ||
// against the callers. | ||
type InputEmbedding struct { | ||
// Type is the type of the input data. It can be either "text", "image-url", or "image-base64". | ||
Type string `json:"type"` | ||
// Text is the text to be embedded. | ||
Text string `json:"text"` | ||
// ImageURL is the URL of the image to be embedded. | ||
ImageURL string `json:"image-url"` | ||
// ImageBase64 is the base64 encoded image to be embedded. | ||
ImageBase64 string `json:"image-base64"` | ||
} | ||
|
||
// EmbeddingParameter is the standardized parameter for the embedding model. | ||
// The struct itself applies no defaults: the zero value of each string field | ||
// is "" and of Dimensions is 0. | ||
// NOTE(review): the defaults named in the field comments are presumably | ||
// applied by the code that consumes this struct; confirm. | ||
type EmbeddingParameter struct { | ||
// Format is the format of the output embeddings. Default is "float", can be "float" or "base64". | ||
Format string `json:"format"` | ||
// Dimensions is the number of dimensions of the output embeddings. | ||
Dimensions int `json:"dimensions"` | ||
// InputType is the type of the input data. It can be "query" or "data". | ||
InputType string `json:"input-type"` | ||
// Truncate is how to handle inputs longer than the max token length. Defaults to 'End'. Can be 'End', 'Start', or 'None'. | ||
Truncate string `json:"truncate"` | ||
} | ||
|
||
// EmbeddingOutput is the standardized output for the embedding model. | ||
// It wraps the output data under the JSON key "data". | ||
type EmbeddingOutput struct { | ||
// Data is the standardized output data for the embedding model. | ||
Data EmbeddingOutputData `json:"data"` | ||
} | ||
|
||
// EmbeddingOutputData is the standardized output data for the embedding model. | ||
// It is serialized to JSON under the key "embeddings". | ||
type EmbeddingOutputData struct { | ||
// Embeddings is the list of output embeddings; each element is one | ||
// OutputEmbedding item. | ||
Embeddings []OutputEmbedding `json:"embeddings"` | ||
} | ||
|
||
// OutputEmbedding is the standardized output embedding. | ||
// It is serialized to JSON under the keys "index", "vector" and "created". | ||
type OutputEmbedding struct { | ||
// Index is the index of the output embedding. | ||
// NOTE(review): presumably the position of the matching input item; confirm. | ||
Index int `json:"index"` | ||
// Vector is the output embedding. Elements are typed as any, so the | ||
// element type is not enforced by this struct. | ||
// NOTE(review): presumably numeric values for the "float" format; confirm | ||
// how "base64" output is represented. | ||
Vector []any `json:"vector"` | ||
// Created is the Unix timestamp (in seconds) of when the embedding was created. | ||
// NOTE(review): declared as int, whose width is platform-dependent in Go. | ||
Created int `json:"created"` | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.