From a3488b16138194608902658be992ab15406be79d Mon Sep 17 00:00:00 2001 From: danielle9897 Date: Sun, 9 Feb 2025 12:27:11 +0200 Subject: [PATCH 1/5] RDoc-3170 modify searchTerm => queryVector wherever is relevant --- .../VectorSearchUsingDynamicQuery.cs | 20 +++++++++---------- .../VectorSearchUsingStaticIndex.cs | 16 +++++++-------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs index d85668b95..d3ce56e34 100644 --- a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs +++ b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs @@ -186,7 +186,7 @@ where vector.search(embedding.text(Name), 'italian food', 0.82, 20)") x => x.TagsEmbeddedAsSingle, VectorEmbeddingType.Single), // Call 'ByEmbedding' // Provide the vector for the similarity comparison - searchVector => searchVector.ByEmbedding( + queryVector => queryVector.ByEmbedding( new RavenVector(new float[] { 6.599999904632568f, 7.699999809265137f })), // Optionally, specify the minimum similarity level 0.85f, @@ -204,7 +204,7 @@ where vector.search(embedding.text(Name), 'italian food', 0.82, 20)") .VectorSearch( field => field.WithEmbedding( x => x.TagsEmbeddedAsSingle, VectorEmbeddingType.Single), - searchVector => searchVector.ByEmbedding( + queryVector => queryVector.ByEmbedding( new RavenVector(new float[] { 6.599999904632568f, 7.699999809265137f })), 0.85f, 10) @@ -221,7 +221,7 @@ where vector.search(embedding.text(Name), 'italian food', 0.82, 20)") .VectorSearch( field => field.WithEmbedding( x => x.TagsEmbeddedAsSingle, VectorEmbeddingType.Single), - searchVector => searchVector.ByEmbedding( + queryVector => queryVector.ByEmbedding( new RavenVector(new float[] { 
6.599999904632568f, 7.699999809265137f })), 0.85f, 10) @@ -238,7 +238,7 @@ where vector.search(embedding.text(Name), 'italian food', 0.82, 20)") .VectorSearch( field => field.WithEmbedding( x => x.TagsEmbeddedAsSingle, VectorEmbeddingType.Single), - searchVector => searchVector.ByEmbedding( + queryVector => queryVector.ByEmbedding( new RavenVector(new float[] { 6.599999904632568f, 7.699999809265137f })), 0.85f, 10) @@ -286,7 +286,7 @@ where vector.search(TagsEmbeddedAsSingle, $queryVector, 0.85, 10) // Call 'ByEmbedding' // Provide the vector for the similarity comparison // (provide a single vector from the vector list in the TagsEmbeddedAsInt8 field) - searchVector => searchVector.ByEmbedding( + queryVector => queryVector.ByEmbedding( // The provided vector MUST be in the same format as was stored in your document // Call 'VectorQuantizer.ToInt8' to transform the raw data to the Int8 format VectorQuantizer.ToInt8(new float[] { 0.1f, 0.2f }))) @@ -307,7 +307,7 @@ where vector.search(TagsEmbeddedAsSingle, $queryVector, 0.85, 10) field => field.WithBase64(x => x.TagsEmbeddedAsBase64, VectorEmbeddingType.Single), // Call 'ByBase64' // Provide the Base64 string that represents the vector to query against - searchVector => searchVector.ByBase64("zczMPc3MTD6amZk+")) + queryVector => queryVector.ByBase64("zczMPc3MTD6amZk+")) .Customize(x => x.WaitForNonStaleResults()) .ToList(); #endregion @@ -599,7 +599,7 @@ where vector.search(embedding.text_i8(Name), $searchTerm) .WithEmbedding(x => x.TagsEmbeddedAsSingle, VectorEmbeddingType.Single) // Set the quantization type for the generated embeddings .TargetQuantization(VectorEmbeddingType.Binary), - searchVector => searchVector + queryVector => queryVector // Provide the vector to use for comparison .ByEmbedding(new RavenVector(new float[] { @@ -620,7 +620,7 @@ where vector.search(embedding.text_i8(Name), $searchTerm) .WithEmbedding(x => x.TagsEmbeddedAsSingle, VectorEmbeddingType.Single) // Set the quantization type for the 
generated embeddings .TargetQuantization(VectorEmbeddingType.Binary), - searchVector => searchVector + queryVector => queryVector // Provide the vector to use for comparison .ByEmbedding(new RavenVector(new float[] { @@ -642,7 +642,7 @@ where vector.search(embedding.text_i8(Name), $searchTerm) .WithEmbedding(x => x.TagsEmbeddedAsSingle, VectorEmbeddingType.Single) // Set the quantization type for the generated embeddings .TargetQuantization(VectorEmbeddingType.Binary), - searchVector => searchVector + queryVector => queryVector // Provide the vector to use for comparison .ByEmbedding(new RavenVector(new float[] { @@ -664,7 +664,7 @@ where vector.search(embedding.text_i8(Name), $searchTerm) .WithEmbedding(x => x.TagsEmbeddedAsSingle, VectorEmbeddingType.Single) // Set the quantization type for the generated embeddings .TargetQuantization(VectorEmbeddingType.Binary), - searchVector => searchVector + queryVector => queryVector // Provide the vector to use for comparison .ByEmbedding(new RavenVector(new float[] { diff --git a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingStaticIndex.cs b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingStaticIndex.cs index e6ad75a9f..540e58d18 100644 --- a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingStaticIndex.cs +++ b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingStaticIndex.cs @@ -617,7 +617,7 @@ where exact(vector.search(VectorFromText, 'italian food', 0.82, 20))") // Call 'WithField' // Specify the index-field in which to search for similar values .WithField(x => x.VectorFromSingle), - searchTerm => searchTerm + queryVector => queryVector // Call 'ByEmbedding' // Provide the vector for the similarity comparison .ByEmbedding( @@ -636,7 +636,7 @@ where exact(vector.search(VectorFromText, 'italian food', 0.82, 20))") .VectorSearch( field => field 
.WithField(x => x.VectorFromSingle), - searchTerm => searchTerm + queryVector => queryVector .ByEmbedding( new RavenVector(new float[] { 6.599999904632568f, 7.699999809265137f }))) .Customize(x => x.WaitForNonStaleResults()) @@ -653,7 +653,7 @@ where exact(vector.search(VectorFromText, 'italian food', 0.82, 20))") .VectorSearch( field => field .WithField(x => x.VectorFromSingle), - searchTerm => searchTerm + queryVector => queryVector .ByEmbedding( new RavenVector(new float[] { 6.599999904632568f, 7.699999809265137f }))) .WaitForNonStaleResults() @@ -670,7 +670,7 @@ where exact(vector.search(VectorFromText, 'italian food', 0.82, 20))") .VectorSearch( field => field .WithField(x => x.VectorFromSingle), - searchTerm => searchTerm + queryVector => queryVector .ByEmbedding( new RavenVector(new float[] { 6.599999904632568f, 7.699999809265137f }))) .WaitForNonStaleResults() @@ -717,7 +717,7 @@ where vector.search(VectorFromSingle, $queryVector) // Call 'WithField' // Specify the index-field in which to search for similar values .WithField(x => x.VectorFromInt8Arrays), - searchTerm => searchTerm + queryVector => queryVector // Call 'ByEmbedding' // Provide the vector for the similarity comparison // (Note: provide a single vector) @@ -739,7 +739,7 @@ where vector.search(VectorFromSingle, $queryVector) .VectorSearch( field => field .WithField(x => x.VectorFromInt8Arrays), - searchTerm => searchTerm + queryVector => queryVector .ByEmbedding( VectorQuantizer.ToInt8(new float[] { 0.1f, 0.2f }))) .Customize(x => x.WaitForNonStaleResults()) @@ -756,7 +756,7 @@ where vector.search(VectorFromSingle, $queryVector) .VectorSearch( field => field .WithField(x => x.VectorFromInt8Arrays), - searchTerm => searchTerm + queryVector => queryVector .ByEmbedding( VectorQuantizer.ToInt8(new float[] { 0.1f, 0.2f }))) .WaitForNonStaleResults() @@ -773,7 +773,7 @@ where vector.search(VectorFromSingle, $queryVector) .VectorSearch( field => field .WithField(x => x.VectorFromInt8Arrays), - 
searchTerm => searchTerm + queryVector => queryVector .ByEmbedding( VectorQuantizer.ToInt8(new float[] { 0.1f, 0.2f }))) .WaitForNonStaleResults() From 79775435e5343bd21d66ebf22d28770361dde0b5 Mon Sep 17 00:00:00 2001 From: danielle9897 Date: Tue, 11 Feb 2025 10:28:17 +0200 Subject: [PATCH 2/5] RDoc-3170 The attachments article --- .../Raven.Documentation.Pages.csproj | 6 +- .../ai-integration/.docs.json | 6 + ...achments-for-vector-search.dotnet.markdown | 205 +++++ .../ravendb-as-vector-database.markdown | 2 +- ...-search-using-static-index.dotnet.markdown | 5 +- .../VectorSearchUsingDynamicQuery.cs | 2 - .../VectorSearchWithAttachments.cs | 842 ++++++++++++++++++ .../Raven.Documentation.Samples.csproj | 6 +- 8 files changed, 1062 insertions(+), 12 deletions(-) create mode 100644 Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown create mode 100644 Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchWithAttachments.cs diff --git a/Documentation/7.0/Raven.Documentation.Pages/Raven.Documentation.Pages.csproj b/Documentation/7.0/Raven.Documentation.Pages/Raven.Documentation.Pages.csproj index 839af2671..9039c75ff 100644 --- a/Documentation/7.0/Raven.Documentation.Pages/Raven.Documentation.Pages.csproj +++ b/Documentation/7.0/Raven.Documentation.Pages/Raven.Documentation.Pages.csproj @@ -5,9 +5,9 @@ - - - + + + diff --git a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/.docs.json b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/.docs.json index b16ff6417..f862aa778 100644 --- a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/.docs.json +++ b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/.docs.json @@ -17,6 +17,12 @@ "DiscussionId": "a25c7cca-e662-401f-9b66-8ce1102a2a09", "Mappings": [] }, + { + "Path": "indexing-attachments-for-vector-search.markdown", + "Name": "Indexing Attachments for Vector Search", + "DiscussionId": 
"7510d989-4925-4e7d-8c48-c3608c09506c", + "Mappings": [] + }, { "Path": "data-types-for-vector-search.markdown", "Name": "Data Types for Vector Search", diff --git a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown new file mode 100644 index 000000000..207c6b4cb --- /dev/null +++ b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown @@ -0,0 +1,205 @@ +# Indexing Attachments for Vector Search +--- + +{NOTE: } + +* This article explains how to index attachments using a **static-index** to enable vector search on their content. + Note: Vector search on attachment content is not available when making a [dynamic query](../ai-integration/vector-search-using-dynamic-query). + +* **Prior to this article**, refer to the [Vector search using a static index](../ai-integration/vector-search-using-static-index) article for general knowledge about + indexing a vector field. + +* In this page: + * [Overview](../ai-integration/indexing-attachments-for-vector-search#overview) + * [Indexing TEXT attachments](../ai-integration/indexing-attachments-for-vector-search#indexing-text-attachments) + * [Indexing NUMERICAL attachments](../ai-integration/indexing-attachments-for-vector-search#indexing-numerical-attachments) + * [LINQ index](../ai-integration/indexing-attachments-for-vector-search#linq-index) + * [JS index](../ai-integration/indexing-attachments-for-vector-search#js-index) + * [Indexing ALL attachments](../ai-integration/indexing-attachments-for-vector-search#indexing-all-attachments) + +{NOTE/} + +--- + +{PANEL: Overview} + +{CONTENT-FRAME: } + +#### Attachments in RavenDB + +--- + +* Attachments in RavenDB allow you to associate binary files with your JSON documents. 
+ You can use attachments to store images, PDFs, videos, text files, or any other format. + +* Attachments are stored separately from documents, reducing document size and avoiding unnecessary duplication. + They are stored as **binary data**, regardless of content type. + +* Attachments are handled as streams, allowing efficient upload and retrieval. + Learn more in: [What are attachments](../document-extensions/attachments/what-are-attachments). + +{CONTENT-FRAME/} +{CONTENT-FRAME: } + +#### Indexing attachment content for vector search + +--- + +You can index attachment content in a vector field within a static-index, +enabling vector search on text or numerical data that is stored in the attachments: + +* **Attachments with TEXT**: + * During indexing, RavenDB processes the text into a single embedding per attachment using the built-in + [bge-micro-v2](https://huggingface.co/TaylorAI/bge-micro-v2) model. + +* **Attachments with NUMERICAL data**: + * While attachments can store any file type, RavenDB does Not generate embeddings from images, videos, or other non-textual content. + Each attachment must contain a **single** precomputed embedding vector, generated externally. + * RavenDB indexes the embedding vector from the attachment and can apply [quantization](../ai-integration/vector-search-using-dynamic-query#quantization-options) + (e.g., index it in _Int8_ format) if this is configured. + * All embeddings indexed within the same vector-field in the static-index must be vectors of the **same dimension** to ensure consistency in indexing and search. + They must also be created using the **same model**. + +{CONTENT-FRAME/} +{PANEL/} + +{PANEL: Indexing TEXT attachments} + +* The following index defines a **vector field** named `VectorFromAttachment`. + +* It indexes embeddings generated from the text content of the `description.txt` attachment. + This applies to all _Company_ documents that contain an attachment with that name. 
+ +{CODE-TABS} +{CODE-TAB:csharp:Map_index index_1@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:JS_index index_2@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:IndexDefinition index_3@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:Storing_text_attachments store_attachments_1@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TABS/} + +Execute a vector search using the index: +Results will include _Company_ documents whose attachment contains text similar to `"chinese food"`. + +{CODE-TABS} +{CODE-TAB:csharp:Query query_1@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:Query_async query_1_async@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:DocumentQuery query_2@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:DocumentQuery_async query_2_async@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:RawQuery query_3@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:RawQuery_async query_3_async@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB-BLOCK:sql:RQL} +from index "Companies/ByVector/FromTextAttachment" +where vector.search(VectorFromAttachment, $searchTerm, 0.8) +{ "searchTerm" : "chinese food" } +{CODE-TAB-BLOCK/} +{CODE-TABS/} + +{PANEL/} + +{PANEL: Indexing NUMERICAL attachments} + +### LINQ index + +* The following index defines a **vector field** named `VectorFromAttachment`. + +* It indexes embeddings generated from the numerical data stored in the `vector.raw` attachment. + This applies to all _Company_ documents that contain an attachment with that name. + +* Each attachment contains raw numerical data in 32-bit floating-point format. 
+ +{CODE-TABS} +{CODE-TAB:csharp:Map_index index_4@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:IndexDefinition index_5@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:Storing_numerical_attachments store_attachments_2@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TABS/} + +Execute a vector search using the index: +Results will include _Company_ documents whose attachment contains vectors similar to the query vector. + +{CODE-TABS} +{CODE-TAB:csharp:Query query_4@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:Query_async query_4_async@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:DocumentQuery query_5@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:DocumentQuery_async query_5_async@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:RawQuery query_6@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:RawQuery_async query_6_async@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB-BLOCK:sql:RQL} +from index "Companies/ByVector/FromNumericalAttachment" +where vector.search(VectorFromAttachment, $queryVector) +{ "queryVector" : [0.1, 0.2, 0.3, 0.4] } +{CODE-TAB-BLOCK/} +{CODE-TABS/} + +--- + +### JS index + +* The following is the JavaScript index format equivalent to the [LINQ index](../ai-integration/indexing-attachments-for-vector-search#linq-index) shown above. + +* The main difference is that JavaScript indexes do Not support `getContentAsStream()` on attachment objects: + * Because of this, embedding vectors must be stored in attachments as **Base64-encoded strings**. + * Use `getContentAsString()` to retrieve the attachment content as a string, as shown in this example. 
+ +{CODE-TABS} +{CODE-TAB:csharp:JS_index index_6@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:Storing_numerical_attachments_as_base64 store_attachments_3@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TABS/} + +Execute a vector search using the index: +Results will include _Company_ documents whose attachment contains vectors similar to the query vector. + +{CODE-TABS} +{CODE-TAB:csharp:RawQuery query_7@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:RawQuery_async query_7_async@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB-BLOCK:sql:RQL} +from index "Companies/ByVector/FromNumericalAttachment/JS" +where vector.search(VectorFromAttachment, $queryVector) +{ "queryVector" : [0.1, 0.2, 0.3, 0.4] } +{CODE-TAB-BLOCK/} +{CODE-TABS/} + +{PANEL/} + +{PANEL: Indexing ALL attachments} + +* The following index defines a vector field named `VectorFromAttachment`. + +* It indexes embeddings generated from the numerical data stored in ALL attachments of all _Company_ documents. + +{CODE-TABS} +{CODE-TAB:csharp:Map_index index_7@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:IndexDefinition index_8@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:Storing_numerical_attachments store_attachments_4@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TABS/} + +Execute a vector search using the index: +Results will include _Company_ documents whose attachments contain vectors similar to the query vector. 
+ +{CODE-TABS} +{CODE-TAB:csharp:Query query_8@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:Query_async query_8_async@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:DocumentQuery query_9@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:DocumentQuery_async query_9_async@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:RawQuery query_10@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:RawQuery_async query_10_async@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB-BLOCK:sql:RQL} +from index "Companies/ByVector/AllAttachments" +where vector.search(VectorFromAttachment, $queryVector) +{ "queryVector" : [0.1, 0.2, -0.7, -0.8] } +{CODE-TAB-BLOCK/} +{CODE-TABS/} + +{PANEL/} + +## Related Articles + +### Vector Search + +- [RavenDB as a vector database](../ai-integration/ravendb-as-vector-database) +- [Vector search using a dynamic query](../ai-integration/vector-search-using-dynamic-query) +- [Vector search using a static index](../ai-integration/vector-search-using-static-index) + +### Querying + +- [Query overview](../client-api/session/querying/how-to-query) +- [Full-text search](../client-api/session/querying/text-search/full-text-search) diff --git a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/ravendb-as-vector-database.markdown b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/ravendb-as-vector-database.markdown index ffef72553..cc640b463 100644 --- a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/ravendb-as-vector-database.markdown +++ b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/ravendb-as-vector-database.markdown @@ -73,7 +73,7 @@ This flexibility allows you to work with complex documents containing various data types and retrieve meaningful insights by querying the index across all these fields. 
An example is available in [Indexing multiple field types](../ai-integration/vector-search-using-static-index#indexing-multiple-field-types). -* Document attachments can also be indexed as vector fields, and Map-Reduce indexes can incorporate vector fields in their reduce phase, +* Document [attachments](../ai-integration/indexing-attachments-for-vector-search) can also be indexed as vector fields, and Map-Reduce indexes can incorporate vector fields in their reduce phase, further extending the versatility of your data processing and search capabilities. ##### Built-in embedding support: diff --git a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown index 27a0bf3f8..430a45fcd 100644 --- a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown +++ b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown @@ -155,8 +155,6 @@ It indexes embeddings generated from the textual data in the `Name` field of all {CODE-TAB:csharp:IndexDefinition index_3@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TABS/} ---- - Execute a vector search using the index: Results will include _Product_ documents where the `Name` field is similar to the search term `"italian food"`. @@ -184,7 +182,7 @@ The examples in this section use the [sample data provided in the dynamic query --- -The following index defines a vector field named `VectorFromSingle `. +The following index defines a vector field named `VectorFromSingle`. It indexes embeddings generated from the numerical data in the `TagsEmbeddedAsSingle` field of all _Movie_ documents. The raw numerical data in the source documents is in **32-bit floating-point format**. 
@@ -289,6 +287,7 @@ or vector.search(VectorFromText, $searchTerm2, 0.8) - [RavenDB as a vector database](../ai-integration/ravendb-as-vector-database) - [Vector search using a dynamic query](../ai-integration/vector-search-using-dynamic-query) - [Data types for vector search](../ai-integration/data-tuypes-for-vector-search) +- [Indexing attachments for vector search](../ai-integration/indexing-attachments-for-vector-search) ### Querying diff --git a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs index d3ce56e34..6f554e17d 100644 --- a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs +++ b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs @@ -1,11 +1,9 @@ using System; -using System.Collections; using System.Collections.Generic; using System.Linq; using System.Linq.Expressions; using System.Numerics; using System.Threading.Tasks; -using Raven.Client; using Raven.Client.Documents; using Raven.Client.Documents.Indexes.Vector; using Raven.Client.Documents.Linq; diff --git a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchWithAttachments.cs b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchWithAttachments.cs new file mode 100644 index 000000000..7dee32537 --- /dev/null +++ b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchWithAttachments.cs @@ -0,0 +1,842 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Runtime.InteropServices; +using System.Text; +using System.Threading.Tasks; +using Raven.Client.Documents; +using Raven.Client.Documents.Indexes; +using Raven.Client.Documents.Indexes.Vector; +using 
Raven.Client.Documents.Operations.Indexes; +using Raven.Documentation.Samples.Orders; + +namespace Raven.Documentation.Samples.AiIntegration +{ + public class VectorSearchWithAttachments + { + // The indexes: + // ============ + + #region index_1 + public class Companies_ByVector_FromTextAttachment : + AbstractIndexCreationTask + { + public class IndexEntry() + { + // This index-field will hold embeddings + // generated from the TEXT in the attachments. + public object VectorFromAttachment { get; set; } + } + + public Companies_ByVector_FromTextAttachment() + { + Map = companies => from company in companies + + // Load the attachment from the document (ensure it is not null) + let attachment = LoadAttachment(company, "description.txt") + where attachment != null + + select new IndexEntry() + { + // Index the text content from the attachment in the vector field + VectorFromAttachment = + CreateVector(attachment.GetContentAsString(Encoding.UTF8)) + }; + + // Configure the vector field: + VectorIndexes.Add(x => x.VectorFromAttachment, + new VectorOptions() + { + // Specify 'Text' as the source format + SourceEmbeddingType = VectorEmbeddingType.Text, + // Specify the desired destination format within the index + DestinationEmbeddingType = VectorEmbeddingType.Single + }); + + SearchEngineType = Raven.Client.Documents.Indexes.SearchEngineType.Corax; + } + } + #endregion + + #region Index_2 + public class Companies_ByVector_FromTextAttachment_JS : + AbstractJavaScriptIndexCreationTask + { + public Companies_ByVector_FromTextAttachment_JS() + { + Maps = new HashSet + { + $@"map('Companies', function (company) {{ + + var attachment = loadAttachment(company, 'description.txt'); + if (!attachment) return null; + + return {{ + VectorFromAttachment: createVector(attachment.getContentAsString('utf8')) + }}; + }})" + }; + + Fields = new Dictionary() + { + { + "VectorFromAttachment", + new IndexFieldOptions() + { + Vector = new() + { + SourceEmbeddingType = 
VectorEmbeddingType.Text, + DestinationEmbeddingType = VectorEmbeddingType.Single + } + } + } + }; + + SearchEngineType = Raven.Client.Documents.Indexes.SearchEngineType.Corax; + } + } + #endregion + + #region index_4 + public class Companies_ByVector_FromNumericalAttachment : + AbstractIndexCreationTask + { + public class IndexEntry() + { + // This index-field will hold embeddings + // generated from the NUMERICAL content in the attachments. + public object VectorFromAttachment { get; set; } + } + + public Companies_ByVector_FromNumericalAttachment() + { + Map = companies => from company in companies + + // Load the attachment from the document (ensure it is not null) + let attachment = LoadAttachment(company, "vector.raw") + where attachment != null + + select new IndexEntry + { + // Index the attachment's content in the vector field + VectorFromAttachment = CreateVector(attachment.GetContentAsStream()) + }; + + // Configure the vector field: + VectorIndexes.Add(x => x.VectorFromAttachment, + new VectorOptions() + { + // Define the source embedding type + SourceEmbeddingType = VectorEmbeddingType.Single, + // Define the desired destination format within the index + DestinationEmbeddingType = VectorEmbeddingType.Single + }); + + SearchEngineType = Raven.Client.Documents.Indexes.SearchEngineType.Corax; + } + } + #endregion + + #region Index_6 + public class Companies_ByVector_FromNumericalAttachment_JS : + AbstractJavaScriptIndexCreationTask + { + public Companies_ByVector_FromNumericalAttachment_JS() + { + Maps = new HashSet() + { + $@"map('Companies', function (company) {{ + + var attachment = loadAttachment(company, 'vector_base64.raw'); + if (!attachment) return null; + + return {{ + VectorFromAttachment: createVector(attachment.getContentAsString('utf8')) + }}; + }})" + }; + + Fields = new(); + Fields.Add("VectorFromAttachment", new IndexFieldOptions() + { + Vector = new VectorOptions() + { + SourceEmbeddingType = VectorEmbeddingType.Single, + 
DestinationEmbeddingType = VectorEmbeddingType.Single + } + }); + + SearchEngineType = Raven.Client.Documents.Indexes.SearchEngineType.Corax; + } + } + #endregion + + #region index_7 + public class Companies_ByVector_AllAttachments : + AbstractIndexCreationTask + { + public class IndexEntry() + { + // This index-field will hold embeddings + // generated from the NUMERICAL content of ALL attachments. + public object VectorFromAttachment { get; set; } + } + + public Companies_ByVector_AllAttachments() + { + Map = companies => from company in companies + + // Load ALL attachments from the document + let attachments = LoadAttachments(company) + + select new IndexEntry + { + // Index the attachments content in the vector field + VectorFromAttachment = CreateVector( + attachments.Select(e => e.GetContentAsStream())) + }; + + // Configure the vector field: + VectorIndexes.Add(x => x.VectorFromAttachment, + new VectorOptions() + { + SourceEmbeddingType = VectorEmbeddingType.Single, + DestinationEmbeddingType = VectorEmbeddingType.Single + }); + + SearchEngineType = Raven.Client.Documents.Indexes.SearchEngineType.Corax; + } + } + #endregion + + // Index definitions: + // ================== + + public void IndexDefinitionExamples() + { + using (var store = new DocumentStore()) + { + #region Index_3 + var indexDefinition = new IndexDefinition + { + Name = "Companies/ByVector/FromTextAttachment", + + Maps = new HashSet + { + @"from company in docs.Companies + + let attachment = LoadAttachment(company, ""description.txt"") + where attachment != null + + select new + { + VectorFromAttachment = + CreateVector(attachment.GetContentAsString(Encoding.UTF8)) + }" + }, + + Fields = new Dictionary() + { + { + "VectorFromAttachment", + new IndexFieldOptions() + { + Vector = new VectorOptions() + { + SourceEmbeddingType = VectorEmbeddingType.Text, + DestinationEmbeddingType = VectorEmbeddingType.Single + } + } + } + }, + + Configuration = new IndexConfiguration() + { + 
["Indexing.Static.SearchEngineType"] = "Corax" + } + }; + + store.Maintenance.Send(new PutIndexesOperation(indexDefinition)); + #endregion + } + + using (var store = new DocumentStore()) + { + #region Index_5 + var indexDefinition = new IndexDefinition + { + Name = "Companies/ByVector/FromNumericalAttachment", + + Maps = new HashSet + { + @"from company in docs.Companies + + let attachment = LoadAttachment(company, ""vector.raw"") + where attachment != null + + select new + { + VectorFromAttachment = CreateVector(attachment.GetContentAsStream()) + }" + }, + + Fields = new Dictionary() + { + { + "VectorFromAttachment", + new IndexFieldOptions() + { + Vector = new VectorOptions() + { + SourceEmbeddingType = VectorEmbeddingType.Single, + DestinationEmbeddingType = VectorEmbeddingType.Single + } + } + } + }, + + Configuration = new IndexConfiguration() + { + ["Indexing.Static.SearchEngineType"] = "Corax" + } + }; + + store.Maintenance.Send(new PutIndexesOperation(indexDefinition)); + #endregion + } + + using (var store = new DocumentStore()) + { + #region Index_8 + var indexDefinition = new IndexDefinition + { + Name = "Companies/ByVector/AllAttachments", + + Maps = new HashSet + { + @"from company in docs.Companies + + let attachments = LoadAttachments(company) + + select new + { + VectorFromAttachment = + CreateVector(attachments.Select(e => e.GetContentAsStream())) + }" + }, + + Fields = new Dictionary() + { + { + "VectorFromAttachment", + new IndexFieldOptions() + { + Vector = new VectorOptions() + { + SourceEmbeddingType = VectorEmbeddingType.Single, + DestinationEmbeddingType = VectorEmbeddingType.Single, + } + } + } + }, + + Configuration = new IndexConfiguration() + { + ["Indexing.Static.SearchEngineType"] = "Corax" + } + }; + + store.Maintenance.Send(new PutIndexesOperation(indexDefinition)); + #endregion + } + } + + // Storing the attachments: + // ======================== + + public async Task StoreAttachments() + { + using (var store = new DocumentStore()) 
+ { + #region store_attachments_1 + // Prepare text as `byte[]` to be stored as attachments: + // ===================================================== + var byteArray1 = Encoding.UTF8.GetBytes( + "Supplies soft drinks, fruit juices, and flavored syrups to restaurants and retailers."); + var byteArray2 = Encoding.UTF8.GetBytes( + "Supplies fine dining restaurants with premium meats, cheeses, and wines across France."); + var byteArray3 = Encoding.UTF8.GetBytes( + "An American grocery chain known for its fresh produce, organic foods, and local meats."); + var byteArray4 = Encoding.UTF8.GetBytes( + "An Asian grocery store specializing in ingredients for Japanese and Thai cuisine."); + var byteArray5 = Encoding.UTF8.GetBytes( + "A rural general store offering homemade jams, fresh-baked bread, and locally crafted gifts."); + + using (var session = store.OpenSession()) + { + // Load existing Company documents from RavenDB's sample data: + // =========================================================== + var company1 = session.Load("companies/11-A"); + var company2 = session.Load("companies/26-A"); + var company3 = session.Load("companies/32-A"); + var company4 = session.Load("companies/41-A"); + var company5 = session.Load("companies/43-A"); + + // Store the attachments in the documents (using MemoryStream): + // ============================================================ + session.Advanced.Attachments.Store(company1, "description.txt", + new MemoryStream(byteArray1), "text/plain"); + session.Advanced.Attachments.Store(company2, "description.txt", + new MemoryStream(byteArray2), "text/plain"); + session.Advanced.Attachments.Store(company3, "description.txt", + new MemoryStream(byteArray3), "text/plain"); + session.Advanced.Attachments.Store(company4, "description.txt", + new MemoryStream(byteArray4), "text/plain"); + session.Advanced.Attachments.Store(company5, "description.txt", + new MemoryStream(byteArray5), "text/plain"); + + session.SaveChanges(); + } + #endregion 
+ } + + using (var store = new DocumentStore()) + { + #region store_attachments_2 + // These vectors are simple pre-computed embedding vectors with 32-bit floating-point values. + // Note: In a real scenario, embeddings would be generated by a model. + // ========================================================================================== + var v1 = new float[] { 0.1f, 0.2f, 0.3f, 0.4f }; + var v2 = new float[] { 0.1f, 0.7f, 0.8f, 0.9f }; + var v3 = new float[] { 0.5f, 0.6f, 0.7f, 0.8f }; + + // Prepare the embedding vectors as `byte[]` to be stored as attachments: + // ===================================================================== + var byteArray1 = MemoryMarshal.Cast(v1).ToArray(); + var byteArray2 = MemoryMarshal.Cast(v2).ToArray(); + var byteArray3 = MemoryMarshal.Cast(v3).ToArray(); + + using (var session = store.OpenSession()) + { + // Load existing Company documents from RavenDB's sample data: + // =========================================================== + var company1 = session.Load("companies/50-A"); + var company2 = session.Load("companies/51-A"); + var company3 = session.Load("companies/52-A"); + + // Store the attachments in the documents (using MemoryStream): + // ============================================================ + session.Advanced.Attachments.Store(company1, "vector.raw", new MemoryStream(byteArray1)); + session.Advanced.Attachments.Store(company2, "vector.raw", new MemoryStream(byteArray2)); + session.Advanced.Attachments.Store(company3, "vector.raw", new MemoryStream(byteArray3)); + + session.SaveChanges(); + } + #endregion + } + + using (var store = new DocumentStore()) + { + #region store_attachments_3 + // These vectors are simple pre-computed embedding vectors with 32-bit floating-point values. + // Note: In a real scenario, embeddings would be generated by a model. 
+ // ========================================================================================== + var v1 = new float[] { 0.1f, 0.2f, 0.3f, 0.4f }; + var v2 = new float[] { 0.1f, 0.7f, 0.8f, 0.9f }; + var v3 = new float[] { 0.5f, 0.6f, 0.7f, 0.8f }; + + // Prepare the embedding vectors as a BASE64 string to be stored as attachments: + // ============================================================================= + var base64ForV1 = Convert.ToBase64String(MemoryMarshal.Cast(v1)); + var base64ForV2 = Convert.ToBase64String(MemoryMarshal.Cast(v2)); + var base64ForV3 = Convert.ToBase64String(MemoryMarshal.Cast(v3)); + + // Convert to byte[] for streaming: + // ================================ + var byteArray1 = Encoding.UTF8.GetBytes(base64ForV1); + var byteArray2 = Encoding.UTF8.GetBytes(base64ForV2); + var byteArray3 = Encoding.UTF8.GetBytes(base64ForV3); + + using (var session = store.OpenSession()) + { + // Load existing Company documents from RavenDB's sample data: + // =========================================================== + var company1 = session.Load("companies/60-A"); + var company2 = session.Load("companies/61-A"); + var company3 = session.Load("companies/62-A"); + + // Store the attachments in the documents (using MemoryStream): + // ============================================================ + session.Advanced.Attachments.Store(company1, "vector_base64.raw", new MemoryStream(byteArray1)); + session.Advanced.Attachments.Store(company2, "vector_base64.raw", new MemoryStream(byteArray2)); + session.Advanced.Attachments.Store(company3, "vector_base64.raw", new MemoryStream(byteArray3)); + + session.SaveChanges(); + } + #endregion + } + + using (var store = new DocumentStore()) + { + #region store_attachments_4 + // These vectors are simple pre-computed embedding vectors with 32-bit floating-point values. + // Note: In a real scenario, embeddings would be generated by a model. 
+ // ========================================================================================== + var v1 = new float[] { 0.1f, 0.2f, 0.3f, 0.4f }; + var v2 = new float[] { 0.5f, 0.6f, 0.7f, 0.8f }; + + var v3 = new float[] { -0.1f, 0.2f, -0.7f, -0.8f }; + var v4 = new float[] { 0.3f, -0.6f, 0.9f, -0.9f }; + + // Prepare the embedding vectors as `byte[]` to be stored as attachments: + // ===================================================================== + var byteArray1 = MemoryMarshal.Cast(v1).ToArray(); + var byteArray2 = MemoryMarshal.Cast(v2).ToArray(); + + var byteArray3 = MemoryMarshal.Cast(v3).ToArray(); + var byteArray4 = MemoryMarshal.Cast(v4).ToArray(); + + using (var session = store.OpenSession()) + { + // Load existing Company documents from RavenDB's sample data: + // =========================================================== + var company1 = session.Load("companies/70-A"); + var company2 = session.Load("companies/71-A"); + + // Store multiple attachments in the documents (using MemoryStream): + // ================================================================= + + session.Advanced.Attachments.Store(company1, "vector1.raw", new MemoryStream(byteArray1)); + session.Advanced.Attachments.Store(company1, "vector2.raw", new MemoryStream(byteArray2)); + + session.Advanced.Attachments.Store(company2, "vector1.raw", new MemoryStream(byteArray3)); + session.Advanced.Attachments.Store(company2, "vector2.raw", new MemoryStream(byteArray4)); + + session.SaveChanges(); + } + #endregion + } + } + + public async Task QueryExamples() + { + using (var store = new DocumentStore()) + { + // Query for textual content in attachments + // ======================================== + + using (var session = store.OpenSession()) + { + #region query_1 + var relevantCompanies = session + .Query() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + searchTerm => searchTerm + .ByText("chinese food"), 0.8f) + .Customize(x => x.WaitForNonStaleResults()) 
+ .OfType() + .ToList(); + #endregion + } + + using (var asyncSession = store.OpenAsyncSession()) + { + #region query_1_async + var relevantCompanies = await asyncSession + .Query() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + searchTerm => searchTerm + .ByText("chinese food"), 0.8f) + .Customize(x => x.WaitForNonStaleResults()) + .OfType() + .ToListAsync(); + #endregion + } + + using (var session = store.OpenSession()) + { + #region query_2 + var relevantCompanies = session.Advanced + .DocumentQuery() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + searchTerm => searchTerm + .ByText("chinese food"), 0.8f) + .WaitForNonStaleResults() + .OfType() + .ToList(); + #endregion + } + + using (var asyncSession = store.OpenAsyncSession()) + { + #region query_2_async + var relevantCompanies = await asyncSession.Advanced + .AsyncDocumentQuery() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + searchTerm => searchTerm + .ByText("chinese food"), 0.8f) + .WaitForNonStaleResults() + .OfType() + .ToListAsync(); + #endregion + } + + using (var session = store.OpenSession()) + { + #region query_3 + var relevantCompanies = session.Advanced + .RawQuery(@" + from index 'Companies/ByVector/FromTextAttachment' + where vector.search(VectorFromAttachment, $searchTerm, 0.8)") + .AddParameter("searchTerm", "chinese food") + .WaitForNonStaleResults() + .ToList(); + #endregion + } + + using (var asyncSession = store.OpenAsyncSession()) + { + #region query_3_async + var relevantCompanies = await asyncSession.Advanced + .AsyncRawQuery(@" + from index 'Companies/ByVector/FromTextAttachment' + where vector.search(VectorFromAttachment, $searchTerm, 0.8)") + .AddParameter("searchTerm", "chinese food") + .WaitForNonStaleResults() + .ToListAsync(); + #endregion + } + + // Query for numerical content + // =========================== + + using (var session = store.OpenSession()) + { + #region query_4 + var 
similarCompanies = session + .Query() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + queryVector => queryVector + .ByEmbedding(new float[] { 0.1f, 0.2f, 0.3f, 0.4f })) + .Customize(x => x.WaitForNonStaleResults()) + .OfType() + .ToList(); + #endregion + } + + using (var asyncSession = store.OpenAsyncSession()) + { + #region query_4_async + var similarCompanies = await asyncSession + .Query() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + queryVector => queryVector + .ByEmbedding(new float[] { 0.1f, 0.2f, 0.3f, 0.4f })) + .Customize(x => x.WaitForNonStaleResults()) + .OfType() + .ToListAsync(); + #endregion + } + + using (var session = store.OpenSession()) + { + #region query_5 + var similarCompanies = session.Advanced + .DocumentQuery() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + queryVector => queryVector + .ByEmbedding(new float[] { 0.1f, 0.2f, 0.3f, 0.4f })) + .WaitForNonStaleResults() + .OfType() + .ToList(); + #endregion + } + + using (var asyncSession = store.OpenAsyncSession()) + { + #region query_5_async + var similarCompanies = await asyncSession.Advanced + .AsyncDocumentQuery() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + queryVector => queryVector + .ByEmbedding(new float[] { 0.1f, 0.2f, 0.3f, 0.4f })) + .WaitForNonStaleResults() + .OfType() + .ToListAsync(); + #endregion + } + + using (var session = store.OpenSession()) + { + #region query_6 + var similarCompanies = session.Advanced + .RawQuery(@" + from index 'Companies/ByVector/FromNumericalAttachment' + where vector.search(VectorFromAttachment, $queryVector)") + .AddParameter("queryVector", new float[] { 0.1f, 0.2f, 0.3f, 0.4f }) + .WaitForNonStaleResults() + .ToList(); + #endregion + } + + using (var asyncSession = store.OpenAsyncSession()) + { + #region query_6_async + var similarCompanies = await asyncSession.Advanced + .AsyncRawQuery(@" + from index 
'Companies/ByVector/FromNumericalAttachment' + where vector.search(VectorFromAttachment, $queryVector)") + .AddParameter("queryVector", new float[] { 0.1f, 0.2f, 0.3f, 0.4f }) + .WaitForNonStaleResults() + .ToListAsync(); + #endregion + } + + using (var session = store.OpenSession()) + { + #region query_7 + var similarCompanies = session.Advanced + .RawQuery(@" + from index 'Companies/ByVector/FromNumericalAttachment/JS' + where vector.search(VectorFromAttachment, $queryVector)") + .AddParameter("queryVector", new float[] { 0.1f, 0.2f, 0.3f, 0.4f }) + .WaitForNonStaleResults() + .ToList(); + #endregion + } + + using (var asyncSession = store.OpenAsyncSession()) + { + #region query_7_async + var similarCompanies = await asyncSession.Advanced + .AsyncRawQuery(@" + from index 'Companies/ByVector/FromNumericalAttachment/JS' + where vector.search(VectorFromAttachment, $queryVector)") + .AddParameter("queryVector", new float[] { 0.1f, 0.2f, 0.3f, 0.4f }) + .WaitForNonStaleResults() + .ToListAsync(); + #endregion + } + + using (var session = store.OpenSession()) + { + #region query_8 + var similarCompanies = session + .Query() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + queryVector => queryVector + .ByEmbedding(new float[] { -0.1f, 0.2f, -0.7f, -0.8f })) + .Customize(x => x.WaitForNonStaleResults()) + .OfType() + .ToList(); + #endregion + } + + using (var asyncSession = store.OpenAsyncSession()) + { + #region query_8_async + var similarCompanies = await asyncSession + .Query() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + queryVector => queryVector + .ByEmbedding(new float[] { -0.1f, 0.2f, -0.7f, -0.8f })) + .Customize(x => x.WaitForNonStaleResults()) + .OfType() + .ToListAsync(); + #endregion + } + + using (var session = store.OpenSession()) + { + #region query_9 + var similarCompanies = session.Advanced + .DocumentQuery() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + 
queryVector => queryVector + .ByEmbedding(new float[] { -0.1f, 0.2f, -0.7f, -0.8f })) + .WaitForNonStaleResults() + .OfType() + .ToList(); + #endregion + } + + using (var asyncSession = store.OpenAsyncSession()) + { + #region query_9_async + var similarCompanies = await asyncSession.Advanced + .AsyncDocumentQuery() + .VectorSearch( + field => field + .WithField(x => x.VectorFromAttachment), + queryVector => queryVector + .ByEmbedding(new float[] { -0.1f, 0.2f, -0.7f, -0.8f })) + .WaitForNonStaleResults() + .OfType() + .ToListAsync(); + #endregion + } + + using (var session = store.OpenSession()) + { + #region query_10 + var similarCompanies = session.Advanced + .RawQuery(@" + from index 'Companies/ByVector/AllAttachments' + where vector.search(VectorFromAttachment, $queryVector)") + .AddParameter("queryVector", new float[] { -0.1f, 0.2f, -0.7f, -0.8f }) + .WaitForNonStaleResults() + .ToList(); + #endregion + } + + using (var asyncSession = store.OpenAsyncSession()) + { + #region query_10_async + var similarCompanies = await asyncSession.Advanced + .AsyncRawQuery(@" + from index 'Companies/ByVector/AllAttachments' + where vector.search(VectorFromAttachment, $queryVector)") + .AddParameter("queryVector", new float[] { -0.1f, 0.2f, -0.7f, -0.8f }) + .WaitForNonStaleResults() + .ToListAsync(); + #endregion + } + } + } + } +} diff --git a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/Raven.Documentation.Samples.csproj b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/Raven.Documentation.Samples.csproj index 53428c5b0..f7db37a55 100644 --- a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/Raven.Documentation.Samples.csproj +++ b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/Raven.Documentation.Samples.csproj @@ -8,9 +8,9 @@ - - - + + + From 7831a0017ba71149dd921594704f50aba03fe95e Mon Sep 17 00:00:00 2001 From: danielle9897 Date: Tue, 11 Feb 2025 15:34:06 +0200 Subject: [PATCH 3/5] RDoc-3170 Rename tab title:
Map_index => LINQ_index --- ...indexing-attachments-for-vector-search.dotnet.markdown | 6 +++--- .../vector-search-using-static-index.dotnet.markdown | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown index 207c6b4cb..27a87ec8d 100644 --- a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown +++ b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown @@ -71,7 +71,7 @@ enabling vector search on text or numerical data that is stored in the attachmen This applies to all _Company_ documents that contain an attachment with that name. {CODE-TABS} -{CODE-TAB:csharp:Map_index index_1@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:LINQ_index index_1@AiIntegration\VectorSearchWithAttachments.cs /} {CODE-TAB:csharp:JS_index index_2@AiIntegration\VectorSearchWithAttachments.cs /} {CODE-TAB:csharp:IndexDefinition index_3@AiIntegration\VectorSearchWithAttachments.cs /} {CODE-TAB:csharp:Storing_text_attachments store_attachments_1@AiIntegration\VectorSearchWithAttachments.cs /} @@ -108,7 +108,7 @@ where vector.search(VectorFromAttachment, $searchTerm, 0.8) * Each attachment contains raw numerical data in 32-bit floating-point format. 
{CODE-TABS} -{CODE-TAB:csharp:Map_index index_4@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:LINQ_index index_4@AiIntegration\VectorSearchWithAttachments.cs /} {CODE-TAB:csharp:IndexDefinition index_5@AiIntegration\VectorSearchWithAttachments.cs /} {CODE-TAB:csharp:Storing_numerical_attachments store_attachments_2@AiIntegration\VectorSearchWithAttachments.cs /} {CODE-TABS/} @@ -167,7 +167,7 @@ where vector.search(VectorFromAttachment, $queryVector) * It indexes embeddings generated from the numerical data stored in ALL attachments of all _Company_ documents. {CODE-TABS} -{CODE-TAB:csharp:Map_index index_7@AiIntegration\VectorSearchWithAttachments.cs /} +{CODE-TAB:csharp:LINQ_index index_7@AiIntegration\VectorSearchWithAttachments.cs /} {CODE-TAB:csharp:IndexDefinition index_8@AiIntegration\VectorSearchWithAttachments.cs /} {CODE-TAB:csharp:Storing_numerical_attachments store_attachments_4@AiIntegration\VectorSearchWithAttachments.cs /} {CODE-TABS/} diff --git a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown index 430a45fcd..8b99a1155 100644 --- a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown +++ b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown @@ -150,7 +150,7 @@ The following index defines a **vector field** named `VectorfromText`. It indexes embeddings generated from the textual data in the `Name` field of all _Product_ documents. 
{CODE-TABS} -{CODE-TAB:csharp:Map_index index_1@AiIntegration\VectorSearchUsingStaticIndex.cs /} +{CODE-TAB:csharp:LINQ_index index_1@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TAB:csharp:JS_index index_2@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TAB:csharp:IndexDefinition index_3@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TABS/} @@ -187,7 +187,7 @@ It indexes embeddings generated from the numerical data in the `TagsEmbeddedAsSi The raw numerical data in the source documents is in **32-bit floating-point format**. {CODE-TABS} -{CODE-TAB:csharp:Map_index index_4@AiIntegration\VectorSearchUsingStaticIndex.cs /} +{CODE-TAB:csharp:LINQ_index index_4@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TAB:csharp:JS_index index_5@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TAB:csharp:IndexDefinition index_6@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TABS/} @@ -215,7 +215,7 @@ It indexes embeddings generated from the numerical arrays in the `TagsEmbeddedAs The raw numerical data in the source documents is in **Int8 (8-bit integers) format**. {CODE-TABS} -{CODE-TAB:csharp:Map_index index_7@AiIntegration\VectorSearchUsingStaticIndex.cs /} +{CODE-TAB:csharp:LINQ_index index_7@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TAB:csharp:JS_index index_8@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TAB:csharp:IndexDefinition index_9@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TABS/} @@ -245,7 +245,7 @@ A _'regular'_ field, a _'vector'_ field, and a field configured for [full-text s This allows you to query across all fields using various predicates. 
{CODE-TABS} -{CODE-TAB:csharp:Map_index index_10@AiIntegration\VectorSearchUsingStaticIndex.cs /} +{CODE-TAB:csharp:LINQ_index index_10@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TAB:csharp:JS_index index_11@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TAB:csharp:IndexDefinition index_12@AiIntegration\VectorSearchUsingStaticIndex.cs /} {CODE-TABS/} From 289ec2e6dea060a71f4670e3b9c7f8add70c258c Mon Sep 17 00:00:00 2001 From: danielle9897 Date: Tue, 11 Feb 2025 18:27:31 +0200 Subject: [PATCH 4/5] RDoc-3170 Use AddParameter in RawQuery --- ...-search-using-static-index.dotnet.markdown | 3 +- .../VectorSearchUsingDynamicQuery.cs | 120 ++++++++++-------- .../VectorSearchUsingStaticIndex.cs | 44 ++++--- 3 files changed, 99 insertions(+), 68 deletions(-) diff --git a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown index 8b99a1155..3e83ccf3b 100644 --- a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown +++ b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/vector-search-using-static-index.dotnet.markdown @@ -168,7 +168,8 @@ Results will include _Product_ documents where the `Name` field is similar to th {CODE-TAB-BLOCK:sql:RQL} from index "Products/ByVector/Text" // Optionally, wrap the 'vector.search' query with 'exact()' to perform an exact search -where exact(vector.search(VectorFromText, "italian food", 0.82, 20)) +where exact(vector.search(VectorFromText, $searchTerm, 0.82, 20)) +{ "searchTerm" : "italian food" } {CODE-TAB-BLOCK/} {CODE-TABS/} diff --git a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs index 6f554e17d..42277d6d2 100644 --- 
a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs +++ b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingDynamicQuery.cs @@ -94,9 +94,10 @@ public async Task Examples() #region vs_3 var similarProducts = session.Advanced .RawQuery(@" - from 'Products' - // Wrap the document field 'Name' with 'embedding.text' to indicate the source data type - where vector.search(embedding.text(Name), 'italian food', 0.82, 20)") + from 'Products' + // Wrap the document field 'Name' with 'embedding.text' to indicate the source data type + where vector.search(embedding.text(Name), $searchTerm, 0.82, 20)") + .AddParameter("searchTerm", "italian food") .WaitForNonStaleResults() .ToList(); #endregion @@ -107,9 +108,10 @@ where vector.search(embedding.text(Name), 'italian food', 0.82, 20)") #region vs_3_async var similarProducts = await asyncSession.Advanced .AsyncRawQuery(@" - from 'Products' - // Wrap the document field 'Name' with 'embedding.text' to indicate the source data type - where vector.search(embedding.text(Name), 'italian food', 0.82, 20)") + from 'Products' + // Wrap the document field 'Name' with 'embedding.text' to indicate the source data type + where vector.search(embedding.text(Name), $searchTerm, 0.82, 20)") + .AddParameter("searchTerm", "italian food") .WaitForNonStaleResults() .ToListAsync(); #endregion @@ -251,8 +253,11 @@ where vector.search(embedding.text(Name), 'italian food', 0.82, 20)") var similarProducts = session.Advanced .RawQuery(@" from 'Movies' - where vector.search(TagsEmbeddedAsSingle, $queryVector, 0.85, 10) - { 'queryVector' : { '@vector': [6.599999904632568, 7.699999809265137] }}") + where vector.search(TagsEmbeddedAsSingle, $queryVector, 0.85, 10)") + .AddParameter("queryVector", new RavenVector(new float[] + { + 6.599999904632568f, 7.699999809265137f + })) .WaitForNonStaleResults() .ToList(); #endregion @@ -264,8 +269,11 @@ where 
vector.search(TagsEmbeddedAsSingle, $queryVector, 0.85, 10) var similarProducts = await asyncSession.Advanced .AsyncRawQuery(@" from 'Movies' - where vector.search(TagsEmbeddedAsSingle, $queryVector, 0.85, 10) - { 'queryVector' : { '@vector': [6.599999904632568, 7.699999809265137] }}") + where vector.search(TagsEmbeddedAsSingle, $queryVector, 0.85, 10)") + .AddParameter("queryVector", new RavenVector(new float[] + { + 6.599999904632568f, 7.699999809265137f + })) .WaitForNonStaleResults() .ToListAsync(); #endregion @@ -374,9 +382,10 @@ where vector.search(TagsEmbeddedAsSingle, $queryVector, 0.85, 10) #region vs_11 var similarProducts = session.Advanced .RawQuery(@" - from 'Products' - // Wrap the query with the 'exact()' method - where exact(vector.search(embedding.text(Name), 'italian food'))") + from 'Products' + // Wrap the query with the 'exact()' method + where exact(vector.search(embedding.text(Name), $searchTerm))") + .AddParameter("searchTerm", "italian food") .WaitForNonStaleResults() .ToList(); #endregion @@ -387,9 +396,10 @@ where exact(vector.search(embedding.text(Name), 'italian food'))") #region vs_11_async var similarProducts = await asyncSession.Advanced .AsyncRawQuery(@" - from 'Products' - // Wrap the query with the 'exact()' method - where exact(vector.search(embedding.text(Name), 'italian food'))") + from 'Products' + // Wrap the query with the 'exact()' method + where exact(vector.search(embedding.text(Name), $searchTerm))") + .AddParameter("searchTerm", "italian food") .WaitForNonStaleResults() .ToListAsync(); #endregion @@ -459,9 +469,10 @@ where exact(vector.search(embedding.text(Name), 'italian food'))") #region vs_14 var similarProducts = session.Advanced .RawQuery(@" - from 'Products' - where (PricePerUnit > $minPrice) and (vector.search(embedding.text(Name), $searchTerm)) - { 'minPrice' : 35.0, 'searchTerm' : 'italian food' }") + from 'Products' + where (PricePerUnit > $minPrice) and (vector.search(embedding.text(Name), $searchTerm))") + 
.AddParameter("minPrice", 35.0) + .AddParameter("searchTerm", "italian food") .WaitForNonStaleResults() .ToList(); #endregion @@ -472,9 +483,10 @@ where exact(vector.search(embedding.text(Name), 'italian food'))") #region vs_14_async var similarProducts = await asyncSession.Advanced .AsyncRawQuery(@" - from 'Products' - where (PricePerUnit > $minPrice) and (vector.search(embedding.text(Name), $searchTerm)) - { 'minPrice' : 35.0, 'searchTerm' : 'italian food' }") + from 'Products' + where (PricePerUnit > $minPrice) and (vector.search(embedding.text(Name), $searchTerm))") + .AddParameter("minPrice", 35.0) + .AddParameter("searchTerm", "italian food") .WaitForNonStaleResults() .ToListAsync(); #endregion @@ -563,10 +575,10 @@ where exact(vector.search(embedding.text(Name), 'italian food'))") #region vs_17 var similarProducts = session.Advanced .RawQuery(@" - from 'Products' - // Wrap the 'Name' field with 'embedding.text_i8' - where vector.search(embedding.text_i8(Name), $searchTerm) - { 'searchTerm' : 'italian food' }") + from 'Products' + // Wrap the 'Name' field with 'embedding.text_i8' + where vector.search(embedding.text_i8(Name), $searchTerm)") + .AddParameter("searchTerm", "italian food") .WaitForNonStaleResults() .ToList(); #endregion @@ -577,10 +589,10 @@ where vector.search(embedding.text_i8(Name), $searchTerm) #region vs_17_async var similarProducts = await asyncSession.Advanced .AsyncRawQuery(@" - from 'Products' - // Wrap the 'Name' field with 'embedding.text_i8' - where vector.search(embedding.text_i8(Name), $searchTerm) - { 'searchTerm' : 'italian food' }") + from 'Products' + // Wrap the 'Name' field with 'embedding.text_i8' + where vector.search(embedding.text_i8(Name), $searchTerm)") + .AddParameter("searchTerm", "italian food") .WaitForNonStaleResults() .ToListAsync(); #endregion @@ -621,9 +633,9 @@ where vector.search(embedding.text_i8(Name), $searchTerm) queryVector => queryVector // Provide the vector to use for comparison .ByEmbedding(new 
RavenVector(new float[] - { - 6.599999904632568f, 7.699999809265137f - }))) + { + 6.599999904632568f, 7.699999809265137f + }))) .Customize(x => x.WaitForNonStaleResults()) .ToListAsync(); #endregion @@ -643,9 +655,9 @@ where vector.search(embedding.text_i8(Name), $searchTerm) queryVector => queryVector // Provide the vector to use for comparison .ByEmbedding(new RavenVector(new float[] - { - 6.599999904632568f, 7.699999809265137f - }))) + { + 6.599999904632568f, 7.699999809265137f + }))) .WaitForNonStaleResults() .ToList(); #endregion @@ -665,9 +677,9 @@ where vector.search(embedding.text_i8(Name), $searchTerm) queryVector => queryVector // Provide the vector to use for comparison .ByEmbedding(new RavenVector(new float[] - { - 6.599999904632568f, 7.699999809265137f - }))) + { + 6.599999904632568f, 7.699999809265137f + }))) .WaitForNonStaleResults() .ToListAsync(); #endregion @@ -676,12 +688,15 @@ where vector.search(embedding.text_i8(Name), $searchTerm) using (var session = store.OpenSession()) { #region vs_20 - var similarProducts = session.Advanced - .RawQuery(@" - from 'Movies' - // Wrap the 'TagsEmbeddedAsSingle' field with 'embedding.f32_i1' - where vector.search(embedding.f32_i1(TagsEmbeddedAsSingle), $queryVector) - { 'queryVector' : { '@vector' : [6.599999904632568,7.699999809265137] }}") + var similarMovies = session.Advanced + .RawQuery(@" + from 'Movies' + // Wrap the 'TagsEmbeddedAsSingle' field with 'embedding.f32_i1' + where vector.search(embedding.f32_i1(TagsEmbeddedAsSingle), $queryVector)") + .AddParameter("queryVector", new RavenVector(new float[] + { + 6.599999904632568f, 7.699999809265137f + })) .WaitForNonStaleResults() .ToList(); #endregion @@ -690,12 +705,15 @@ where vector.search(embedding.f32_i1(TagsEmbeddedAsSingle), $queryVector) using (var asyncSession = store.OpenAsyncSession()) { #region vs_20_async - var similarProducts = await asyncSession.Advanced - .AsyncRawQuery(@" - from 'Movies' - // Wrap the 'TagsEmbeddedAsSingle' field with 
'embedding.f32_i1' - where vector.search(embedding.f32_i1(TagsEmbeddedAsSingle), $queryVector) - { 'queryVector' : { '@vector' : [6.599999904632568,7.699999809265137] }}") + var similarMovies = await asyncSession.Advanced + .AsyncRawQuery(@" + from 'Movies' + // Wrap the 'TagsEmbeddedAsSingle' field with 'embedding.f32_i1' + where vector.search(embedding.f32_i1(TagsEmbeddedAsSingle), $queryVector)") + .AddParameter("queryVector", new RavenVector(new float[] + { + 6.599999904632568f, 7.699999809265137f + })) .WaitForNonStaleResults() .ToListAsync(); #endregion diff --git a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingStaticIndex.cs b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingStaticIndex.cs index 540e58d18..d1e9db963 100644 --- a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingStaticIndex.cs +++ b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchUsingStaticIndex.cs @@ -583,7 +583,8 @@ public async Task QueryExamples() .RawQuery(@" from index 'Products/ByVector/Text' // Optionally, wrap the 'vector.search' query with 'exact()' to perform an exact search - where exact(vector.search(VectorFromText, 'italian food', 0.82, 20))") + where exact(vector.search(VectorFromText, $searchTerm, 0.82, 20))") + .AddParameter("searchTerm", "italian food") .WaitForNonStaleResults() .ToList(); #endregion @@ -596,7 +597,8 @@ where exact(vector.search(VectorFromText, 'italian food', 0.82, 20))") .AsyncRawQuery(@" from index 'Products/ByVector/Text' // Optionally, wrap the 'vector.search' query with 'exact()' to perform an exact search - where exact(vector.search(VectorFromText, 'italian food', 0.82, 20))") + where exact(vector.search(VectorFromText, $searchTerm, 0.82, 20))") + .AddParameter("searchTerm", "italian food") .WaitForNonStaleResults() .ToListAsync(); #endregion @@ -683,10 +685,13 @@ where 
exact(vector.search(VectorFromText, 'italian food', 0.82, 20))") { #region query_6 var similarMovies = session.Advanced - .RawQuery(@" + .RawQuery(@" from index 'Movies/ByVector/Single' - where vector.search(VectorFromSingle, $queryVector) - { 'queryVector' : { '@vector' : [6.599999904632568, 7.699999809265137] }}") + where vector.search(VectorFromSingle, $queryVector)") + .AddParameter("queryVector", new RavenVector(new float[] + { + 6.599999904632568f, 7.699999809265137f + })) .WaitForNonStaleResults() .ToList(); #endregion @@ -696,10 +701,13 @@ where vector.search(VectorFromSingle, $queryVector) { #region query_6_async var similarMovies = await asyncSession.Advanced - .AsyncRawQuery(@" + .AsyncRawQuery(@" from index 'Movies/ByVector/Single' - where vector.search(VectorFromSingle, $queryVector) - { 'queryVector' : { '@vector' : [6.599999904632568, 7.699999809265137] }}") + where vector.search(VectorFromSingle, $queryVector)") + .AddParameter("queryVector", new RavenVector(new float[] + { + 6.599999904632568f, 7.699999809265137f + })) .WaitForNonStaleResults() .ToListAsync(); #endregion @@ -788,8 +796,8 @@ where vector.search(VectorFromSingle, $queryVector) var similarMovies = session.Advanced .RawQuery(@" from index 'Movies/ByVector/Int8' - where vector.search(VectorFromInt8Arrays, $queryVector) - { 'queryVector' : [64, 127, -51, -52, 76, 62] }") + where vector.search(VectorFromInt8Arrays, $queryVector)") + .AddParameter("queryVector", VectorQuantizer.ToInt8(new float[] { 0.1f, 0.2f })) .WaitForNonStaleResults() .ToList(); #endregion @@ -801,8 +809,8 @@ where vector.search(VectorFromInt8Arrays, $queryVector) var similarMovies = await asyncSession.Advanced .AsyncRawQuery(@" from index 'Movies/ByVector/Int8' - where vector.search(VectorFromInt8Arrays, $queryVector) - { 'queryVector' : [64, 127, -51, -52, 76, 62] }") + where vector.search(VectorFromInt8Arrays, $queryVector)") + .AddParameter("queryVector", VectorQuantizer.ToInt8(new float[] { 0.1f, 0.2f })) 
.WaitForNonStaleResults() .ToListAsync(); #endregion @@ -861,8 +869,10 @@ where vector.search(VectorFromInt8Arrays, $queryVector) from index 'Products/ByMultipleFields' where PricePerUnit > $minPrice or search(Name, $searchTerm1) - or vector.search(VectorFromText, $searchTerm2, 0.8) - { 'minPrice' : 200, 'searchTerm1' : 'Alice', 'searchTerm2': 'italian' }") + or vector.search(VectorFromText, $searchTerm2, 0.8)") + .AddParameter("minPrice", 200) + .AddParameter("searchTerm1", "Alice") + .AddParameter("searchTerm2", "italian") .WaitForNonStaleResults() .ToList(); #endregion @@ -876,8 +886,10 @@ or vector.search(VectorFromText, $searchTerm2, 0.8) from index 'Products/ByMultipleFields' where PricePerUnit > $minPrice or search(Name, $searchTerm1) - or vector.search(VectorFromText, $searchTerm2, 0.8) - { 'minPrice' : 200, 'searchTerm1' : 'Alice', 'searchTerm2': 'italian' }") + or vector.search(VectorFromText, $searchTerm2, 0.8)") + .AddParameter("minPrice", 200) + .AddParameter("searchTerm1", "Alice") + .AddParameter("searchTerm2", "italian") .WaitForNonStaleResults() .ToListAsync(); #endregion From ad20684250f4cc77e1315640f7bf666f3444fddb Mon Sep 17 00:00:00 2001 From: danielle9897 Date: Tue, 11 Feb 2025 19:04:01 +0200 Subject: [PATCH 5/5] RDoc-3170 Show how to extract the text from the attachments of the resulting documents --- ...ttachments-for-vector-search.dotnet.markdown | 4 ++++ .../VectorSearchWithAttachments.cs | 17 ++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown index 27a87ec8d..62f4cd87e 100644 --- a/Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown +++ 
b/Documentation/7.0/Raven.Documentation.Pages/ai-integration/indexing-attachments-for-vector-search.dotnet.markdown @@ -94,6 +94,10 @@ where vector.search(VectorFromAttachment, $searchTerm, 0.8) {CODE-TAB-BLOCK/} {CODE-TABS/} +You can now extract the text from the attachments of the resulting documents: + +{CODE:csharp extract_attachment_content@AiIntegration\VectorSearchWithAttachments.cs /} + {PANEL/} {PANEL: Indexing NUMERICAL attachments} diff --git a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchWithAttachments.cs b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchWithAttachments.cs index 7dee32537..12c4d45a6 100644 --- a/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchWithAttachments.cs +++ b/Documentation/7.0/Samples/csharp/Raven.Documentation.Samples/AiIntegration/VectorSearchWithAttachments.cs @@ -540,7 +540,22 @@ public async Task QueryExamples() .Customize(x => x.WaitForNonStaleResults()) .OfType() .ToList(); - #endregion + #endregion + + #region extract_attachment_content + // Extract text from the attachment of the first resulting document + // ================================================================ + + // Retrieve the attachment stream + var company = relevantCompanies[0]; + var attachmentResult = session.Advanced.Attachments.Get(company, "description.txt"); + var attStream = attachmentResult.Stream; + + // Read the attachment content into memory and decode it as a UTF-8 string + var ms = new MemoryStream(); + attStream.CopyTo(ms); + string attachmentText = Encoding.UTF8.GetString(ms.ToArray()); + #endregion } using (var asyncSession = store.OpenAsyncSession())