From ace25ab7ceb341d20563ecce4c1bc07166c1cc72 Mon Sep 17 00:00:00 2001 From: Mohamed Shahin <150928105+mohamedshahin-weaviate@users.noreply.github.com> Date: Fri, 16 Aug 2024 16:57:30 +0100 Subject: [PATCH] Update developer docs for Vector Index Params (#2469) * adding my name to the slack find author * Adding Set vector index parameters snippet and a bit of clean up --- _build_scripts/slack-find-author.sh | 1 + .../io/weaviate/docs/manage-data.classes.java | 535 +++++++++--------- .../weaviate/manage-data/collections.mdx | 9 + 3 files changed, 283 insertions(+), 262 deletions(-) diff --git a/_build_scripts/slack-find-author.sh b/_build_scripts/slack-find-author.sh index af2f1b2b16..d355f09669 100644 --- a/_build_scripts/slack-find-author.sh +++ b/_build_scripts/slack-find-author.sh @@ -34,6 +34,7 @@ git_slack_map=( ["thomashacker"]="<@U056E1ZEM3L>" ["Wera"]="<@U043TKSEU5V>" ["Zain Hasan"]="<@U043TKSJQF9>" + ["Mohamed Shahin"]="<@U05V4HPJ3M0>" ) # Get the Author name and map it to their Slack handle diff --git a/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.classes.java b/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.classes.java index 77028f0e3c..d7737d7228 100644 --- a/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.classes.java +++ b/_includes/code/howto/java/src/test/java/io/weaviate/docs/manage-data.classes.java @@ -9,10 +9,17 @@ import io.weaviate.client.v1.misc.model.VectorIndexConfig; import io.weaviate.client.v1.schema.model.Schema; import io.weaviate.client.v1.schema.model.WeaviateClass; +import io.weaviate.client.v1.schema.model.Property; +import io.weaviate.client.v1.schema.model.DataType; +import io.weaviate.client.v1.misc.model.ReplicationConfig; +import io.weaviate.client.v1.misc.model.BQConfig; import io.weaviate.docs.helper.EnvHelper; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; import static org.assertj.core.api.Assertions.assertThat; @@ -20,40 +27,46 @@ @Tag("classes") class ManageDataClassesTest { - private static WeaviateClient client; + private static WeaviateClient client; - @BeforeAll - public static void beforeAll() { - String scheme = EnvHelper.scheme("http"); - String host = EnvHelper.host("localhost"); - String port = EnvHelper.port("8080"); + @BeforeAll + public static void beforeAll() { + String scheme = EnvHelper.scheme("http"); + String host = EnvHelper.host("localhost"); + String port = EnvHelper.port("8080"); - Config config = new Config(scheme, host + ":" + port); - client = new WeaviateClient(config); + Config config = new Config(scheme, host + ":" + port); + client = new WeaviateClient(config); - Result result = client.schema().allDeleter().run(); - assertThat(result).isNotNull() - .withFailMessage(() -> result.getError().toString()) - .returns(false, Result::hasErrors) - .withFailMessage(null) - .returns(true, Result::getResult); - } + Result result = client.schema().allDeleter().run(); + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .returns(true, Result::getResult); + } - @Test - public void shouldManageDataClasses() { - // START BasicCreateCollection // START ReadOneCollection // START UpdateCollection + @Test + public void shouldManageDataClasses() { + // START BasicCreateCollection // START ReadOneCollection // START UpdateCollection String className = "Article"; - // END BasicCreateCollection // END ReadOneCollection // END UpdateCollection - - createCollection(className); - readOneCollection(className); - readAllCollections(); - updateCollection(className); - } - - private void createCollection(String className) { - // START BasicCreateCollection + // END BasicCreateCollection // END ReadOneCollection // END UpdateCollection + + createCollection(className); + readOneCollection(className); + readAllCollections(); + updateCollection(className); + createCollectionWithProperties(className); + createCollectionWithVectorizer(className); + createCollectionWithNamedVectors(className); + createCollectionWithModuleSettings(className); + createCollectionWithVectorIndexType(className); + createCollectionWithVectorIndexParams(className); + } + + private void createCollection(String className) { + // START BasicCreateCollection WeaviateClass emptyClass = WeaviateClass.builder() .className(className) .build(); @@ -63,256 +76,254 @@ private void createCollection(String className) { .withClass(emptyClass) .run(); - // END BasicCreateCollection + // END BasicCreateCollection - assertThat(result).isNotNull() - .withFailMessage(() -> result.getError().toString()) - .returns(false, Result::hasErrors) - .withFailMessage(null) - .returns(true, Result::getResult); - } + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .returns(true, Result::getResult); + } - // START CreateCollectionWithProperties - private void createCollectionWithProperties(String className){ - String className = className; - // Define class properties" + private void createCollectionWithProperties(String className){ + // START CreateCollectionWithProperties +// Define class properties Property titleProperty = Property.builder() - .name("title") - .description("Title Property Description...") - .dataType(Arrays.asList(DataType.TEXT)) - .build(); - + .name("title") + .description("Title Property Description...") + .dataType(Arrays.asList(DataType.TEXT)) + .build(); Property bodyProperty = Property.builder() - .name("body") - .description("Body Property Description...") - .dataType(Arrays.asList(DataType.TEXT)) - .build(); - - //Add the defined properties to the class + .name("body") + .description("Body Property Description...") + .dataType(Arrays.asList(DataType.TEXT)) + .build(); + + //Add the defined properties to the class + WeaviateClass articleClass = WeaviateClass.builder() + .className(className) + .description("Article Class Description...") + .properties(Arrays.asList(titleProperty, bodyProperty)) + .build(); + + Result result = client.schema().classCreator() + .withClass(articleClass) + .run(); + // END CreateCollectionWithProperties + } + +private void createCollectionWithVectorizer(String className){ +// START CreateCollectionWithVectorizer + // Additional configuration not shown + // Define the vectorizer in the WeaviateClass Builder + WeaviateClass articleClass = WeaviateClass.builder() + .className(className) + .properties(Arrays.asList(titleProperty, bodyProperty)) + .vectorizer("text2vec-openai") // Vectorize of your choic e.g. text2vec-openai or text2vec-cohere + .build(); + // Add the class to the schema + Result result = client.schema().classCreator() + .withClass(articleClass) + .run(); +// END CreateCollectionWithVectorizer + } + + private void createCollectionWithNamedVectors(String className){ + // START CreateCollectionWithNamedVectors + // Additional configuration not shown + //Define the vectorizers configurations + Map text2vecOpenAI = new HashMap<>(); + Map text2vecOpenAISettings = new HashMap<>(); + text2vecOpenAISettings.put("properties", new String[]{"name"}); + text2vecOpenAI.put("text2vec-openai", text2vecOpenAISettings); + + Map text2vecCohere = new HashMap<>(); + Map text2vecCohereSettings = new HashMap<>(); + text2vecCohereSettings.put("properties", new String[]{"body"}); + text2vecCohere.put("text2vec_cohere", text2vecCohereSettings); + + //Define the vector configurations + Map vectorConfig = new HashMap<>(); + vectorConfig.put("name_vector", WeaviateClass.VectorConfig.builder() + .vectorIndexType("hnsw") + .vectorizer(text2vecOpenAI) + .build()); + vectorConfig.put("body_vector", WeaviateClass.VectorConfig.builder() + .vectorIndexType("hnsw") + .vectorizer(text2vecCohere) + .build()); + + // Define the vectorizers in the WeaviateClass Builder WeaviateClass articleClass = WeaviateClass.builder() - .className(className) - .description("Article Class Description...") - .properties(Arrays.asList(titleProperty, bodyProperty)) - .build(); - - Result classResult = client.schema().classCreator() - .withClass(articleClass) - .run(); - } - // END CreateCollectionWithProperties - - // START CreateCollectionWithVectorizer - private void createCollectionWithVectorizer(){ - // Define class properties" - Property titleProperty = Property.builder() - .name("title") - .dataType(Arrays.asList(DataType.TEXT)) - .build(); - Property bodyProperty = Property.builder() - .name("body") - .dataType(Arrays.asList(DataType.TEXT)) - .build(); - // Define the vectorizer in the WeaviateClass Builder - WeaviateClass articleClass = WeaviateClass.builder() - .className("Article") - .properties(Arrays.asList(titleProperty, bodyProperty)) - .replicationConfig(replicationConfig) - .vectorizer("text2vec-openai") // Vectorize of your choic e.g. text2vec-openai or text2vec-cohere - .build(); - // Add the class to the schema - Result classResult = client.schema().classCreator() - .withClass(countryClass) - .run(); - } - // END CreateCollectionWithVectorizer - - // START CreateCollectionWithNamedVectors - private void createCollectionWithNamedVectors(){ - // Define class properties" - Property titleProperty = Property.builder() - .name("title") - .dataType(Arrays.asList(DataType.TEXT)) - .build(); - Property bodyProperty = Property.builder() - .name("body") - .dataType(Arrays.asList(DataType.TEXT)) - .build(); - //Define the vectorizers configurations - Map text2vecOpenAI = new HashMap<>(); - Map text2vecOpenAISettings = new HashMap<>(); - text2vecOpenAISettings.put("properties", new String[]{"name"}); - text2vecOpenAI.put("text2vec-openai", text2vecOpenAISettings); - Map text2vecCohere = new HashMap<>(); - Map text2vecCohereSettings = new HashMap<>(); - text2vecCohereSettings.put("properties", new String[]{"body"}); - text2vecCohere.put("text2vec_cohere", text2vecCohereSettings); - //Define the vector configurations - Map vectorConfig = new HashMap<>(); - vectorConfig.put("name_vector", WeaviateClass.VectorConfig.builder() - .vectorIndexType("hnsw") - .vectorizer(text2vecOpenAI) - .build()); - vectorConfig.put("body_vector", WeaviateClass.VectorConfig.builder() - .vectorIndexType("hnsw") - .vectorizer(text2vecCohere) - .build()); - // Define the vectorizers in the WeaviateClass Builder - WeaviateClass countryClass = WeaviateClass.builder() - .className(className) - .properties(Arrays.asList(titleProperty, bodyProperty)) - .vectorConfig(vectorConfig) - .build(); - // Add the class to the schema - Result classResult = client.schema().classCreator() - .withClass(countryClass) - .run(); + .className(className) + .properties(Arrays.asList(titleProperty, bodyProperty)) + .vectorConfig(vectorConfig) + .build(); + // Add the class to the schema + Result result = client.schema().classCreator() + .withClass(articleClass) + .run(); + // END CreateCollectionWithNamedVectors } - // END CreateCollectionWithNamedVectors +private void createCollectionWithModuleSettings(String className){ // START ModuleSettings - private void createCollectionWithModuleSettings(){ - // Define class properties" - Property titleProperty = Property.builder() - .name("title") - .dataType(Arrays.asList(DataType.TEXT)) - .build(); - Property bodyProperty = Property.builder() - .name("body") - .dataType(Arrays.asList(DataType.TEXT)) - .build(); - //Define the module settings - Map text2vecOpenAI = new HashMap<>(); - Map text2vecOpenAISettings = new HashMap<>(); - text2vecOpenAISettings.put("vectorizePropertyName", false); - text2vecOpenAISettings.put("model", "text-embedding-3-small"); //set the model of your choice e.g. text-embedding-3-small - text2vecOpenAI.put("text2vec-openai", text2vecOpenAISettings); - Map moduleConfig = new HashMap<>(); - moduleConfig.put("text2vec-openai", text2vecOpenAI); - // Set the module configu in the WeaviateClass Builder - WeaviateClass countryClass = WeaviateClass.builder() - .className(className) - .properties(Arrays.asList(titleProperty, bodyProperty)) - .moduleConfig(moduleConfig) // Set the module config - .build(); - // Add the class to the schema - Result classResult = client.schema().classCreator() - .withClass(countryClass) - .run(); - } - // END ModuleSettings + // Additional configuration not shown + //Define the module settings + Map text2vecOpenAI = new HashMap<>(); + Map text2vecOpenAISettings = new HashMap<>(); + text2vecOpenAISettings.put("vectorizePropertyName", false); + text2vecOpenAISettings.put("model", "text-embedding-3-small"); //set the model of your choice e.g. text-embedding-3-small + text2vecOpenAI.put("text2vec-openai", text2vecOpenAISettings); + Map moduleConfig = new HashMap<>(); + moduleConfig.put("text2vec-openai", text2vecOpenAI); + + // Set the module configu in the WeaviateClass Builder + WeaviateClass articleClass = WeaviateClass.builder() + .className(className) + .properties(Arrays.asList(titleProperty, bodyProperty)) + .moduleConfig(moduleConfig) // Set the module config + .build(); + + // Add the class to the schema + Result result = client.schema().classCreator() + .withClass(articleClass) + .run(); + // END ModuleSettings +} +private void createCollectionWithVectorIndexType(String className){ // START SetVectorIndexType - private void createCollectionWithVectorIndexType(){ - // Define class properties" - Property titleProperty = Property.builder() - .name("title") - .dataType(Arrays.asList(DataType.TEXT)) - .build(); - Property bodyProperty = Property.builder() - .name("body") - .dataType(Arrays.asList(DataType.TEXT)) - .build(); - // Define the index type in the WeaviateClass Builder - WeaviateClass countryClass = WeaviateClass.builder() - .className(className) - .properties(Arrays.asList(titleProperty, bodyProperty)) - .vectorizer("text2vec-openai") - .vectorIndexType("hnsw") //set the vector index of your choice e.g. hnsw, flat... - .build(); - // Add the class to the schema - Result classResult = client.schema().classCreator() - .withClass(countryClass) - .run(); - } + // Additional configuration not shown + // Define the index type in the WeaviateClass Builder + WeaviateClass articleClass = WeaviateClass.builder() + .className(className) + .properties(Arrays.asList(titleProperty, bodyProperty)) + .vectorizer("text2vec-openai") + .vectorIndexType("hnsw") //set the vector index of your choice e.g. hnsw, flat... + .build(); + // Add the class to the schema + Result result = client.schema().classCreator() + .withClass(articleClass) + .run(); // END SetVectorIndexType + } + +private void createCollectionWithVectorIndexParams(String className){ + // START SetVectorIndexParams + // Additional configuration not shown + // Define the VectorIndexConfig with compression + VectorIndexConfig createBqIndexConfig = VectorIndexConfig.builder() + .bq(BQConfig.builder() + .enabled(true) + .rescoreLimit(123L) + .cache(true) + .build()) + .vectorCacheMaxObjects(100000L) + .build(); - private void readOneCollection(String className) { - // START ReadOneCollection - Result result = client.schema().classGetter() - .withClassName(className) - .run(); - - // END ReadOneCollection - - assertThat(result).isNotNull() - .withFailMessage(() -> result.getError().toString()) - .returns(false, Result::hasErrors) - .withFailMessage(null) - .extracting(Result::getResult).isNotNull() - .extracting(WeaviateClass::getClassName).isEqualTo(className); - - print(result); - } - - private void readAllCollections() { - // START ReadAllCollections - Result result = client.schema().getter() - .run(); - - // END ReadAllCollections - - assertThat(result).isNotNull() - .withFailMessage(() -> result.getError().toString()) - .returns(false, Result::hasErrors) - .withFailMessage(null) - .extracting(Result::getResult).isNotNull() - .extracting(Schema::getClasses).asList() - .hasSize(1); - - print(result); - } - - private void updateCollection(String className) { - Result delResult = client.schema().classDeleter() - .withClassName(className) - .run(); - - assertThat(delResult).isNotNull() - .withFailMessage(() -> delResult.getError().toString()) - .returns(false, Result::hasErrors) - .returns(true, Result::getResult); - - // START UpdateCollectionTODO - // Define class - WeaviateClass originalClass = WeaviateClass.builder() + WeaviateClass articleClass = WeaviateClass.builder() .className(className) - .vectorIndexConfig(VectorIndexConfig.builder() - .distance(DistanceType.COSINE) // Note the distance metric - .build()) + .properties(Arrays.asList(titleProperty, bodyProperty)) + .vectorIndexType("flat") //set the vector index of your choice e.g. hnsw, flat... + .vectorIndexConfig(createBqIndexConfig) + .vectorizer("text2vec-openai") .build(); // Add the class to the schema Result result = client.schema().classCreator() - .withClass(originalClass) + .withClass(articleClass) .run(); - - // END UpdateCollectionTODO - - assertThat(result).isNotNull() - .withFailMessage(() -> result.getError().toString()) - .returns(false, Result::hasErrors) - .withFailMessage(null) - .returns(true, Result::getResult); - - // START UpdateCollectionTODO - // Define updated class - WeaviateClass updatedClass = WeaviateClass.builder() - .className(className) - .vectorIndexConfig(VectorIndexConfig.builder() - .distance(DistanceType.DOT) // Note the distance metric - .build()) - .build(); - - // Update the class definition - // TODO Not yet available in JAVA - - // END UpdateCollectionTODO - } - - private void print(Result result) { - // START ReadOneCollection // START ReadAllCollections - String json = new GsonBuilder().setPrettyPrinting().create().toJson(result.getResult()); - System.out.println(json); - // END ReadOneCollection // END ReadAllCollections - } + // END SetVectorIndexParams + } + + private void readOneCollection(String className) { + // START ReadOneCollection + Result result = client.schema().classGetter() + .withClassName(className) + .run(); + + // END ReadOneCollection + + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .extracting(Result::getResult).isNotNull() + .extracting(WeaviateClass::getClassName).isEqualTo(className); + + print(result); + } + + private void readAllCollections() { + // START ReadAllCollections + Result result = client.schema().getter() + .run(); + + // END ReadAllCollections + + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .extracting(Result::getResult).isNotNull() + .extracting(Schema::getClasses).asList() + .hasSize(1); + + print(result); + } + + private void updateCollection(String className) { + Result delResult = client.schema().classDeleter() + .withClassName(className) + .run(); + + assertThat(delResult).isNotNull() + .withFailMessage(() -> delResult.getError().toString()) + .returns(false, Result::hasErrors) + .returns(true, Result::getResult); + + // START UpdateCollectionTODO + // Define class + WeaviateClass originalClass = WeaviateClass.builder() + .className(className) + .vectorIndexConfig(VectorIndexConfig.builder() + .distance(DistanceType.COSINE) // Note the distance metric + .build()) + .build(); + + // Add the class to the schema + Result result = client.schema().classCreator() + .withClass(originalClass) + .run(); + + // END UpdateCollectionTODO + + assertThat(result).isNotNull() + .withFailMessage(() -> result.getError().toString()) + .returns(false, Result::hasErrors) + .withFailMessage(null) + .returns(true, Result::getResult); + + // START UpdateCollectionTODO + // Define updated class + WeaviateClass updatedClass = WeaviateClass.builder() + .className(className) + .vectorIndexConfig(VectorIndexConfig.builder() + .distance(DistanceType.DOT) // Note the distance metric + .build()) + .build(); + + // Update the class definition + // TODO Not yet available in JAVA + + // END UpdateCollectionTODO + } + + private void print(Result result) { + // START ReadOneCollection // START ReadAllCollections + String json = new GsonBuilder().setPrettyPrinting().create().toJson(result.getResult()); + System.out.println(json); + // END ReadOneCollection // END ReadAllCollections + } } diff --git a/developers/weaviate/manage-data/collections.mdx b/developers/weaviate/manage-data/collections.mdx index ce60937f5b..90cdd39273 100644 --- a/developers/weaviate/manage-data/collections.mdx +++ b/developers/weaviate/manage-data/collections.mdx @@ -426,6 +426,15 @@ Various vector index parameters are configurable at collection creation time, in language="ts" /> + + + +