From ea2df756dbac5c5121e1111dc59323835134be39 Mon Sep 17 00:00:00 2001 From: Gleb Sizov Date: Tue, 14 Jan 2025 14:31:47 +0100 Subject: [PATCH] Glebashnik/feed field generator (#4310) Tests generate indexing expression with custom searcher and local LLM. --- tests/pom.xml | 1 + .../generate_text_when_feeding/app/pom.xml | 77 +++++++++++++++++++ .../app/src/main/application/files/prompt.txt | 1 + .../src/main/application/schemas/passage.sd | 60 +++++++++++++++ .../app/src/main/application/services.xml | 67 ++++++++++++++++ .../java/ai/vespa/test/MockLanguageModel.java | 39 ++++++++++ .../java/ai/vespa/test/MockTextGenerator.java | 28 +++++++ .../configdefinitions/mock-language-model.def | 5 ++ .../configdefinitions/mock-text-generator.def | 5 ++ .../data/feed.jsonl | 8 ++ .../generate_text_when_feeding.rb | 30 ++++++++ 11 files changed, 321 insertions(+) create mode 100644 tests/search/generate_text_when_feeding/app/pom.xml create mode 100644 tests/search/generate_text_when_feeding/app/src/main/application/files/prompt.txt create mode 100644 tests/search/generate_text_when_feeding/app/src/main/application/schemas/passage.sd create mode 100644 tests/search/generate_text_when_feeding/app/src/main/application/services.xml create mode 100644 tests/search/generate_text_when_feeding/app/src/main/java/ai/vespa/test/MockLanguageModel.java create mode 100644 tests/search/generate_text_when_feeding/app/src/main/java/ai/vespa/test/MockTextGenerator.java create mode 100644 tests/search/generate_text_when_feeding/app/src/main/resources/configdefinitions/mock-language-model.def create mode 100644 tests/search/generate_text_when_feeding/app/src/main/resources/configdefinitions/mock-text-generator.def create mode 100644 tests/search/generate_text_when_feeding/data/feed.jsonl create mode 100644 tests/search/generate_text_when_feeding/generate_text_when_feeding.rb diff --git a/tests/pom.xml b/tests/pom.xml index c804bf121..d227f98de 100644 --- a/tests/pom.xml +++ b/tests/pom.xml @@ -26,6 +26,7 @@ search/struct_and_map_types/inherited_struct/concretedocs search/wasfound/project performance/container_tensor_eval/tensor-eval + search/generate_text_when_feeding/app diff --git a/tests/search/generate_text_when_feeding/app/pom.xml b/tests/search/generate_text_when_feeding/app/pom.xml new file mode 100644 index 000000000..1421206a1 --- /dev/null +++ b/tests/search/generate_text_when_feeding/app/pom.xml @@ -0,0 +1,77 @@ + + + 4.0.0 + + ai.vespa.test + generate_text_when_feeding + container-plugin + 1.0.0 + + + com.yahoo.vespa.systemtest + test-apps-parent + 8-SNAPSHOT + ../../../pom.xml + + + + + com.yahoo.vespa + container + ${vespa.version} + provided + + + com.yahoo.vespa + model-integration + ${vespa.version} + provided + + + + + true + UTF-8 + true + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + true + true + true + 17 + + + + + com.yahoo.vespa + vespa-application-maven-plugin + ${vespa.version} + + + + packageApplication + + + + + + + com.yahoo.vespa + bundle-plugin + ${vespa.version} + true + + + + + diff --git a/tests/search/generate_text_when_feeding/app/src/main/application/files/prompt.txt b/tests/search/generate_text_when_feeding/app/src/main/application/files/prompt.txt new file mode 100644 index 000000000..694244fcf --- /dev/null +++ b/tests/search/generate_text_when_feeding/app/src/main/application/files/prompt.txt @@ -0,0 +1 @@ +define {input} \ No newline at end of file diff --git a/tests/search/generate_text_when_feeding/app/src/main/application/schemas/passage.sd b/tests/search/generate_text_when_feeding/app/src/main/application/schemas/passage.sd new file mode 100644 index 000000000..37210be65 --- /dev/null +++ b/tests/search/generate_text_when_feeding/app/src/main/application/schemas/passage.sd @@ -0,0 +1,60 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +schema passage { + document passage { + field id type string { + indexing: summary | attribute + } + + field text type string { + indexing: summary | index + index: enable-bm25 + } + + field prompt type string { + } + } + + field mock_gen type string { + indexing: input prompt . " " . input text | generate mock_gen | index | summary + index: enable-bm25 + } + + field mock_lm_gen type string { + indexing: input text | generate mock_lm_gen | index | summary + index: enable-bm25 + } + + field local_llm_gen type string { + indexing: input text | generate local_llm_gen | index | summary + index: enable-bm25 + } + + fieldset default { + fields: text, prompt, mock_gen, mock_lm_gen, local_llm_gen + } + + rank-profile default { + first-phase { + expression: bm25(text) + } + } + + rank-profile mock_gen { + first-phase { + expression: bm25(mock_gen) + } + } + + rank-profile mock_lm_gen { + first-phase { + expression: bm25(mock_lm_gen) + } + } + + rank-profile local_llm_gen { + first-phase { + expression: bm25(local_llm_gen) + } + } +} diff --git a/tests/search/generate_text_when_feeding/app/src/main/application/services.xml b/tests/search/generate_text_when_feeding/app/src/main/application/services.xml new file mode 100644 index 000000000..1a84ce33b --- /dev/null +++ b/tests/search/generate_text_when_feeding/app/src/main/application/services.xml @@ -0,0 +1,67 @@ + + + + + + + + 2 + + + + + + + + + + 512 + 1 + 0 + 10 + + + + + + 2 + + + + + + mock_lm + files/prompt.txt + + + + + + local_llm + files/prompt.txt + + + + + + + + + + + + + + + 1 + + + + + + + + + + + diff --git a/tests/search/generate_text_when_feeding/app/src/main/java/ai/vespa/test/MockLanguageModel.java b/tests/search/generate_text_when_feeding/app/src/main/java/ai/vespa/test/MockLanguageModel.java new file mode 100644 index 000000000..c62580b5e --- /dev/null +++ b/tests/search/generate_text_when_feeding/app/src/main/java/ai/vespa/test/MockLanguageModel.java @@ -0,0 +1,39 @@ +package ai.vespa.test; + +import ai.vespa.llm.InferenceParameters; +import ai.vespa.llm.completion.Completion; +import ai.vespa.llm.completion.Prompt; + +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.function.Consumer; + +public class MockLanguageModel implements ai.vespa.llm.LanguageModel { + private final MockLanguageModelConfig config; + + public MockLanguageModel(MockLanguageModelConfig config) { + this.config = config; + } + + @Override + public List complete(Prompt prompt, InferenceParameters params) { + var stringBuilder = new StringBuilder(); + + for (int i = 0; i < config.repetitions(); i++) { + stringBuilder.append(prompt.asString()); + + if (i < config.repetitions() - 1) { + stringBuilder.append(" "); + } + } + + return List.of(Completion.from(stringBuilder.toString().trim())); + } + + @Override + public CompletableFuture completeAsync(Prompt prompt, + InferenceParameters params, + Consumer consumer) { + throw new UnsupportedOperationException(); + } +} diff --git a/tests/search/generate_text_when_feeding/app/src/main/java/ai/vespa/test/MockTextGenerator.java b/tests/search/generate_text_when_feeding/app/src/main/java/ai/vespa/test/MockTextGenerator.java new file mode 100644 index 000000000..a8b0d30f4 --- /dev/null +++ b/tests/search/generate_text_when_feeding/app/src/main/java/ai/vespa/test/MockTextGenerator.java @@ -0,0 +1,28 @@ +package ai.vespa.test; + +import ai.vespa.llm.completion.Prompt; +import com.yahoo.language.process.TextGenerator; + +public class MockTextGenerator implements TextGenerator { + private final MockTextGeneratorConfig config; + + public MockTextGenerator(MockTextGeneratorConfig config) { + this.config = config; + } + + @Override + public String generate(Prompt prompt, Context context) { + var stringBuilder = new StringBuilder(); + + for (int i = 0; i < config.repetitions(); i++) { + stringBuilder.append(prompt.asString()); + + if (i < config.repetitions() - 1) { + stringBuilder.append(" "); + } + } + + + return stringBuilder.toString(); + } +} diff --git a/tests/search/generate_text_when_feeding/app/src/main/resources/configdefinitions/mock-language-model.def b/tests/search/generate_text_when_feeding/app/src/main/resources/configdefinitions/mock-language-model.def new file mode 100644 index 000000000..e67b18312 --- /dev/null +++ b/tests/search/generate_text_when_feeding/app/src/main/resources/configdefinitions/mock-language-model.def @@ -0,0 +1,5 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=ai.vespa.test +package=ai.vespa.test + +repetitions int default=1 \ No newline at end of file diff --git a/tests/search/generate_text_when_feeding/app/src/main/resources/configdefinitions/mock-text-generator.def b/tests/search/generate_text_when_feeding/app/src/main/resources/configdefinitions/mock-text-generator.def new file mode 100644 index 000000000..e67b18312 --- /dev/null +++ b/tests/search/generate_text_when_feeding/app/src/main/resources/configdefinitions/mock-text-generator.def @@ -0,0 +1,5 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=ai.vespa.test +package=ai.vespa.test + +repetitions int default=1 \ No newline at end of file diff --git a/tests/search/generate_text_when_feeding/data/feed.jsonl b/tests/search/generate_text_when_feeding/data/feed.jsonl new file mode 100644 index 000000000..3934db8dc --- /dev/null +++ b/tests/search/generate_text_when_feeding/data/feed.jsonl @@ -0,0 +1,8 @@ +{ + "put": "id:passage:passage::0", + "fields": { + "text": "hello", + "id": "0", + "prompt": "define" + } +} \ No newline at end of file diff --git a/tests/search/generate_text_when_feeding/generate_text_when_feeding.rb b/tests/search/generate_text_when_feeding/generate_text_when_feeding.rb new file mode 100644 index 000000000..b5a557204 --- /dev/null +++ b/tests/search/generate_text_when_feeding/generate_text_when_feeding.rb @@ -0,0 +1,30 @@ +require 'search_test' + +class Generate < SearchTest + def setup + super + set_owner('glebashnik') + set_description('Test text generation when feeding.') + end + + def test_generate_text_when_feeding + # I haven't found a better way to build the app + system('cd app && mvn clean generate-resources && mvn clean package') + deploy(selfdir + 'app/target/application') + start + + feed_and_wait_for_docs('passage', 1, :file => selfdir + "data/feed.jsonl") + assert_hitcount('query=hello&ranking=mock_gen', 1) # Custom text generator. + assert_hitcount('query=hello&ranking=mock_lm_gen', 1) # Generator with custom LM. + assert_hitcount('query=hello&ranking=local_llm_gen', 1) # Generator with local LLM. + + result = search("query=hello&ranking=mock_gen") + assert_equal("define hello define hello", result.hit[0].field["mock_gen"]) + assert_equal("define hello define hello", result.hit[0].field["mock_lm_gen"]) + assert(!result.hit[0].field["local_llm_gen"].empty?) # LLM output changes every time + end + + def teardown + stop + end +end