-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Glebashnik/feed field generator (#4310)
Tests generate indexing expression with custom searcher and local LLM.
- Loading branch information
1 parent
97d4b6d
commit ea2df75
Showing
11 changed files
with
321 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
<?xml version="1.0"?> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 | ||
http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<groupId>ai.vespa.test</groupId> | ||
<artifactId>generate_text_when_feeding</artifactId> | ||
<packaging>container-plugin</packaging> | ||
<version>1.0.0</version> | ||
|
||
<parent> | ||
<groupId>com.yahoo.vespa.systemtest</groupId> | ||
<artifactId>test-apps-parent</artifactId> | ||
<version>8-SNAPSHOT</version> | ||
<relativePath>../../../pom.xml</relativePath> | ||
</parent> | ||
|
||
<dependencies> | ||
<dependency> | ||
<groupId>com.yahoo.vespa</groupId> | ||
<artifactId>container</artifactId> | ||
<version>${vespa.version}</version> | ||
<scope>provided</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.yahoo.vespa</groupId> | ||
<artifactId>model-integration</artifactId> | ||
<version>${vespa.version}</version> | ||
<scope>provided</scope> | ||
</dependency> | ||
</dependencies> | ||
|
||
<properties> | ||
<bundle-plugin.failOnWarnings>true</bundle-plugin.failOnWarnings> | ||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||
<test.hide>true</test.hide> | ||
</properties> | ||
|
||
<build> | ||
<plugins> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-compiler-plugin</artifactId> | ||
|
||
<configuration> | ||
<optimize>true</optimize> | ||
<showDeprecation>true</showDeprecation> | ||
<showWarnings>true</showWarnings> | ||
<release>17</release> | ||
</configuration> | ||
</plugin> | ||
|
||
<plugin> | ||
<groupId>com.yahoo.vespa</groupId> | ||
<artifactId>vespa-application-maven-plugin</artifactId> | ||
<version>${vespa.version}</version> | ||
<executions> | ||
<execution> | ||
<goals> | ||
<goal>packageApplication</goal> | ||
</goals> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
|
||
<plugin> | ||
<groupId>com.yahoo.vespa</groupId> | ||
<artifactId>bundle-plugin</artifactId> | ||
<version>${vespa.version}</version> | ||
<extensions>true</extensions> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
|
||
</project> |
1 change: 1 addition & 0 deletions
1
tests/search/generate_text_when_feeding/app/src/main/application/files/prompt.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
define {input} |
60 changes: 60 additions & 0 deletions
60
tests/search/generate_text_when_feeding/app/src/main/application/schemas/passage.sd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. | ||
|
||
# Schema for the "generate text when feeding" test.
# The document itself only carries `id`, `text` and `prompt`; the three fields declared
# outside the document block are populated at feed time by `generate` indexing expressions.
schema passage { | ||
document passage { | ||
field id type string { | ||
indexing: summary | attribute | ||
} | ||
|
||
field text type string { | ||
indexing: summary | index | ||
index: enable-bm25 | ||
} | ||
|
||
# Declared without an indexing statement; its value is read by the
# mock_gen indexing expression below.
field prompt type string { | ||
} | ||
} | ||
|
||
# Input is the prompt and the text joined by a single space, passed to `generate mock_gen`.
# NOTE(review): the name after `generate` presumably selects the component with the
# same id in services.xml - confirm.
field mock_gen type string { | ||
indexing: input prompt . " " . input text | generate mock_gen | index | summary | ||
index: enable-bm25 | ||
} | ||
|
||
# Input is the text only, passed to `generate mock_lm_gen`.
field mock_lm_gen type string { | ||
indexing: input text | generate mock_lm_gen | index | summary | ||
index: enable-bm25 | ||
} | ||
|
||
# Input is the text only, passed to `generate local_llm_gen`.
field local_llm_gen type string { | ||
indexing: input text | generate local_llm_gen | index | summary | ||
index: enable-bm25 | ||
} | ||
|
||
fieldset default { | ||
fields: text, prompt, mock_gen, mock_lm_gen, local_llm_gen | ||
} | ||
|
||
# One rank profile per text field, each using BM25 over that single field as first phase.
rank-profile default { | ||
first-phase { | ||
expression: bm25(text) | ||
} | ||
} | ||
|
||
rank-profile mock_gen { | ||
first-phase { | ||
expression: bm25(mock_gen) | ||
} | ||
} | ||
|
||
rank-profile mock_lm_gen { | ||
first-phase { | ||
expression: bm25(mock_lm_gen) | ||
} | ||
} | ||
|
||
rank-profile local_llm_gen { | ||
first-phase { | ||
expression: bm25(local_llm_gen) | ||
} | ||
} | ||
} |
67 changes: 67 additions & 0 deletions
67
tests/search/generate_text_when_feeding/app/src/main/application/services.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
<?xml version="1.0" encoding="utf-8" ?> | ||
<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> | ||
<services version="1.0"> | ||
|
||
<container id="container" version="1.0"> | ||
<component id="mock_lm" class="ai.vespa.test.MockLanguageModel" bundle="generate_text_when_feeding"> | ||
<config name="ai.vespa.test.mock-language-model"> | ||
<repetitions>2</repetitions> | ||
</config> | ||
</component> | ||
|
||
<component id="local_llm" class="ai.vespa.llm.clients.LocalLLM"> | ||
<config name="ai.vespa.llm.clients.llm-local-client"> | ||
|
||
<!-- The model file is approximately 130 MB. --> | ||
<model url="https://data.vespa-cloud.com/gguf_models/Llama-160M-Chat-v1.Q6_K.gguf" /> | ||
|
||
<contextSize>512</contextSize> | ||
<parallelRequests>1</parallelRequests> | ||
<maxQueueSize>0</maxQueueSize> | ||
<maxTokens>10</maxTokens> | ||
</config> | ||
</component> | ||
|
||
<component id="mock_gen" class="ai.vespa.test.MockTextGenerator" bundle="generate_text_when_feeding"> | ||
<config name="ai.vespa.test.mock-text-generator"> | ||
<repetitions>2</repetitions> | ||
</config> | ||
</component> | ||
|
||
<component id="mock_lm_gen" class="ai.vespa.llm.generation.LanguageModelTextGenerator"> | ||
<config name="ai.vespa.llm.generation.language-model-text-generator"> | ||
<providerId>mock_lm</providerId> | ||
<promptTemplateFile>files/prompt.txt</promptTemplateFile> | ||
</config> | ||
</component> | ||
|
||
<component id="local_llm_gen" class="ai.vespa.llm.generation.LanguageModelTextGenerator"> | ||
<config name="ai.vespa.llm.generation.language-model-text-generator"> | ||
<providerId>local_llm</providerId> | ||
<promptTemplateFile>files/prompt.txt</promptTemplateFile> | ||
</config> | ||
</component> | ||
|
||
<document-api/> | ||
|
||
<search/> | ||
|
||
<!-- Single container node. No extra JVM options are set: the test does not need any. -->
<nodes>
    <node hostalias="node1"/>
</nodes>
</container> | ||
|
||
<content id="content" version="1.0"> | ||
<redundancy>1</redundancy> | ||
|
||
<documents> | ||
<document mode="index" type="passage"/> | ||
</documents> | ||
|
||
<nodes> | ||
<node hostalias="node1" distribution-key="0"/> | ||
</nodes> | ||
</content> | ||
|
||
</services> |
39 changes: 39 additions & 0 deletions
39
.../search/generate_text_when_feeding/app/src/main/java/ai/vespa/test/MockLanguageModel.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
package ai.vespa.test; | ||
|
||
import ai.vespa.llm.InferenceParameters; | ||
import ai.vespa.llm.completion.Completion; | ||
import ai.vespa.llm.completion.Prompt; | ||
|
||
import java.util.List; | ||
import java.util.concurrent.CompletableFuture; | ||
import java.util.function.Consumer; | ||
|
||
public class MockLanguageModel implements ai.vespa.llm.LanguageModel { | ||
private final MockLanguageModelConfig config; | ||
|
||
public MockLanguageModel(MockLanguageModelConfig config) { | ||
this.config = config; | ||
} | ||
|
||
@Override | ||
public List<Completion> complete(Prompt prompt, InferenceParameters params) { | ||
var stringBuilder = new StringBuilder(); | ||
|
||
for (int i = 0; i < config.repetitions(); i++) { | ||
stringBuilder.append(prompt.asString()); | ||
|
||
if (i < config.repetitions() - 1) { | ||
stringBuilder.append(" "); | ||
} | ||
} | ||
|
||
return List.of(Completion.from(stringBuilder.toString().trim())); | ||
} | ||
|
||
@Override | ||
public CompletableFuture<Completion.FinishReason> completeAsync(Prompt prompt, | ||
InferenceParameters params, | ||
Consumer<Completion> consumer) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
} |
28 changes: 28 additions & 0 deletions
28
.../search/generate_text_when_feeding/app/src/main/java/ai/vespa/test/MockTextGenerator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
package ai.vespa.test; | ||
|
||
import ai.vespa.llm.completion.Prompt; | ||
import com.yahoo.language.process.TextGenerator; | ||
|
||
public class MockTextGenerator implements TextGenerator { | ||
private final MockTextGeneratorConfig config; | ||
|
||
public MockTextGenerator(MockTextGeneratorConfig config) { | ||
this.config = config; | ||
} | ||
|
||
@Override | ||
public String generate(Prompt prompt, Context context) { | ||
var stringBuilder = new StringBuilder(); | ||
|
||
for (int i = 0; i < config.repetitions(); i++) { | ||
stringBuilder.append(prompt.asString()); | ||
|
||
if (i < config.repetitions() - 1) { | ||
stringBuilder.append(" "); | ||
} | ||
} | ||
|
||
|
||
return stringBuilder.toString(); | ||
} | ||
} |
5 changes: 5 additions & 0 deletions
5
...nerate_text_when_feeding/app/src/main/resources/configdefinitions/mock-language-model.def
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. | ||
namespace=ai.vespa.test | ||
package=ai.vespa.test | ||
|
||
# Number of times MockLanguageModel repeats the prompt text (space-separated) in its completion.
repetitions int default=1 |
5 changes: 5 additions & 0 deletions
5
...nerate_text_when_feeding/app/src/main/resources/configdefinitions/mock-text-generator.def
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. | ||
namespace=ai.vespa.test | ||
package=ai.vespa.test | ||
|
||
# Number of times MockTextGenerator repeats the prompt text (space-separated) in its output.
repetitions int default=1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
{ | ||
"put": "id:passage:passage::0", | ||
"fields": { | ||
"text": "hello", | ||
"id": "0", | ||
"prompt": "define" | ||
} | ||
} |
30 changes: 30 additions & 0 deletions
30
tests/search/generate_text_when_feeding/generate_text_when_feeding.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
require 'search_test' | ||
|
||
# System test for text generation at feed time. It builds and deploys the test
# application, feeds one document, and checks the fields produced by the custom
# text generator, the generator backed by the mock language model, and the
# generator backed by the local LLM.
class Generate < SearchTest
  def setup
    super
    set_owner('glebashnik')
    set_description('Test text generation when feeding.')
  end

  def test_generate_text_when_feeding
    # The application has to be built with Maven before it can be deployed.
    # Run the build in this test's own app directory rather than relying on the
    # current working directory, and fail right away if the build does not
    # succeed instead of surfacing it later as a deploy error.
    built = system('mvn clean generate-resources && mvn clean package',
                   chdir: File.join(selfdir, 'app'))
    assert(built, 'Maven build of the test application failed')

    deploy(selfdir + 'app/target/application')
    start

    feed_and_wait_for_docs('passage', 1, file: selfdir + "data/feed.jsonl")
    assert_hitcount('query=hello&ranking=mock_gen', 1) # Custom text generator.
    assert_hitcount('query=hello&ranking=mock_lm_gen', 1) # Generator with custom LM.
    assert_hitcount('query=hello&ranking=local_llm_gen', 1) # Generator with local LLM.

    result = search("query=hello&ranking=mock_gen")
    hit = result.hit[0]
    assert_equal("define hello define hello", hit.field["mock_gen"])
    assert_equal("define hello define hello", hit.field["mock_lm_gen"])
    # LLM output changes every time, so only check that something was generated.
    # to_s turns a missing field into a clean assertion failure, not a NoMethodError.
    assert(!hit.field["local_llm_gen"].to_s.empty?, 'local_llm_gen field is missing or empty')
  end

  def teardown
    stop
  end
end