Skip to content

Commit

Permalink
Glebashnik/feed field generator (#4310)
Browse files Browse the repository at this point in the history
Tests generate indexing expression with custom searcher and local LLM.
  • Loading branch information
glebashnik authored Jan 14, 2025
1 parent 97d4b6d commit ea2df75
Show file tree
Hide file tree
Showing 11 changed files with 321 additions and 0 deletions.
1 change: 1 addition & 0 deletions tests/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
<module>search/struct_and_map_types/inherited_struct/concretedocs</module>
<module>search/wasfound/project</module>
<module>performance/container_tensor_eval/tensor-eval</module>
<module>search/generate_text_when_feeding/app</module>
</modules>

<properties>
Expand Down
77 changes: 77 additions & 0 deletions tests/search/generate_text_when_feeding/app/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
<?xml version="1.0"?>
<!--
Maven build for the generate_text_when_feeding test application.
The module is packaged as a container-plugin bundle, and the
vespa-application-maven-plugin runs its packageApplication goal on it.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>ai.vespa.test</groupId>
<artifactId>generate_text_when_feeding</artifactId>
<packaging>container-plugin</packaging>
<version>1.0.0</version>

<!-- Inherits from the shared parent POM for test applications, three directories up. -->
<parent>
<groupId>com.yahoo.vespa.systemtest</groupId>
<artifactId>test-apps-parent</artifactId>
<version>8-SNAPSHOT</version>
<relativePath>../../../pom.xml</relativePath>
</parent>

<!-- Both Vespa artifacts use Maven scope "provided": available at compile time, not packaged with this module. -->
<dependencies>
<dependency>
<groupId>com.yahoo.vespa</groupId>
<artifactId>container</artifactId>
<version>${vespa.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.yahoo.vespa</groupId>
<artifactId>model-integration</artifactId>
<version>${vespa.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>

<!-- NOTE(review): test.hide is not consumed anywhere in this file; presumably read by the parent POM or test tooling. Confirm. -->
<properties>
<bundle-plugin.failOnWarnings>true</bundle-plugin.failOnWarnings>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<test.hide>true</test.hide>
</properties>

<build>
<plugins>
<!-- Compiles for Java release 17 with deprecation and warning output enabled. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>

<configuration>
<optimize>true</optimize>
<showDeprecation>true</showDeprecation>
<showWarnings>true</showWarnings>
<release>17</release>
</configuration>
</plugin>

<!-- Runs the packageApplication goal of the Vespa application plugin. -->
<plugin>
<groupId>com.yahoo.vespa</groupId>
<artifactId>vespa-application-maven-plugin</artifactId>
<version>${vespa.version}</version>
<executions>
<execution>
<goals>
<goal>packageApplication</goal>
</goals>
</execution>
</executions>
</plugin>

<!-- Loaded as a build extension; needed for the container-plugin packaging declared above. -->
<plugin>
<groupId>com.yahoo.vespa</groupId>
<artifactId>bundle-plugin</artifactId>
<version>${vespa.version}</version>
<extensions>true</extensions>
</plugin>
</plugins>
</build>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
define {input}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

# Schema for the text-generation feeding test. The document carries id, text and
# prompt; three extra fields outside the document block are filled by "generate"
# indexing expressions, and each gets its own bm25 rank profile.
schema passage {
document passage {
field id type string {
indexing: summary | attribute
}

field text type string {
indexing: summary | index
index: enable-bm25
}

# Declares no indexing statement; its value is read by the mock_gen
# indexing expression below.
field prompt type string {
}
}

# Concatenates prompt, a single space and text, then passes the result to the
# generator referenced as mock_gen before indexing and storing it in the summary.
field mock_gen type string {
indexing: input prompt . " " . input text | generate mock_gen | index | summary
index: enable-bm25
}

# Passes text to the generator referenced as mock_lm_gen.
field mock_lm_gen type string {
indexing: input text | generate mock_lm_gen | index | summary
index: enable-bm25
}

# Passes text to the generator referenced as local_llm_gen.
field local_llm_gen type string {
indexing: input text | generate local_llm_gen | index | summary
index: enable-bm25
}

# Default fieldset spans the fed fields and all three generated fields.
fieldset default {
fields: text, prompt, mock_gen, mock_lm_gen, local_llm_gen
}

# Default profile ranks by bm25 on the fed text field only.
rank-profile default {
first-phase {
expression: bm25(text)
}
}

# The remaining profiles each rank by bm25 on one generated field.
rank-profile mock_gen {
first-phase {
expression: bm25(mock_gen)
}
}

rank-profile mock_lm_gen {
first-phase {
expression: bm25(mock_lm_gen)
}
}

rank-profile local_llm_gen {
first-phase {
expression: bm25(local_llm_gen)
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?xml version="1.0" encoding="utf-8" ?>
<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
<services version="1.0">

    <!--
    Single container cluster hosting the language models and text generators
    that the schema's "generate" indexing expressions refer to by component id.
    -->
    <container id="container" version="1.0">
        <!-- Mock language model from this application's bundle. -->
        <component id="mock_lm" class="ai.vespa.test.MockLanguageModel" bundle="generate_text_when_feeding">
            <config name="ai.vespa.test.mock-language-model">
                <repetitions>2</repetitions>
            </config>
        </component>

        <!-- Local LLM; the model is fetched from the URL below. -->
        <component id="local_llm" class="ai.vespa.llm.clients.LocalLLM">
            <config name="ai.vespa.llm.clients.llm-local-client">

                <!-- The model file is approx. 130 MB. -->
                <model url="https://data.vespa-cloud.com/gguf_models/Llama-160M-Chat-v1.Q6_K.gguf" />

                <contextSize>512</contextSize>
                <parallelRequests>1</parallelRequests>
                <maxQueueSize>0</maxQueueSize>
                <maxTokens>10</maxTokens>
            </config>
        </component>

        <!-- Custom text generator from this application's bundle. -->
        <component id="mock_gen" class="ai.vespa.test.MockTextGenerator" bundle="generate_text_when_feeding">
            <config name="ai.vespa.test.mock-text-generator">
                <repetitions>2</repetitions>
            </config>
        </component>

        <!-- Text generator backed by the mock language model, using the prompt template in files/prompt.txt. -->
        <component id="mock_lm_gen" class="ai.vespa.llm.generation.LanguageModelTextGenerator">
            <config name="ai.vespa.llm.generation.language-model-text-generator">
                <providerId>mock_lm</providerId>
                <promptTemplateFile>files/prompt.txt</promptTemplateFile>
            </config>
        </component>

        <!-- Text generator backed by the local LLM, using the same prompt template. -->
        <component id="local_llm_gen" class="ai.vespa.llm.generation.LanguageModelTextGenerator">
            <config name="ai.vespa.llm.generation.language-model-text-generator">
                <providerId>local_llm</providerId>
                <promptTemplateFile>files/prompt.txt</promptTemplateFile>
            </config>
        </component>

        <document-api/>

        <search/>

        <!--
        No JDWP debug agent is configured: a fixed debug port bound on all
        interfaces exposes the JVM and prevents it from starting when the
        port is already in use on the host.
        -->
        <nodes>
            <node hostalias="node1"/>
        </nodes>
    </container>

    <!-- Single-node content cluster indexing the passage document type. -->
    <content id="content" version="1.0">
        <redundancy>1</redundancy>

        <documents>
            <document mode="index" type="passage"/>
        </documents>

        <nodes>
            <node hostalias="node1" distribution-key="0"/>
        </nodes>
    </content>

</services>
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package ai.vespa.test;

import ai.vespa.llm.InferenceParameters;
import ai.vespa.llm.completion.Completion;
import ai.vespa.llm.completion.Prompt;

import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.function.Consumer;

/**
 * Language model test double. The single completion it returns is the prompt
 * text repeated {@code config.repetitions()} times, joined by single spaces
 * and trimmed. Asynchronous completion is not supported.
 */
public class MockLanguageModel implements ai.vespa.llm.LanguageModel {

    private final MockLanguageModelConfig config;

    public MockLanguageModel(MockLanguageModelConfig config) {
        this.config = config;
    }

    /**
     * Builds the repeated prompt text and wraps it in a one-element list.
     *
     * @param prompt the prompt whose string form is repeated
     * @param params not read by this mock
     * @return a list holding exactly one completion
     */
    @Override
    public List<Completion> complete(Prompt prompt, InferenceParameters params) {
        var echo = new StringBuilder();
        int written = 0;

        while (written < config.repetitions()) {
            // A separator goes in front of every repetition except the first.
            if (written > 0) {
                echo.append(" ");
            }
            echo.append(prompt.asString());
            written++;
        }

        String text = echo.toString().trim();
        return List.of(Completion.from(text));
    }

    /** Always throws: this mock only implements the synchronous path. */
    @Override
    public CompletableFuture<Completion.FinishReason> completeAsync(Prompt prompt,
                                                                    InferenceParameters params,
                                                                    Consumer<Completion> consumer) {
        throw new UnsupportedOperationException();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package ai.vespa.test;

import ai.vespa.llm.completion.Prompt;
import com.yahoo.language.process.TextGenerator;

/**
 * Text generator test double. The generated text is the prompt text repeated
 * {@code config.repetitions()} times, joined by single spaces.
 */
public class MockTextGenerator implements TextGenerator {

    private final MockTextGeneratorConfig config;

    public MockTextGenerator(MockTextGeneratorConfig config) {
        this.config = config;
    }

    /**
     * Produces the repeated prompt text.
     *
     * @param prompt  the prompt whose string form is repeated
     * @param context not read by this mock
     * @return the repetitions joined by single spaces; empty when the count is not positive
     */
    @Override
    public String generate(Prompt prompt, Context context) {
        var output = new StringBuilder();
        String separator = "";

        for (int remaining = config.repetitions(); remaining > 0; remaining--) {
            output.append(separator).append(prompt.asString());
            // Every repetition after the first is preceded by one space.
            separator = " ";
        }

        return output.toString();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
namespace=ai.vespa.test
package=ai.vespa.test

# Number of times the mock component repeats the prompt text in its output (1 when not configured).
repetitions int default=1
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
namespace=ai.vespa.test
package=ai.vespa.test

# Number of times the mock component repeats the prompt text in its output (1 when not configured).
repetitions int default=1
8 changes: 8 additions & 0 deletions tests/search/generate_text_when_feeding/data/feed.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"put": "id:passage:passage::0",
"fields": {
"text": "hello",
"id": "0",
"prompt": "define"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
require 'search_test'

# System test for text generation during feeding: builds and deploys the test
# application, feeds one document, and checks the fields produced by the
# custom generator, the generator with a mock language model, and the
# generator with a local LLM.
class Generate < SearchTest
  def setup
    super
    set_owner('glebashnik')
    set_description('Test text generation when feeding.')
  end

  def test_generate_text_when_feeding
    # Build the application package with Maven. The build runs in the app
    # directory next to this test (resolved through selfdir, like the other
    # paths below) rather than relative to the process working directory.
    built = system('mvn clean generate-resources && mvn clean package', :chdir => selfdir + 'app')
    # system returns false or nil when the command fails or cannot be run;
    # stop here instead of deploying a missing or stale package.
    assert(built, 'Maven build of the test application failed')

    deploy(selfdir + 'app/target/application')
    start

    feed_and_wait_for_docs('passage', 1, :file => selfdir + "data/feed.jsonl")
    assert_hitcount('query=hello&ranking=mock_gen', 1) # Custom text generator.
    assert_hitcount('query=hello&ranking=mock_lm_gen', 1) # Generator with custom LM.
    assert_hitcount('query=hello&ranking=local_llm_gen', 1) # Generator with local LLM.

    result = search("query=hello&ranking=mock_gen")
    assert_equal("define hello define hello", result.hit[0].field["mock_gen"])
    assert_equal("define hello define hello", result.hit[0].field["mock_lm_gen"])
    # LLM output changes every time, so only require it to be non-empty.
    # to_s turns a missing field into an assertion failure, not a NoMethodError.
    assert(!result.hit[0].field["local_llm_gen"].to_s.empty?)
  end

  def teardown
    stop
  end
end

0 comments on commit ea2df75

Please sign in to comment.