Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changing from a plugin to a standalone CLI application #68

Merged
merged 42 commits into from
Jan 23, 2025
Merged
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
3fb1add
Working on standalone implementation.
jzonthemtn Dec 17, 2024
17dcece
Working on standalone implementation.
jzonthemtn Dec 17, 2024
f1048fb
Working on standalone implementation.
jzonthemtn Dec 17, 2024
e937df1
Working on standalone implementation.
jzonthemtn Dec 17, 2024
871a9bf
Working on standalone implementation.
jzonthemtn Dec 17, 2024
bdcf6c5
Working on converting to a standalone app.
jzonthemtn Dec 30, 2024
504e3f9
Working on converting to a standalone app.
jzonthemtn Dec 30, 2024
75a7641
Working on converting to a standalone app.
jzonthemtn Dec 30, 2024
aa26e3b
Working on converting to a standalone app.
jzonthemtn Dec 30, 2024
6c4b241
Working on converting to a standalone app.
jzonthemtn Dec 31, 2024
9ee0b9b
Working on converting to a standalone app.
jzonthemtn Dec 31, 2024
83633bc
Working on converting to a standalone app.
jzonthemtn Dec 31, 2024
7a2d577
Working on converting to a standalone app.
jzonthemtn Dec 31, 2024
2899bcf
Working on converting to a standalone app.
jzonthemtn Jan 1, 2025
5b7888b
Working on converting to a standalone app.
jzonthemtn Jan 1, 2025
41aa00f
Working on converting to a standalone app.
jzonthemtn Jan 1, 2025
b5d34cf
Adding running of query set to CLI.
jzonthemtn Jan 1, 2025
a1ad39d
Wiring up CLI options.
jzonthemtn Jan 1, 2025
bd2ffb3
Replacing strings.
jzonthemtn Jan 1, 2025
eb8bb37
Moving the app into the root directory.
jzonthemtn Jan 2, 2025
b2ca178
Moving scripts.
jzonthemtn Jan 2, 2025
873449b
Updating github action.
jzonthemtn Jan 2, 2025
71678d3
Updating github action.
jzonthemtn Jan 2, 2025
d17d116
Updating github action.
jzonthemtn Jan 2, 2025
640b0ad
Updating paths.
jzonthemtn Jan 2, 2025
b0689d7
Wiring up more of the query set runner.
jzonthemtn Jan 16, 2025
66fa252
Changing to generic client for the user query running.
jzonthemtn Jan 17, 2025
b9ed325
Updating comment.
jzonthemtn Jan 17, 2025
3bc2065
Adding script to make a search pipeline.
jzonthemtn Jan 17, 2025
a05d3fb
Merge branch 'main' into standalone
jzonthemtn Jan 20, 2025
f76c5f9
Parameterizing the opensearch host.
jzonthemtn Jan 20, 2025
b58b3fe
Updating pipeline name.
jzonthemtn Jan 21, 2025
bfb6b7f
Fixing query size with >=.
jzonthemtn Jan 21, 2025
e154262
Adding userId to UbiEvent.
jzonthemtn Jan 21, 2025
5a2cb16
Persisting the query run results.
jzonthemtn Jan 21, 2025
fbb87c4
Removing unneeded class. Removing plugin implementation.
jzonthemtn Jan 21, 2025
b9396d4
Removing unneeded docker files.
jzonthemtn Jan 21, 2025
52c620c
Updates for hybrid query.
jzonthemtn Jan 21, 2025
95207f2
Changing to queryResponseHitIds.
jzonthemtn Jan 22, 2025
d4a3d17
Adding additionalProperties.
jzonthemtn Jan 22, 2025
975ca44
Changing to queryResponseHitIds.
jzonthemtn Jan 22, 2025
483bdfb
Renaming script.
jzonthemtn Jan 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Working on converting to a standalone app.
jzonthemtn committed Dec 30, 2024
commit 75a7641b18f369684e96fe8f800fbdf5a9bb4a98
Original file line number Diff line number Diff line change
@@ -34,7 +34,10 @@
import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder;
import org.opensearch.eval.Constants;
import org.opensearch.eval.model.ClickthroughRate;
import org.opensearch.eval.model.data.ClickThroughRate;
import org.opensearch.eval.model.data.Judgment;
import org.opensearch.eval.model.data.QuerySet;
import org.opensearch.eval.model.data.RankAggregatedClickThrough;
import org.opensearch.eval.model.ubi.query.UbiQuery;
import org.opensearch.eval.utils.TimeUtils;

@@ -106,6 +109,45 @@ public boolean deleteIndex(String index) throws IOException {

}

@Override
public String indexQuerySet(final QuerySet querySet) throws IOException {

final String index = Constants.QUERY_SETS_INDEX_NAME;
final String id = querySet.getId();

final IndexRequest<QuerySet> indexRequest = new IndexRequest.Builder<QuerySet>().index(index).id(id).document(querySet).build();
return client.index(indexRequest).id();

}

@Override
public Collection<UbiQuery> getUbiQueries() throws IOException {

final Collection<UbiQuery> ubiQueries = new ArrayList<>();

final SearchResponse<UbiQuery> searchResponse = client.search(s -> s.index(Constants.UBI_QUERIES_INDEX_NAME).size(1000).scroll(Time.of(t -> t.offset(1000))), UbiQuery.class);

String scrollId = searchResponse.scrollId();
List<Hit<UbiQuery>> searchHits = searchResponse.hits().hits();

while (searchHits != null && !searchHits.isEmpty()) {

for (int i = 0; i < searchResponse.hits().hits().size(); i++) {
ubiQueries.add(searchResponse.hits().hits().get(i).source());
}

final ScrollRequest scrollRequest = new ScrollRequest.Builder().scrollId(scrollId).build();
final ScrollResponse<UbiQuery> scrollResponse = client.scroll(scrollRequest, UbiQuery.class);

scrollId = scrollResponse.scrollId();
searchHits = scrollResponse.hits().hits();

}

return ubiQueries;

}

public Collection<Judgment> getJudgments(final String index) throws IOException {

final Collection<Judgment> judgments = new ArrayList<>();
@@ -326,24 +368,21 @@ public void indexRankAggregatedClickthrough(final Map<Integer, Double> rankAggre

if(!rankAggregatedClickThrough.isEmpty()) {

// TODO: Split this into multiple bulk insert requests.

final BulkRequest request = new BulkRequest();
// TODO: Use bulk indexing.

for (final int position : rankAggregatedClickThrough.keySet()) {

final Map<String, Object> jsonMap = new HashMap<>();
jsonMap.put("position", position);
jsonMap.put("ctr", rankAggregatedClickThrough.get(position));
final String id = UUID.randomUUID().toString();

final IndexRequest indexRequest = new IndexRequest(INDEX_RANK_AGGREGATED_CTR).id(UUID.randomUUID().toString()).source(jsonMap);
final RankAggregatedClickThrough r = new RankAggregatedClickThrough(id);
r.setPosition(position);
r.setCtr(rankAggregatedClickThrough.get(position));

request.add(indexRequest);
final IndexRequest<RankAggregatedClickThrough> indexRequest = new IndexRequest.Builder<RankAggregatedClickThrough>().index(INDEX_RANK_AGGREGATED_CTR).id(id).document(r).build();
client.index(indexRequest);

}

client.bulk(request).get();

}

}
@@ -358,47 +397,28 @@ public void indexClickthroughRates(final Map<String, Set<ClickthroughRate>> clic

if(!clickthroughRates.isEmpty()) {

final BulkRequest request = new BulkRequest();
// TODO: Use bulk inserts.

for(final String userQuery : clickthroughRates.keySet()) {
for (final String userQuery : clickthroughRates.keySet()) {

for(final ClickthroughRate clickthroughRate : clickthroughRates.get(userQuery)) {
for (final ClickthroughRate clickthroughRate : clickthroughRates.get(userQuery)) {

final Map<String, Object> jsonMap = new HashMap<>();
jsonMap.put("user_query", userQuery);
jsonMap.put("clicks", clickthroughRate.getClicks());
jsonMap.put("events", clickthroughRate.getImpressions());
jsonMap.put("ctr", clickthroughRate.getClickthroughRate());
jsonMap.put("object_id", clickthroughRate.getObjectId());
final String id = UUID.randomUUID().toString();

final IndexRequest indexRequest = new IndexRequest(INDEX_QUERY_DOC_CTR)
.id(UUID.randomUUID().toString())
.source(jsonMap);
final ClickThroughRate clickThroughRate = new ClickThroughRate();
clickThroughRate.setUserQuery(userQuery);
clickThroughRate.setClicks(clickthroughRate.getClicks());
clickThroughRate.setEvents(clickthroughRate.getImpressions());
clickThroughRate.setCtr(clickthroughRate.getClickthroughRate());
clickThroughRate.setObjectId(clickthroughRate.getObjectId());

request.add(indexRequest);
final IndexRequest<ClickThroughRate> indexRequest = new IndexRequest.Builder<ClickThroughRate>().index(INDEX_QUERY_DOC_CTR).id(id).document(clickThroughRate).build();
client.index(indexRequest);

}

}

client.bulk(request, new ActionListener<>() {

@Override
public void onResponse(BulkResponse bulkItemResponses) {
if(bulkItemResponses.hasFailures()) {
LOGGER.error("Clickthrough rates were not all successfully indexed: {}", bulkItemResponses.buildFailureMessage());
} else {
LOGGER.debug("Clickthrough rates has been successfully indexed.");
}
}

@Override
public void onFailure(Exception ex) {
LOGGER.error("Indexing the clickthrough rates failed.", ex);
}

});

}

}
@@ -415,25 +435,18 @@ public String indexJudgments(final Collection<Judgment> judgments) throws Except
final String judgmentsId = UUID.randomUUID().toString();
final String timestamp = TimeUtils.getTimestamp();

final BulkRequest bulkRequest = new BulkRequest();
// TODO: Use bulk imports.

for(final Judgment judgment : judgments) {

final Map<String, Object> j = judgment.getJudgmentAsMap();
j.put("judgments_id", judgmentsId);
j.put("timestamp", timestamp);
judgment.setJudgmentsId(judgmentsId);
judgment.setTimestamp(timestamp);

final IndexRequest indexRequest = new IndexRequest(Constants.JUDGMENTS_INDEX_NAME)
.id(UUID.randomUUID().toString())
.source(j);

bulkRequest.add(indexRequest);
final IndexRequest<Judgment> indexRequest = new IndexRequest.Builder<Judgment>().index(Constants.JUDGMENTS_INDEX_NAME).id(judgment.getId()).document(judgment).build();
client.index(indexRequest);

}

// TODO: Don't use .get()
client.bulk(bulkRequest).get();

return judgmentsId;

}
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@

import org.opensearch.eval.model.ClickthroughRate;
import org.opensearch.eval.model.data.Judgment;
import org.opensearch.eval.model.data.QuerySet;
import org.opensearch.eval.model.ubi.query.UbiQuery;

import java.io.IOException;
@@ -26,4 +27,7 @@ public abstract class SearchEngine {

public abstract Collection<Judgment> getJudgments(final String index) throws IOException;

public abstract String indexQuerySet(QuerySet querySet) throws IOException;
public abstract Collection<UbiQuery> getUbiQueries() throws IOException;

}
Original file line number Diff line number Diff line change
@@ -2,4 +2,18 @@

public abstract class AbstractData {

private String id;

public AbstractData(final String id) {
this.id = id;
}

public String getId() {
return id;
}

public void setId(final String id) {
this.id = id;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package org.opensearch.eval.model.data;

import java.util.UUID;

public class ClickThroughRate extends AbstractData {

private String userQuery;
private long clicks;
private long events;
private double ctr;
private String objectId;

public ClickThroughRate() {
super(UUID.randomUUID().toString());
}

public ClickThroughRate(String id) {
super(id);
}

public String getUserQuery() {
return userQuery;
}

public void setUserQuery(String userQuery) {
this.userQuery = userQuery;
}

public long getClicks() {
return clicks;
}

public void setClicks(long clicks) {
this.clicks = clicks;
}

public long getEvents() {
return events;
}

public void setEvents(long events) {
this.events = events;
}

public double getCtr() {
return ctr;
}

public void setCtr(double ctr) {
this.ctr = ctr;
}

public String getObjectId() {
return objectId;
}

public void setObjectId(String objectId) {
this.objectId = objectId;
}

}
Original file line number Diff line number Diff line change
@@ -26,15 +26,19 @@ public class Judgment extends AbstractData {
private final String query;
private final String document;
private final double judgment;
private String judgmentsId;
private String timestamp;

/**
* Creates a new judgment.
* @param id The judgment ID.
* @param queryId The query ID for the judgment.
* @param query The query for the judgment.
* @param document The document in the jdugment.
* @param document The document in the judgment.
* @param judgment The judgment value.
*/
public Judgment(final String queryId, final String query, final String document, final double judgment) {
public Judgment(final String id, final String queryId, final String query, final String document, final double judgment) {
super(id);
this.queryId = queryId;
this.query = query;
this.document = document;
@@ -94,4 +98,20 @@ public double getJudgment() {
return judgment;
}

public String getJudgmentsId() {
return judgmentsId;
}

public void setJudgmentsId(String judgmentsId) {
this.judgmentsId = judgmentsId;
}

public String getTimestamp() {
return timestamp;
}

public void setTimestamp(String timestamp) {
this.timestamp = timestamp;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.opensearch.eval.model.data;

import java.util.Collection;
import java.util.Map;
import java.util.UUID;

public class QuerySet extends AbstractData {

private String name;
private String description;
private String sampling;
private Collection<Map<String, Long>> querySetQueries;
private String timestamp;

public QuerySet() {
super(UUID.randomUUID().toString());
}

public QuerySet(String id) {
super(id);
}

public String getName() {
return name;
}

public void setName(String name) {
this.name = name;
}

public String getDescription() {
return description;
}

public void setDescription(String description) {
this.description = description;
}

public String getSampling() {
return sampling;
}

public void setSampling(String sampling) {
this.sampling = sampling;
}

public Collection<Map<String, Long>> getQuerySetQueries() {
return querySetQueries;
}

public void setQuerySetQueries(Collection<Map<String, Long>> querySetQueries) {
this.querySetQueries = querySetQueries;
}

public String getTimestamp() {
return timestamp;
}

public void setTimestamp(String timestamp) {
this.timestamp = timestamp;
}

}
Loading