Skip to content

Commit

Permalink
Index the _id field with encoding to be searchable (#762)
Browse files Browse the repository at this point in the history
* add a new ID field

* typo
  • Loading branch information
vthacker authored Feb 6, 2024
1 parent d55d87c commit f58714f
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ public void reloadSchema() {
tryRegisterField(mapperService, entry.getValue().name, b -> b.field("type", "text"));
} else if (entry.getValue().fieldType == FieldType.STRING) {
tryRegisterField(mapperService, entry.getValue().name, b -> b.field("type", "keyword"));
} else if (entry.getValue().fieldType == FieldType.ID) {
tryRegisterField(mapperService, entry.getValue().name, b -> b.field("type", "keyword"));
} else if (entry.getValue().fieldType == FieldType.INTEGER) {
tryRegisterField(mapperService, entry.getValue().name, b -> b.field("type", "integer"));
} else if (entry.getValue().fieldType == FieldType.LONG) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ public static ImmutableMap<String, LuceneFieldDef> getDefaultLuceneFieldDefiniti
}

String[] fieldsAsString = {
LogMessage.SystemField.ID.fieldName,
LogMessage.SystemField.INDEX.fieldName,
LogMessage.ReservedField.TYPE.fieldName,
LogMessage.ReservedField.HOSTNAME.fieldName,
Expand All @@ -78,6 +77,14 @@ public static ImmutableMap<String, LuceneFieldDef> getDefaultLuceneFieldDefiniti
fieldName, new LuceneFieldDef(fieldName, FieldType.STRING.name, false, true, true));
}

String[] fieldsAsIds = {
LogMessage.SystemField.ID.fieldName,
};
for (String fieldName : fieldsAsIds) {
fieldDefBuilder.put(
fieldName, new LuceneFieldDef(fieldName, FieldType.ID.name, false, true, true));
}

String[] fieldsAsLong = {
LogMessage.ReservedField.DURATION_MS.fieldName,
LogMessage.SystemField.TIME_SINCE_EPOCH.fieldName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.opensearch.index.mapper.Uid;

/** The FieldType enum describes the types of fields in a chunk. */
public enum FieldType {
Expand Down Expand Up @@ -81,6 +82,32 @@ public Analyzer getAnalyzer(boolean quoted) {
return KEYWORD_ANALYZER;
}
},
ID("id") {
  /**
   * Adds an id value to {@code doc}.
   *
   * <p>The indexed term and the doc value both use the binary form returned by {@link
   * Uid#encodeId(String)}, while the stored field keeps the original, un-encoded string.
   *
   * @param doc document being built
   * @param name name of the field to add
   * @param value the id; it is cast to {@link String}, so any other type fails with a {@link
   *     ClassCastException}
   * @param fieldDef flags that select which of the indexed / stored / doc-value representations
   *     are added
   */
  @Override
  public void addField(Document doc, String name, Object value, LuceneFieldDef fieldDef) {
    final String rawId = (String) value;
    final BytesRef encodedId = Uid.encodeId(rawId);
    if (fieldDef.isIndexed) {
      doc.add(new StringField(name, encodedId, getStoreEnum(fieldDef.isStored)));
    }
    if (fieldDef.isStored) {
      // Keep the human-readable id retrievable alongside the encoded term.
      doc.add(new StoredField(name, rawId));
    }
    if (fieldDef.storeDocValue) {
      doc.add(new SortedDocValuesField(name, encodedId));
    }
  }

  /**
   * Builds an exact-match query for an id.
   *
   * <p>The query text is encoded with {@link Uid#encodeId(String)} so that the term bytes are
   * the same bytes {@code addField} writes to the index; a term built from the raw text would
   * not match any indexed id.
   *
   * @param field name of the id field
   * @param queryText the un-encoded id to look up
   * @param analyzer not used; ids are matched as a single exact term
   * @return a {@link TermQuery} on the encoded id
   */
  @Override
  public Query termQuery(String field, String queryText, Analyzer analyzer) {
    if (queryText.isEmpty()) {
      // NOTE(review): Uid.encodeId is expected to reject empty ids - confirm. Keep the raw
      // (empty) term here so an empty query text does not start throwing.
      return new TermQuery(new Term(field, queryText));
    }
    return new TermQuery(new Term(field, Uid.encodeId(queryText)));
  }

  /**
   * Returns the analyzer used for id fields.
   *
   * @param quoted ignored; the same analyzer is returned either way
   * @return {@code KEYWORD_ANALYZER}
   */
  @Override
  public Analyzer getAnalyzer(boolean quoted) {
    return KEYWORD_ANALYZER;
  }
},
INTEGER("integer") {
@Override
public void addField(Document doc, String name, Object v, LuceneFieldDef fieldDef) {
Expand Down Expand Up @@ -373,7 +400,7 @@ private static Field.Store getStoreEnum(boolean isStored) {
// Aliased field types are FieldTypes that are treated as the same type for the purposes of
// field conflict detection.
public static final List<Set<FieldType>> ALIASED_FIELD_TYPES =
ImmutableList.of(ImmutableSet.of(FieldType.STRING, FieldType.TEXT));
ImmutableList.of(ImmutableSet.of(FieldType.STRING, FieldType.TEXT, FieldType.ID));

public static boolean areTypeAliasedFieldTypes(FieldType type1, FieldType type2) {
for (Set<FieldType> s : ALIASED_FIELD_TYPES) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
import org.opensearch.index.mapper.Uid;
import org.opensearch.search.aggregations.AbstractAggregationBuilder;
import org.opensearch.search.aggregations.Aggregator;
import org.opensearch.search.aggregations.InternalAggregation;
Expand Down Expand Up @@ -404,6 +406,19 @@ public void handlesDateHistogramExtendedBoundsMinDocEdgeCases() throws IOExcepti
.isThrownBy(() -> collectorManager.newCollector());
}

/**
 * Verifies that a query-string search on {@code _id} is built against the encoded form of the
 * id (as returned by {@link Uid#encodeId(String)}) rather than the raw text.
 */
@Test
public void shouldParseIdFieldSearch() throws Exception {
  String idField = "_id";
  String idValue = "1";
  IndexSearcher indexSearcher = logStoreAndSearcherRule.logStore.getSearcherManager().acquire();
  Query idQuery =
      openSearchAdapter.buildQuery("foo", STR."\{idField}:\{idValue}", null, null, indexSearcher);
  // Derive the expectation from idValue so the query and the expected bytes cannot drift apart.
  // encodeId already returns a BytesRef, so no re-wrapping of its backing array is needed.
  BytesRef expectedIdBytes = Uid.encodeId(idValue);
  // Example for idValue "1":
  //   idQuery.toString()         -> "#_id:([fe 1f])"
  //   expectedIdBytes.toString() -> "[fe 1f]"
  assertThat(idQuery.toString()).contains(expectedIdBytes.toString());
}

@Test
public void shouldExcludeDateFilterWhenNullTimestamps() throws Exception {
IndexSearcher indexSearcher = logStoreAndSearcherRule.logStore.getSearcherManager().acquire();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public void testBasicDocumentCreation() throws IOException {
assertThat(MetricsUtil.getCount(CONVERT_FIELD_VALUE_COUNTER, meterRegistry)).isZero();
assertThat(MetricsUtil.getCount(CONVERT_AND_DUPLICATE_FIELD_COUNTER, meterRegistry)).isZero();
// Only string fields have doc values not text fields.
assertThat(docBuilder.getSchema().get("_id").fieldType.name).isEqualTo(FieldType.STRING.name);
assertThat(docBuilder.getSchema().get("_id").fieldType.name).isEqualTo(FieldType.ID.name);
assertThat(
testDocument.getFields().stream()
.filter(
Expand Down Expand Up @@ -96,7 +96,7 @@ public void testBasicDocumentCreationWithoutFullTextSearch() throws IOException
assertThat(MetricsUtil.getCount(CONVERT_FIELD_VALUE_COUNTER, meterRegistry)).isZero();
assertThat(MetricsUtil.getCount(CONVERT_AND_DUPLICATE_FIELD_COUNTER, meterRegistry)).isZero();
// Only string fields have doc values not text fields.
assertThat(docBuilder.getSchema().get("_id").fieldType.name).isEqualTo(FieldType.STRING.name);
assertThat(docBuilder.getSchema().get("_id").fieldType.name).isEqualTo(FieldType.ID.name);
assertThat(
testDocument.getFields().stream()
.filter(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1725,4 +1725,20 @@ public void testConcurrentSearches() throws InterruptedException {
assertThat(searchFailures.get()).isEqualTo(0);
assertThat(successfulRuns.get()).isEqualTo(200);
}

/**
 * Loads the test data at a fixed instant and checks that searching for {@code _id:1} within a
 * two-second window starting at that instant returns exactly one hit.
 */
@Test
public void testSearchById() {
  final Instant startTime = Instant.ofEpochSecond(1593365471);
  loadTestData(startTime);

  final long windowStartMs = startTime.toEpochMilli();
  final long windowEndMs = startTime.plusSeconds(2).toEpochMilli();
  final DateHistogramAggBuilder histogram =
      new DateHistogramAggBuilder("1", LogMessage.SystemField.TIME_SINCE_EPOCH.fieldName, "1s");

  final SearchResult<LogMessage> searchResult =
      strictLogStore.logSearcher.search(
          TEST_DATASET_NAME, "_id:1", windowStartMs, windowEndMs, 10, histogram);

  assertThat(searchResult.hits.size()).isEqualTo(1);
}
}

0 comments on commit f58714f

Please sign in to comment.