Skip to content

Commit

Permalink
[opt](invert index) use lowercase by default
Browse files Browse the repository at this point in the history
  • Loading branch information
zzzxl1993 committed Mar 14, 2024
1 parent b52a02c commit f8dd87e
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 17 deletions.
11 changes: 1 addition & 10 deletions be/src/olap/inverted_index_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ std::string get_parser_phrase_support_string_from_properties(
if (properties.find(INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY) != properties.end()) {
return properties.at(INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY);
} else {
return INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO;
return INVERTED_INDEX_PARSER_NO;
}
}

Expand Down Expand Up @@ -126,13 +126,4 @@ std::string get_parser_ignore_above_value_from_properties(
}
}

// Returns the value stored under INVERTED_INDEX_PARSER_LOWERCASE_KEY in
// `properties`, or an empty string when the key is not present.
std::string get_parser_lowercase_from_properties(
        const std::map<std::string, std::string>& properties) {
    const auto entry = properties.find(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
    if (entry == properties.end()) {
        // Key not configured: signal "unset" with an empty string.
        return "";
    }
    return entry->second;
}

} // namespace doris
19 changes: 16 additions & 3 deletions be/src/olap/inverted_index_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ struct InvertedIndexCtx {

using InvertedIndexCtxSPtr = std::shared_ptr<InvertedIndexCtx>;

// Generic textual boolean values for inverted-index properties. In this change
// they replace the phrase-support-specific YES/NO constants and also serve as
// the default value for the lowercase property.
const std::string INVERTED_INDEX_PARSER_YES = "true";
const std::string INVERTED_INDEX_PARSER_NO = "false";

const std::string INVERTED_INDEX_PARSER_MODE_KEY = "parser_mode";
const std::string INVERTED_INDEX_PARSER_FINE_GRANULARITY = "fine_grained";
const std::string INVERTED_INDEX_PARSER_COARSE_GRANULARITY = "coarse_grained";
Expand All @@ -62,8 +65,6 @@ const std::string INVERTED_INDEX_PARSER_ENGLISH = "english";
const std::string INVERTED_INDEX_PARSER_CHINESE = "chinese";

const std::string INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY = "support_phrase";
const std::string INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES = "true";
const std::string INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO = "false";

const std::string INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE = "char_filter_type";
const std::string INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN = "char_filter_pattern";
Expand Down Expand Up @@ -91,6 +92,18 @@ CharFilterMap get_parser_char_filter_map_from_properties(
std::string get_parser_ignore_above_value_from_properties(
const std::map<std::string, std::string>& properties);

template <bool ReturnTrue = false>
std::string get_parser_lowercase_from_properties(
const std::map<std::string, std::string>& properties);
const std::map<std::string, std::string>& properties) {
if (properties.find(INVERTED_INDEX_PARSER_LOWERCASE_KEY) != properties.end()) {
return properties.at(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
} else {
if constexpr (ReturnTrue) {
return INVERTED_INDEX_PARSER_YES;
} else {
return "";
}
}
}

} // namespace doris
2 changes: 1 addition & 1 deletion be/src/olap/match_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ bool MatchPredicate::_skip_evaluate(InvertedIndexIterator* iterator) const {
if ((_match_type == MatchType::MATCH_PHRASE || _match_type == MatchType::MATCH_PHRASE_PREFIX) &&
iterator->get_inverted_index_reader_type() == InvertedIndexReaderType::FULLTEXT &&
get_parser_phrase_support_string_from_properties(iterator->get_index_properties()) ==
INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO) {
INVERTED_INDEX_PARSER_NO) {
return true;
}
return false;
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
// ANALYSER_NOT_SET, ANALYSER_NONE use default SimpleAnalyzer
_analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
}
auto lowercase = get_parser_lowercase_from_properties(_index_meta->properties());
auto lowercase = get_parser_lowercase_from_properties<true>(_index_meta->properties());
if (lowercase == "true") {
_analyzer->set_lowercase(true);
} else if (lowercase == "false") {
Expand Down Expand Up @@ -234,7 +234,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
}
_field = new lucene::document::Field(_field_name.c_str(), field_config);
if (get_parser_phrase_support_string_from_properties(_index_meta->properties()) ==
INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES) {
INVERTED_INDEX_PARSER_YES) {
_field->setOmitTermFreqAndPositions(false);
} else {
_field->setOmitTermFreqAndPositions(true);
Expand Down
2 changes: 1 addition & 1 deletion be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1234,7 +1234,7 @@ Status SegmentIterator::_init_inverted_index_iterators() {
if (_inverted_index_iterators[cid] == nullptr) {
RETURN_IF_ERROR(_segment->new_inverted_index_iterator(
_opts.tablet_schema->column(cid),
_opts.tablet_schema->get_inverted_index(unique_id), _opts,
_segment->_tablet_schema->get_inverted_index(unique_id), _opts,
&_inverted_index_iterators[cid]));
}
}
Expand Down
6 changes: 6 additions & 0 deletions be/src/olap/tablet_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,12 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
for (const auto& kv : _properties) {
(*index->mutable_properties())[kv.first] = kv.second;
}

// lowercase by default
if (!_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) {
(*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
INVERTED_INDEX_PARSER_YES;
}
}

void TabletSchema::append_column(TabletColumn column, bool is_dropped_column) {
Expand Down

0 comments on commit f8dd87e

Please sign in to comment.