Skip to content

Commit

Permalink
[feature](inverted index) Add profile statistics for each condition i…
Browse files Browse the repository at this point in the history
…n inverted index filters
  • Loading branch information
zzzxl1993 committed Feb 5, 2025
1 parent e042e9f commit b35d749
Show file tree
Hide file tree
Showing 6 changed files with 244 additions and 2 deletions.
61 changes: 61 additions & 0 deletions be/src/olap/inverted_index_profile.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <string>
#include <vector>

#include "olap/inverted_index_stats.h"
#include "olap/tablet_schema.h"
#include "util/runtime_profile.h"

namespace doris {

class InvertedIndexProfileReporter {
public:
InvertedIndexProfileReporter(RuntimeProfile* profile) : _profile(profile) {}

void update(const InvertedIndexStatistics* statistics, const TabletSchemaSPtr& tablet_schema) {
if (_profile == nullptr) {
return;
}

std::string inverted_index_filter = "[";
for (auto stats : statistics->stats) {
inverted_index_filter += "(";
inverted_index_filter +=
tablet_schema->column_by_uid(stats.column_unique_id).name() + ", ";
inverted_index_filter += PrettyPrinter::print(stats.filter_count, TUnit::UNIT) + ", ";
inverted_index_filter += PrettyPrinter::print(stats.filter_time, TUnit::TIME_NS);
inverted_index_filter += "), ";
}
if (inverted_index_filter.length() > 1) {
inverted_index_filter =
inverted_index_filter.substr(0, inverted_index_filter.length() - 2);
}
inverted_index_filter += "]";

auto* child = _profile->create_child("IndexFilter");
child->add_info_string("InvertedIndexFilter", inverted_index_filter);
}

private:
RuntimeProfile* _profile = nullptr;
};

} // namespace doris
34 changes: 34 additions & 0 deletions be/src/olap/inverted_index_stats.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>
#include <vector>

namespace doris {

// Statistics recorded for a single inverted index query (one filter
// condition evaluated against one column).
struct InvertedIndexQueryStatistics {
    // Unique id of the column the query ran against. Defaults to -1 so that a
    // default-constructed instance never carries an indeterminate value.
    int32_t column_unique_id = -1;
    // Cardinality of the result bitmap produced by the query.
    int64_t filter_count = 0;
    // Time spent executing the query; rendered by the profile as nanoseconds.
    int64_t filter_time = 0;
};

// Container for the inverted index query statistics gathered while reading.
struct InvertedIndexStatistics {
// One entry per recorded inverted index query, in the order they were appended.
std::vector<InvertedIndexQueryStatistics> stats;
};

} // namespace doris
2 changes: 2 additions & 0 deletions be/src/olap/olap_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#include "common/config.h"
#include "common/exception.h"
#include "io/io_common.h"
#include "olap/inverted_index_stats.h"
#include "olap/olap_define.h"
#include "olap/rowset/rowset_fwd.h"
#include "util/hash_util.hpp"
Expand Down Expand Up @@ -378,6 +379,7 @@ struct OlapReaderStatistics {
int64_t inverted_index_searcher_cache_hit = 0;
int64_t inverted_index_searcher_cache_miss = 0;
int64_t inverted_index_downgrade_count = 0;
InvertedIndexStatistics inverted_index_stats;

int64_t output_index_result_column_timer = 0;
// number of segment filtered by column stat when creating seg iterator
Expand Down
20 changes: 18 additions & 2 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1160,8 +1160,24 @@ Status InvertedIndexIterator::read_from_inverted_index(
}
}

RETURN_IF_ERROR(_reader->query(&_io_ctx, _stats, _runtime_state, column_name, query_value,
query_type, bit_map));
auto execute_query = [&]() {
return _reader->query(&_io_ctx, _stats, _runtime_state, column_name, query_value,
query_type, bit_map);
};

if (_runtime_state->query_options().enable_profile) {
InvertedIndexQueryStatistics query_stats;
{
SCOPED_RAW_TIMER(&query_stats.filter_time);
RETURN_IF_ERROR(execute_query());
}
query_stats.column_unique_id = std::stoi(column_name);
query_stats.filter_count = bit_map->cardinality();
_stats->inverted_index_stats.stats.emplace_back(query_stats);
} else {
RETURN_IF_ERROR(execute_query());
}

return Status::OK();
}

Expand Down
5 changes: 5 additions & 0 deletions be/src/vec/exec/scan/new_olap_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "exprs/function_filter.h"
#include "io/cache/block_file_cache_profile.h"
#include "io/io_common.h"
#include "olap/inverted_index_profile.h"
#include "olap/olap_common.h"
#include "olap/olap_tuple.h"
#include "olap/rowset/rowset.h"
Expand Down Expand Up @@ -634,6 +635,10 @@ void NewOlapScanner::_collect_profile_before_close() {
stats.inverted_index_searcher_cache_miss);
COUNTER_UPDATE(local_state->_inverted_index_downgrade_count_counter,
stats.inverted_index_downgrade_count);

InvertedIndexProfileReporter inverted_index_profile(local_state->_segment_profile.get());
inverted_index_profile.update(&stats.inverted_index_stats, _tablet_reader_params.tablet_schema);

if (config::enable_file_cache) {
io::FileCacheProfileReporter cache_profile(local_state->_segment_profile.get());
cache_profile.update(&stats.file_cache_stats);
Expand Down
124 changes: 124 additions & 0 deletions be/test/olap/inverted_index_profile_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "olap/inverted_index_profile.h"

#include <gtest/gtest.h>

#include <memory>

#include "olap/inverted_index_stats.h"

namespace doris {

class InvertedIndexProfileReporterTest : public ::testing::Test {
protected:
void SetUp() override {
_runtime_profile = new RuntimeProfile("test_profile");
_reporter = new InvertedIndexProfileReporter(_runtime_profile);
}

void TearDown() override {
delete _reporter;
delete _runtime_profile;
}

RuntimeProfile* _runtime_profile = nullptr;
InvertedIndexProfileReporter* _reporter = nullptr;
};

class MockInvertedIndexStatistics1 : public InvertedIndexStatistics {
public:
MockInvertedIndexStatistics1() { stats.push_back({123, 100, 200}); }
};

// Statistics object pre-populated with three query entries, one per column
// unique id 123, 124 and 125.
class MockInvertedIndexStatistics2 : public InvertedIndexStatistics {
public:
    MockInvertedIndexStatistics2() {
        // Each row is {column_unique_id, filter_count, filter_time}.
        stats = {
                {123, 100, 200},
                {124, 150, 250},
                {125, 200, 300},
        };
    }
};

// A single statistics entry must produce one "IndexFilter" child profile whose
// "InvertedIndexFilter" info string mentions the column name, the filter count
// and the filter time.
TEST_F(InvertedIndexProfileReporterTest, UpdateTest) {
    TabletColumn column;
    column.set_unique_id(123);
    column.set_name("test_column");

    TabletSchemaSPtr schema = std::make_shared<TabletSchema>();
    schema->append_column(column);

    MockInvertedIndexStatistics1 single_entry_stats;
    _reporter->update(&single_entry_stats, schema);

    std::vector<RuntimeProfile*> child_profiles;
    _runtime_profile->get_children(&child_profiles);
    ASSERT_EQ(child_profiles.size(), 1);

    RuntimeProfile* filter_profile = child_profiles[0];
    ASSERT_EQ(filter_profile->name(), "IndexFilter");

    const std::string* rendered = filter_profile->get_info_string("InvertedIndexFilter");
    ASSERT_NE(rendered, nullptr);

    // Verify that the output format is correct.
    for (const char* expected : {"test_column", "100", "200"}) {
        ASSERT_TRUE(rendered->find(expected) != std::string::npos);
    }
}

// Three statistics entries must all be rendered into the single
// "InvertedIndexFilter" info string of the "IndexFilter" child profile.
TEST_F(InvertedIndexProfileReporterTest, UpdateTestWithMultipleStats) {
    TabletSchemaSPtr schema = std::make_shared<TabletSchema>();

    TabletColumn first_column;
    first_column.set_unique_id(123);
    first_column.set_name("test_column1");
    TabletColumn second_column;
    second_column.set_unique_id(124);
    second_column.set_name("test_column2");
    TabletColumn third_column;
    third_column.set_unique_id(125);
    third_column.set_name("test_column3");

    schema->append_column(first_column);
    schema->append_column(second_column);
    schema->append_column(third_column);

    MockInvertedIndexStatistics2 three_entry_stats;
    _reporter->update(&three_entry_stats, schema);

    std::vector<RuntimeProfile*> child_profiles;
    _runtime_profile->get_children(&child_profiles);
    ASSERT_EQ(child_profiles.size(), 1);

    RuntimeProfile* filter_profile = child_profiles[0];
    ASSERT_EQ(filter_profile->name(), "IndexFilter");

    const std::string* rendered = filter_profile->get_info_string("InvertedIndexFilter");
    ASSERT_NE(rendered, nullptr);

    // Verify that the output format is correct: every entry contributes its
    // column name, filter count and filter time.
    for (const char* expected : {"test_column1", "100", "200",
                                 "test_column2", "150", "250",
                                 "test_column3", "200", "300"}) {
        ASSERT_TRUE(rendered->find(expected) != std::string::npos);
    }
}

} // namespace doris

0 comments on commit b35d749

Please sign in to comment.