Skip to content

Commit

Permalink
PS-9391 Fixes replication break error because of HASH_SCAN
Browse files Browse the repository at this point in the history
https://perconadev.atlassian.net/browse/PS-9391

Problem
=======
When a replica's slave_rows_search_algorithms is set to HASH_SCAN, the
replication may break with HA_ERR_KEY_NOT_FOUND.

Analysis
========
When a replica's slave_rows_search_algorithms is set to HASH_SCAN,
it prepares a unique key list for all the rows in a particular Row_event.
The same unique key list will later be used to retrieve all tuples associated
to each key in the list from storage engine. In the case of multiple updates
targeted at the same row like how it is shown in the testcase, it may happen
that this unique key list filled with entries which don't exist yet in the table.
This is a problem when there is an intermediate update which changes the value
of the index column to a lesser value than the original entry and that changed
value is used in another update as shown in the second part of the testcase.
It is an issue because the unique key list is a std::set which internally
sorts it's entries. When this sorting happens, the first entry of the list
could potentially be a value which doesn't exist in the table and the when
it is searched in next_record_scan() method, it fails returning
HA_ERR_KEY_NOT_FOUND error.

Solution
========
Instead of using std::set to store the distinct keys, a combination of
unordered_set and a list is used to preserve the original order of
updates and avoid duplicates at the same time which prevents the side
effects of sorting.
  • Loading branch information
VarunNagaraju committed Jan 20, 2025
1 parent 849deb0 commit 6ab96de
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 23 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
include/master-slave.inc
Warnings:
Note #### Sending passwords in plain text without SSL/TLS is extremely insecure.
Note #### Storing MySQL user name or password information in the connection metadata repository is not secure and is therefore not recommended. Please consider using the USER and PASSWORD connection options for START REPLICA; see the 'START REPLICA Syntax' in the MySQL Manual for more information.
[connection master]
[connection slave]
set @prior_slave_rows_search_algorithms=@@global.slave_rows_search_algorithms;
Warnings:
Warning 1287 '@@slave_rows_search_algorithms' is deprecated and will be removed in a future release.
set @@global.slave_rows_search_algorithms= 'HASH_SCAN';
Warnings:
Warning 1287 '@@slave_rows_search_algorithms' is deprecated and will be removed in a future release.
[connection master]
CREATE TABLE t1 (i INT, INDEX t1_i(i));
CREATE FUNCTION f1 () RETURNS INT BEGIN
UPDATE t1 SET i = 11 WHERE i = 10;
UPDATE t1 SET i = 12 WHERE i = 11;
RETURN 0;
END|
include/sync_slave_sql_with_master.inc
[connection master]
INSERT INTO t1 VALUES (10);
SELECT f1();
f1()
0
include/sync_slave_sql_with_master.inc
include/assert.inc ['There is only one row in table t1']
[connection master]
include/diff_tables.inc [master:test.t1, slave:test.t1]
CREATE FUNCTION f2 () RETURNS INT BEGIN
UPDATE t1 SET i = 11 WHERE i = 12;
UPDATE t1 SET i = 13 WHERE i = 11;
RETURN 0;
END|
include/sync_slave_sql_with_master.inc
[connection master]
SELECT f2();
f2()
0
include/sync_slave_sql_with_master.inc
include/assert.inc ['There is only one row in table t1']
[connection master]
include/diff_tables.inc [master:test.t1, slave:test.t1]
[connection master]
SELECT * FROM t1;
i
13
DROP FUNCTION f1;
DROP FUNCTION f2;
DROP TABLE t1;
[connection slave]
set @@global.slave_rows_search_algorithms= @prior_slave_rows_search_algorithms;
Warnings:
Warning 1287 '@@slave_rows_search_algorithms' is deprecated and will be removed in a future release.
include/rpl_end.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Bug#115741 Test whether setting slave_rows_search_algorithms to HASH_SCAN works properly
# when multiple updates are targeted to the same row within a single update rows log event

--source include/have_binlog_format_row.inc
--source include/set_privilege_checks_user_as_system_user.inc
--source include/master-slave.inc

--source include/rpl_connection_slave.inc
set @prior_slave_rows_search_algorithms=@@global.slave_rows_search_algorithms;
set @@global.slave_rows_search_algorithms= 'HASH_SCAN';

# Scenarios considered in this test case involve a table with no primary key but an index on the column
# When slave_rows_search_algorithms is set to HASH_SCAN, a list "m_distinct_keys" is created internally
# to store the unique key values which, later will be used to perform a table scan to apply the events
# for the rows present in the table.
--source include/rpl_connection_master.inc
# Create a table, with no primary key and an index.
CREATE TABLE t1 (i INT, INDEX t1_i(i));

# Scenario no.1
# Sorting the elements of m_distinct_keys doesn't alter the original order of updates
# Create a stored function so that only one Update_rows_log_event is generated.
--delimiter |
CREATE FUNCTION f1 () RETURNS INT BEGIN
UPDATE t1 SET i = 11 WHERE i = 10;
UPDATE t1 SET i = 12 WHERE i = 11;
RETURN 0;
END|
--delimiter ;
--source include/sync_slave_sql_with_master.inc

--source include/rpl_connection_master.inc
INSERT INTO t1 VALUES (10);
SELECT f1();

--source include/sync_slave_sql_with_master.inc
--let $assert_text= 'There is only one row in table t1'
--let $assert_cond= [SELECT COUNT(*) FROM t1] = 1
--source include/assert.inc

--source include/rpl_connection_master.inc

# Verify that there is no difference between tables of master and slave.
--let diff_tables=master:test.t1, slave:test.t1
--source include/diff_tables.inc


# Scenario no.2
# Sorting the elements of m_distinct_keys ALTERs the original order of updates causing
# the case where the key value from the list doesn't exist in the table yet until other
# UPDATEs are executed.
--delimiter |
CREATE FUNCTION f2 () RETURNS INT BEGIN
UPDATE t1 SET i = 11 WHERE i = 12;
UPDATE t1 SET i = 13 WHERE i = 11;
RETURN 0;
END|
--delimiter ;
--source include/sync_slave_sql_with_master.inc

--source include/rpl_connection_master.inc
SELECT f2();

--source include/sync_slave_sql_with_master.inc
--let $assert_text= 'There is only one row in table t1'
--let $assert_cond= [SELECT COUNT(*) FROM t1] = 1
--source include/assert.inc

--source include/rpl_connection_master.inc

# Verify that there is no difference between tables of master and slave.
--let diff_tables=master:test.t1, slave:test.t1
--source include/diff_tables.inc

# Cleanup
--source include/rpl_connection_master.inc
SELECT * FROM t1;
DROP FUNCTION f1;
DROP FUNCTION f2;
DROP TABLE t1;

--source include/rpl_connection_slave.inc
set @@global.slave_rows_search_algorithms= @prior_slave_rows_search_algorithms;
--source include/rpl_end.inc
27 changes: 13 additions & 14 deletions sql/log_event.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7870,7 +7870,7 @@ Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg,
m_curr_row_end(nullptr),
m_key(nullptr),
m_key_info(nullptr),
m_distinct_keys(Key_compare(&m_key_info)),
m_distinct_keys(0, Key_hash(&m_key_info), Key_equal(&m_key_info)),
m_distinct_key_spare_buf(nullptr) {
DBUG_TRACE;
common_header->type_code = event_type;
Expand Down Expand Up @@ -7961,7 +7961,7 @@ Rows_log_event::Rows_log_event(
m_curr_row_end(nullptr),
m_key(nullptr),
m_key_info(nullptr),
m_distinct_keys(Key_compare(&m_key_info)),
m_distinct_keys(0, Key_hash(&m_key_info), Key_equal(&m_key_info)),
m_distinct_key_spare_buf(nullptr)
#endif
{
Expand Down Expand Up @@ -9052,9 +9052,9 @@ int Rows_log_event::next_record_scan(bool first_read) {
marker according to the next key value that we have in the list.
*/
if (error) {
if (m_itr != m_distinct_keys.end()) {
m_key = *m_itr;
m_itr++;
if (m_distinct_key_idx != m_distinct_keys_original_order.size()) {
m_key = m_distinct_keys_original_order[m_distinct_key_idx];
m_distinct_key_idx++;
first_read = true;
} else {
if (!is_trx_retryable_upon_engine_error(error))
Expand Down Expand Up @@ -9088,14 +9088,13 @@ int Rows_log_event::open_record_scan() {

if (m_key_index < MAX_KEY) {
if (m_rows_lookup_algorithm == ROW_LOOKUP_HASH_SCAN) {
/* initialize the iterator over the list of distinct keys that we have */
m_itr = m_distinct_keys.begin();

/* get the first element from the list of keys and increment the
iterator
/* Initialize the index to go over the vector of distinct keys */
m_distinct_key_idx = 0;
/* get the first element from the vector of keys and increment the
index
*/
m_key = *m_itr;
m_itr++;
m_key = m_distinct_keys_original_order[m_distinct_key_idx];
m_distinct_key_idx++;
} else {
/* this is an INDEX_SCAN we need to store the key in m_key */
assert((m_rows_lookup_algorithm == ROW_LOOKUP_INDEX_SCAN) && m_key);
Expand Down Expand Up @@ -9142,9 +9141,9 @@ int Rows_log_event::add_key_to_distinct_keyset() {
DBUG_TRACE;
assert(m_key_index < MAX_KEY);
key_copy(m_distinct_key_spare_buf, m_table->record[0], m_key_info, 0);
std::pair<std::set<uchar *, Key_compare>::iterator, bool> ret =
m_distinct_keys.insert(m_distinct_key_spare_buf);
auto ret = m_distinct_keys.insert(m_distinct_key_spare_buf);
if (ret.second) {
m_distinct_keys_original_order.push_back(m_distinct_key_spare_buf);
/* Insert is successful, so allocate a new buffer for next key */
m_distinct_key_spare_buf = (uchar *)thd->alloc(m_key_info->key_length);
if (!m_distinct_key_spare_buf) {
Expand Down
39 changes: 30 additions & 9 deletions sql/log_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@
#include <functional>
#include <list>
#include <map>
#include <set>
#include <string>
#include <string_view>
#include <unordered_set>

#include "my_aes.h"
#include "libbinlogevents/include/binlog_event.h"
Expand Down Expand Up @@ -2882,30 +2882,51 @@ class Rows_log_event : public virtual binary_log::Rows_event, public Log_event {
uchar *m_key; /* Buffer to keep key value during searches */
uint m_key_index;
KEY *m_key_info; /* Points to description of index #m_key_index */
class Key_compare {
class Key_equal {
public:
/**
@param ki Where to find KEY description
@note m_distinct_keys is instantiated when Rows_log_event is constructed;
it stores a Key_compare object internally. However at that moment, the
it stores a Key_equal object internally. However at that moment, the
index (KEY*) to use for comparisons, is not yet known. So, at
instantiation, we indicate the Key_compare the place where it can
instantiation, we indicate the Key_equal the place where it can
find the KEY* when needed (this place is Rows_log_event::m_key_info),
Key_compare remembers the place in member m_key_info.
Key_equal remembers the place in member m_key_info.
Before we need to do comparisons - i.e. before we need to insert
elements, we update Rows_log_event::m_key_info once for all.
*/
Key_compare(KEY **ki = nullptr) : m_key_info(ki) {}
Key_equal(KEY **ki = nullptr) : m_key_info(ki) {}
bool operator()(uchar *k1, uchar *k2) const {
return key_cmp2((*m_key_info)->key_part, k1, (*m_key_info)->key_length,
k2, (*m_key_info)->key_length) < 0;
k2, (*m_key_info)->key_length) == 0;
}

private:
KEY **m_key_info;
};
std::set<uchar *, Key_compare> m_distinct_keys;
std::set<uchar *, Key_compare>::iterator m_itr;
class Key_hash {
public:
Key_hash(KEY **ki = nullptr) : m_key_info(ki) {}
size_t operator()(uchar *ptr) const {
size_t hash = 0;
if (ptr) {
std::string_view sv{reinterpret_cast<const char *>(ptr),
(*m_key_info)->key_length};
return std::hash<std::string_view>{}(sv);
}
return hash;
}

private:
KEY **m_key_info;
};
std::unordered_set<uchar *, Key_hash, Key_equal> m_distinct_keys;

/**
A linear list to store the distinct keys preserving the insert order
*/
std::vector<uchar *> m_distinct_keys_original_order;
std::size_t m_distinct_key_idx = 0;
/**
A spare buffer which will be used when saving the distinct keys
for doing an index scan with HASH_SCAN search algorithm.
Expand Down

0 comments on commit 6ab96de

Please sign in to comment.