From bf9e6a9da77b01a84d422d90a0cd513b7508a8f2 Mon Sep 17 00:00:00 2001
From: Nikita Zheleztsov
Date: Wed, 11 Sep 2024 19:08:14 +0300
Subject: [PATCH] gc: fix wait_lsn function

GC goes to replicas to check whether a SENT bucket can be deleted. But
this netbox call may arrive faster than the replication does, so some
replicas may still see the bucket as SENDING. That is why GC tries to
sync with the replicas before checking the bucket states.

However, the wait_lsn() function, which is used by GC and by
vshard.storage.sync(), tried to sync with all replicas of the current
replicaset without considering the vshard config. If some CDC was
connected as a non-anonymous replica, the GC process failed.

Let's make wait_lsn() take the vshard config into account. It must sync
only with the instances which were explicitly passed to the config and
which we know to be vshard storages.

However, it's not always possible to figure out from
box.info.replication whether an instance is a vshard storage. This
happens, e.g., when named identification is used, but the names have
not been set yet and UUIDs have not been passed to the configuration.
In such a case we fall back to the old behavior and check the
downstream of such a replica anyway, even if we are not sure that the
instance is in our configuration.

Closes tarantool/vshard#490

NO_DOC=bugfix
---
 test/storage-luatest/storage_1_1_test.lua | 59 +++++++++++++++++++++++
 vshard/storage/init.lua                   | 31 ++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/test/storage-luatest/storage_1_1_test.lua b/test/storage-luatest/storage_1_1_test.lua
index e6c229f1..5bfc1587 100644
--- a/test/storage-luatest/storage_1_1_test.lua
+++ b/test/storage-luatest/storage_1_1_test.lua
@@ -2,6 +2,7 @@ local t = require('luatest')
 local vconst = require('vshard.consts')
 local vtest = require('test.luatest_helpers.vtest')
 local vutil = require('vshard.util')
+local lserver = require('test.luatest_helpers.server')
 
 local group_config = {{engine = 'memtx'}, {engine = 'vinyl'}}
 
@@ -531,3 +532,61 @@ test_group.test_noactivity_timeout_for_explicit_master = function(g)
         _G.bucket_gc_wait()
     end, {g.replica_1_a:replicaset_uuid(), bid})
 end
+
+--
+-- gh-490: wait_lsn() didn't take into account that there may be
+-- non-anonymous replicas in box.info.replication which are not
+-- accessible (e.g. a CDC). Such a replica broke the GC process.
+--
+test_group.test_gc_with_non_anon_replica_in_cluster = function(g)
+    local box_cfg = {}
+    box_cfg.replication = {g.replica_1_a.net_box_uri}
+    local server = lserver:new({box_cfg = box_cfg, alias = 'cdc'})
+    server:start()
+    local id = server:instance_id()
+    server:drop()
+
+    vtest.cluster_exec_each_master(g, function(engine)
+        local format = {
+            {'id', 'unsigned'},
+            {'bid', 'unsigned'},
+        }
+        local s = box.schema.create_space('test', {
+            engine = engine,
+            format = format,
+        })
+        s:create_index('pk')
+        s:create_index('bucket_id', {unique = false, parts = {2}})
+    end, {g.params.engine})
+
+    local bid = g.replica_1_a:exec(function(dst)
+        local opts = {timeout = iwait_timeout}
+        local bid = _G.get_first_bucket()
+        -- Bump the vclock of the master.
+        box.space.test:insert({1, bid})
+
+        -- Send the bucket so that a sent/garbage one remains.
+        local ok, err = ivshard.storage.bucket_send(bid, dst, opts)
+        ilt.assert_equals(err, nil, 'bucket_send no error')
+        ilt.assert(ok, 'bucket_send ok')
+
+        -- This failed before the patch.
+        _G.bucket_gc_wait()
+        ivshard.storage.sync()
+        return bid
+    end, {g.replica_2_a:replicaset_uuid()})
+
+    g.replica_2_a:exec(function(bid, dst)
+        local opts = {timeout = iwait_timeout}
+        local ok, err = ivshard.storage.bucket_send(bid, dst, opts)
+        ilt.assert_equals(err, nil, 'bucket_send no error')
+        ilt.assert(ok, 'bucket_send ok')
+    end, {bid, g.replica_1_a:replicaset_uuid()})
+
+    g.replica_1_a:exec(function(id)
+        box.space._cluster:delete(id)
+    end, {id})
+    vtest.cluster_exec_each_master(g, function()
+        box.space.test:drop()
+    end)
+end
diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
index 193b8e2d..f6809a0d 100644
--- a/vshard/storage/init.lua
+++ b/vshard/storage/init.lua
@@ -1140,14 +1140,37 @@ end
 local function vclock_lesseq(vc1, vc2)
     local lesseq = true
     for i, lsn in ipairs(vc1) do
+        if i == 0 then
+            -- Skip the local vclock component.
+            goto continue
+        end
         lesseq = lesseq and lsn <= (vc2[i] or 0)
         if not lesseq then
             break
         end
+        ::continue::
     end
     return lesseq
 end
 
+local function is_replica_in_configuration(replica)
+    local is_named = M.this_replica.id == M.this_replica.name
+    local id = is_named and replica.name or replica.uuid
+    if id ~= nil then
+        -- In most cases the name or uuid is properly set.
+        return M.this_replicaset.replicas[id] ~= nil
+    end
+
+    -- When named identification is used, it's possible that the names
+    -- have not been set yet: we're working on top of a schema < 3.0.0.
+    -- If the user passed UUIDs to the configuration, we could validate
+    -- such a replica by checking the UUIDs of replicaset.replicas, but
+    -- we don't do that: if the user didn't specify the UUID, it's
+    -- better to check the downstream anyway rather than fail
+    -- immediately, in most cases the check will pass.
+    return true
+end
+
 local function wait_lsn(timeout, interval)
     local info = box.info
     local current_id = info.id
@@ -1163,6 +1186,14 @@ local function wait_lsn(timeout, interval)
         if replica.id == current_id then
             goto continue
         end
+
+        -- We must validate that we're checking the downstream of a replica
+        -- which was actually configured as a vshard storage and not some CDC.
+        -- If we're not sure, check it anyway.
+        if not is_replica_in_configuration(replica) then
+            goto continue
+        end
+
         local down = replica.downstream
         if not down or (down.status == 'stopped' or
                         not vclock_lesseq(vclock, down.vclock)) then
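
Illustration only, not part of the patch above: the decision that wait_lsn() now makes
for each box.info.replication entry can be summarized as a small standalone predicate.
The names should_wait_for, config_replicas and use_names are hypothetical and exist
only for this sketch; in the patch the same check is done by is_replica_in_configuration()
against M.this_replicaset.replicas.

local function should_wait_for(replica, self_id, config_replicas, use_names)
    -- Never wait for ourselves.
    if replica.id == self_id then
        return false
    end
    -- Identify the replica the same way the config identifies it.
    local key = use_names and replica.name or replica.uuid
    if key == nil then
        -- Identity is unknown (e.g. names are not assigned yet on a
        -- schema < 3.0.0): fall back to checking the downstream anyway.
        return true
    end
    -- Skip instances that are not part of the vshard config, e.g. a CDC
    -- connected as a regular, non-anonymous replica.
    return config_replicas[key] ~= nil
end

For example, with config_replicas = {storage_1_a = true, storage_1_b = true} and
use_names = true, an entry {id = 3, name = 'cdc'} is skipped, while
{id = 2, name = 'storage_1_b'} is waited for.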