gc: fix wait_lsn function
GC queries the replicas to check whether a SENT bucket can be deleted.
But this call via netbox can be faster than replication, so some
replicas may still have the bucket in the SENDING state. That is why
GC tries to sync with the replicas before checking the state of the
buckets.

However, the wait_lsn function, which is used by GC and by
vshard.storage.sync(), tried to sync with all replicas in the current
replicaset without considering the vshard config. If some CDC was
connected as a non-anonymous replica, the GC process failed.

Let's make the wait_lsn function consider the vshard config. It must
sync only with the instances which were explicitly passed to the
config and which are known to be vshard storages.

However, it's not always possible to figure out from
box.info.replication whether an instance is a vshard storage. This
happens, e.g., when named identification is used, but the names have
not been set yet and no UUIDs have been passed to the configuration.
In that case we fall back to the old behavior and check the
downstream of such a replica anyway, even if we're not sure that the
instance is in our configuration.

Closes #490

NO_DOC=bugfix
Serpentian authored and sergepetrenko committed Sep 16, 2024
1 parent c8f56cc commit bf9e6a9
Showing 2 changed files with 90 additions and 0 deletions.
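
For context, a condensed sketch of the failure mode described above. This is illustrative, with simplified names, not the exact vshard internals; the real loop lives in wait_lsn in vshard/storage/init.lua below. Before the patch, every entry of box.info.replication was waited on, and a registered but disconnected non-anonymous replica has no downstream, so the sync could never succeed:

-- Illustrative sketch of the pre-patch behavior: wait for every
-- peer listed in box.info.replication, whoever it is.
local function all_peers_caught_up(vclock)
    for _, replica in pairs(box.info.replication) do
        if replica.id ~= box.info.id then
            local down = replica.downstream
            -- A non-anonymous CDC connector is listed here as well,
            -- but it has no live downstream, so this never passes.
            if not down or down.status == 'stopped' or
                    not vclock_lesseq(vclock, down.vclock) then
                return false
            end
        end
    end
    return true
end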
test/storage-luatest/storage_1_1_test.lua (59 additions, 0 deletions)

@@ -2,6 +2,7 @@ local t = require('luatest')
local vconst = require('vshard.consts')
local vtest = require('test.luatest_helpers.vtest')
local vutil = require('vshard.util')
local lserver = require('test.luatest_helpers.server')

local group_config = {{engine = 'memtx'}, {engine = 'vinyl'}}

@@ -531,3 +532,61 @@ test_group.test_noactivity_timeout_for_explicit_master = function(g)
        _G.bucket_gc_wait()
    end, {g.replica_1_a:replicaset_uuid(), bid})
end

--
-- gh-490: the wait_lsn function didn't take into account that there
-- may be non-anonymous replicas in box.info.replication which are
-- not accessible (e.g. a CDC). Such a replica broke the GC process.
--
test_group.test_gc_with_non_anon_replica_in_cluster = function(g)
    -- Register a non-anonymous replica ('cdc') and drop it: its
    -- registration stays behind in box.space._cluster.
    local box_cfg = {}
    box_cfg.replication = {g.replica_1_a.net_box_uri}
    local server = lserver:new({box_cfg = box_cfg, alias = 'cdc'})
    server:start()
    local id = server:instance_id()
    server:drop()

    vtest.cluster_exec_each_master(g, function(engine)
        local format = {
            {'id', 'unsigned'},
            {'bid', 'unsigned'},
        }
        local s = box.schema.create_space('test', {
            engine = engine,
            format = format,
        })
        s:create_index('pk')
        s:create_index('bucket_id', {unique = false, parts = {2}})
    end, {g.params.engine})

    local bid = g.replica_1_a:exec(function(dst)
        local opts = {timeout = iwait_timeout}
        local bid = _G.get_first_bucket()
        -- Bump the vclock of the master.
        box.space.test:insert({1, bid})

        -- Send the bucket so that a SENT/garbage one remains.
        local ok, err = ivshard.storage.bucket_send(bid, dst, opts)
        ilt.assert_equals(err, nil, 'bucket_send no error')
        ilt.assert(ok, 'bucket_send ok')

        -- This failed before the patch.
        _G.bucket_gc_wait()
        ivshard.storage.sync()
        return bid
    end, {g.replica_2_a:replicaset_uuid()})

    -- Send the bucket back to restore the initial distribution.
    g.replica_2_a:exec(function(bid, dst)
        local opts = {timeout = iwait_timeout}
        local ok, err = ivshard.storage.bucket_send(bid, dst, opts)
        ilt.assert_equals(err, nil, 'bucket_send no error')
        ilt.assert(ok, 'bucket_send ok')
    end, {bid, g.replica_1_a:replicaset_uuid()})

    -- Clean up: remove the stale 'cdc' registration and the space.
    g.replica_1_a:exec(function(id)
        box.space._cluster:delete(id)
    end, {id})
    vtest.cluster_exec_each_master(g, function()
        box.space.test:drop()
    end)
end
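
The test works because server:drop() kills the 'cdc' process but leaves its registration in box.space._cluster, so the instance still appears in box.info.replication with no downstream — exactly the situation that used to hang GC. A small illustrative snippet (not part of the test) to observe that state on the master:

-- Illustrative: list registered replicas without a usable downstream,
-- which is how the dropped 'cdc' instance looks to wait_lsn.
for _, r in pairs(box.info.replication) do
    if r.id ~= box.info.id and
            (r.downstream == nil or r.downstream.status == 'stopped') then
        print(('replica %d (%s): no usable downstream'):format(r.id, r.uuid))
    end
end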
vshard/storage/init.lua (31 additions, 0 deletions)

@@ -1140,14 +1140,37 @@ end
local function vclock_lesseq(vc1, vc2)
    local lesseq = true
    for i, lsn in ipairs(vc1) do
        if i == 0 then
            -- Skip the local component.
            goto continue
        end
        lesseq = lesseq and lsn <= (vc2[i] or 0)
        if not lesseq then
            break
        end
        ::continue::
    end
    return lesseq
end

local function is_replica_in_configuration(replica)
    local is_named = M.this_replica.id == M.this_replica.name
    local id = is_named and replica.name or replica.uuid
    if id ~= nil then
        -- In most cases the name or uuid is properly set.
        return M.this_replicaset.replicas[id] ~= nil
    end

    -- When named identification is used, it's possible that the names
    -- have not been set yet: we're working on top of a schema < 3.0.0.
    -- If the user passed a UUID to the configuration, we could validate
    -- such a replica by checking the UUIDs of replicaset.replicas, but
    -- we don't, since if the user didn't specify the UUID, it's better
    -- to check the downstream anyway rather than fail immediately; in
    -- most cases the check will pass.
    return true
end

local function wait_lsn(timeout, interval)
    local info = box.info
    local current_id = info.id
@@ -1163,6 +1186,14 @@ local function wait_lsn(timeout, interval)
            if replica.id == current_id then
                goto continue
            end

            -- We must validate that we're checking the downstream of a
            -- replica which was actually configured as a vshard storage,
            -- not of some CDC. If we're not sure, we check it anyway.
            if not is_replica_in_configuration(replica) then
                goto continue
            end

            local down = replica.downstream
            if not down or (down.status == 'stopped' or
                            not vclock_lesseq(vclock, down.vclock)) then
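
For reference, vclock_lesseq implements a componentwise "less than or equal" over vclocks, where a component missing from the second vclock is treated as 0. A few illustrative calls, assuming the function were reachable from a test (it is local to the module):

-- Componentwise comparison; a missing component in vc2 counts as 0.
assert(vclock_lesseq({10, 5}, {10, 7}) == true)  -- every lsn caught up
assert(vclock_lesseq({10, 8}, {10, 7}) == false) -- component 2 is ahead
assert(vclock_lesseq({}, {3}) == true)           -- empty vclock <= anything
assert(vclock_lesseq({1}, {}) == false)          -- missing peer lsn -> 0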