From b4059721dda834daf29d6833429685e816f9b489 Mon Sep 17 00:00:00 2001 From: bhaskarbora2 <124803977+bhaskarbora2@users.noreply.github.com> Date: Tue, 17 Dec 2024 17:27:10 -0500 Subject: [PATCH] DeadPage Reference Counter Test Case (#178) * llfs dead page recycling test case to recreation llfs recycler bug * final changes with death test * after pulling script changes * some style changes * review comment changes * more review changes * updated test case --------- Co-authored-by: Bhaskar Bora --- src/llfs/ioring_page_file_device.cpp | 3 + src/llfs/simulated_page_device_impl.cpp | 5 +- src/llfs/volume.test.cpp | 393 ++++++++++++++++++- src/llfs/volume_trimmer.cpp | 3 +- src/llfs/volume_trimmer_recovery_visitor.cpp | 2 +- 5 files changed, 401 insertions(+), 5 deletions(-) diff --git a/src/llfs/ioring_page_file_device.cpp b/src/llfs/ioring_page_file_device.cpp index a6ea5a0..6bbb06b 100644 --- a/src/llfs/ioring_page_file_device.cpp +++ b/src/llfs/ioring_page_file_device.cpp @@ -222,6 +222,9 @@ void IoRingPageFileDevice::read_some(PageId page_id, i64 page_offset_in_file, void IoRingPageFileDevice::drop(PageId id, WriteHandler&& handler) { // TODO [tastolfi 2021-06-11] - trim at device level? + // Note that we are not clearing the data blocks here. This behavior is copied replicated for + // SimulatedPageDevice drop() too. Thus, when modifying this function do visit + // SimulatedPageDevice::Impl::drop() and make sure things are updated there. (void)id; handler(OkStatus()); } diff --git a/src/llfs/simulated_page_device_impl.cpp b/src/llfs/simulated_page_device_impl.cpp index 4af4d09..c23feb2 100644 --- a/src/llfs/simulated_page_device_impl.cpp +++ b/src/llfs/simulated_page_device_impl.cpp @@ -309,9 +309,10 @@ void SimulatedPageDevice::Impl::drop(u32 device_create_step, PageId page_id, return; } - // No failure injected; erase the DataBlock at the given index and notify the op. + // No failure injected. + // Note that we are not not erasing the data block as we want to mimic the actual + // io_ring Page Device behavior. // - locked_blocks->erase(block_i); op->set_block_result(block_i - block_0, batt::OkStatus()); }); }); diff --git a/src/llfs/volume.test.cpp b/src/llfs/volume.test.cpp index dea3e4c..d82dd11 100644 --- a/src/llfs/volume.test.cpp +++ b/src/llfs/volume.test.cpp @@ -1373,6 +1373,11 @@ class VolumeSimTest : public ::testing::Test */ auto get_slot_visitor(); + /** \brief Returns a slot visitor function for use when recovering/verifying the Volume, + * post-crash for dead page ref count testing. + */ + auto get_slot_visitor_for_first_page_only(); + /** \brief Resets all member data that might have been modified by the slot visitor. */ void reset_visitor_outputs(); @@ -1401,6 +1406,40 @@ class VolumeSimTest : public ::testing::Test */ void run_recovery_sim(u32 seed); + /** \brief Runs dead page ref count recovery test where recycler does the recovery and retry the + * dead page recycling task. + * + * \param seed Used to initialize the pseudo-random number generator that drives the simulation. + * \param yield_count Used to specify number of times task::yield() needs to be run. + */ + void run_dead_page_recovery_test(u32 seed, u32 yield_count); + + /** \brief Runs dead page ref count recovery test where both recycler and volume-trimmer does the + * recovery/retry of dead page recycling. + * + * \param seed Used to initialize the pseudo-random number generator that drives the simulation. + * \param yield_count_pre_halt Used to specify # of times task::yield() needs to be run pre-halt. + * \param yield_count_post_halt Used to specify # of times task::yield() needs to be run + * post-halt. + */ + void run_dead_page_recovery_test_variant(u32 seed, u32 yield_count_pre_halt, + u32 yield_count_post_halt); + + /** \brief This allocates a page and writes a slot to log with the page info. + */ + void alloc_one_page_and_commit(RecoverySimState& state, llfs::StorageSimulation& sim, + llfs::Volume& volume, llfs::SlotRange& slot_offset); + + /** \brief This handles the pre-halt writing tasks for the volume (which include volume trim). + */ + void pre_halt_processing(RecoverySimState& state, llfs::StorageSimulation& sim, u32 yield_count, + const std::function& funct); + + /** \brief This handles the post-halt volme reopening of the volume. + */ + void post_halt_processing(RecoverySimState& state, llfs::StorageSimulation& sim, u32 yield_count, + const std::function& funct); + /** \brief Commit one job with a page from the 1kb device. */ void commit_first_job(RecoverySimState& state, llfs::StorageSimulation& sim, @@ -1421,7 +1460,7 @@ class VolumeSimTest : public ::testing::Test //+++++++++++-+-+--+----- --- -- - - - - - const llfs::PageCount pages_per_device = llfs::PageCount{4}; + const llfs::PageCount pages_per_device = llfs::PageCount{10}; }; //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - @@ -2079,4 +2118,356 @@ TEST_F(VolumeTest, AppendableJobGrantSize) EXPECT_EQ(llfs::packed_sizeof_slot(*packed_commit_job), commit_slot_size); } +//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - +// +TEST_F(VolumeSimTest, DeadPageRefCountSimulation) +{ + const u32 max_seeds = batt::getenv_as("MAX_SEEDS").value_or(1000); + const u32 yield_count = batt::getenv_as("YIELD_COUNT").value_or(37); + + // Note that default yield_count is 37. At or around this value simulation ends up getting only + // one retry at the time of recovery, which happens in page recycler. This is the condition the + // test is shooting for thus the test is tuned to hit that with 100% certainty. + + LOG(INFO) << "Starting DeadPageRefCount test for " << max_seeds << " iterations..."; + for (u32 current_seed = 0; current_seed < max_seeds; ++current_seed) { + LOG_EVERY_N(INFO, 100) << BATT_INSPECT(current_seed) << BATT_INSPECT(max_seeds) + << BATT_INSPECT(yield_count); + ASSERT_NO_FATAL_FAILURE(this->run_dead_page_recovery_test(current_seed, yield_count)); + } +} + +using DeathTestVolumeSimTest = VolumeSimTest; + +//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - +// +TEST_F(DeathTestVolumeSimTest, DeadPageRefCountVariantSimulation) +{ + const u32 max_seeds = batt::getenv_as("MAX_SEEDS").value_or(1000); + + // Note that this test fails few times when pre_halt and post_halt yield values are around + // 50 and 40 respectively. To create a perfectly failing test use 50 and 40 as the pre_halt and + // post_halt values respectively. + + LOG(INFO) << "Starting DeadPageRefCountVariant test for " << max_seeds << " iterations..."; + + // We will setup a two ranges to pick yield counts from. + std::vector> ranges{{30, 50}, {30, 100}}; + std::mt19937 rng{0}; + std::uniform_int_distribution pick_value1{ranges[0].first, ranges[0].second}; + std::uniform_int_distribution pick_value2{ranges[1].first, ranges[1].second}; + usize yield_pre_halt = 0, yield_post_halt = 0; + + auto main_test_block = [&]() { + for (u32 current_seed = 0; current_seed < max_seeds; ++current_seed) { + LOG_EVERY_N(INFO, 100) << BATT_INSPECT(current_seed) << BATT_INSPECT(max_seeds); + + yield_pre_halt = (yield_pre_halt % 2) ? pick_value1(rng) : pick_value2(rng); + yield_post_halt = (yield_post_halt % 2) ? pick_value1(rng) : pick_value2(rng); + + ASSERT_NO_FATAL_FAILURE( + this->run_dead_page_recovery_test_variant(current_seed, yield_pre_halt, yield_post_halt)); + } + }; + // Note that we are enabling thread-safe mode for Death-Test. + // + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; + EXPECT_EXIT(main_test_block(), testing::ExitedWithCode(6), "FATAL.*"); + ::testing::FLAGS_gtest_death_test_style = "fast"; +} + +//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - +// +void VolumeSimTest::alloc_one_page_and_commit(RecoverySimState& state, llfs::StorageSimulation& sim, + llfs::Volume& volume, llfs::SlotRange& slot_offset) +{ + std::unique_ptr job = volume.new_job(); + + ASSERT_NE(job, nullptr); + + batt::StatusOr new_page_id = + this->build_page_with_refs_to({}, llfs::PageSize{1 * kKiB}, *job, sim); + + ASSERT_TRUE(new_page_id.ok()) << BATT_INSPECT(new_page_id.status()); + + // Save the page_id so we can use it later. + // + state.first_page_id = *new_page_id; + + // Write the page and slot to the Volume. + // + batt::StatusOr slot_range = + this->commit_job_to_root_log(std::move(job), *new_page_id, volume, sim); + + ASSERT_TRUE(slot_range.ok()) << BATT_INSPECT(slot_range.status()); + + sim.log_event("first job successfully appended! slot_range=", *slot_range); + slot_offset = *slot_range; +} + +//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - +// +auto VolumeSimTest::RecoverySimState::get_slot_visitor_for_first_page_only() +{ + return [this](const llfs::SlotParse /*slot*/, const llfs::PageId& page_id) { + if (page_id == this->first_page_id) { + this->recovered_first_page = true; + } else { + this->no_unknown_pages = false; + } + return batt::OkStatus(); + }; +} + +//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - +// +void display_metric_data(llfs::Volume& volume) +{ + LLFS_VLOG(1) << BATT_INSPECT(volume.page_recycler_metrics().insert_count); + LLFS_VLOG(1) << BATT_INSPECT(volume.page_recycler_metrics().remove_count); + LLFS_VLOG(1) << BATT_INSPECT(volume.page_recycler_metrics().page_drop_ok_count); + LLFS_VLOG(1) << BATT_INSPECT(volume.page_recycler_metrics().page_drop_error_count); +} + +//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - +// +void batt_task_yield(u32 count) +{ + while (count--) { + batt::Task::yield(); + } +} + +//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - +// +void VolumeSimTest::pre_halt_processing( + RecoverySimState& state, llfs::StorageSimulation& sim, u32 yield_count, + const std::function& funct) +{ + llfs::SlotRange saved_slot_range; + + LLFS_VLOG(1) << "Creating local simulation volume"; + batt::StatusOr> recovered_volume = sim.get_volume( + "TestVolume", /*slot_visitor_fn=*/ + [](auto&&...) { + return batt::OkStatus(); + }, + /*root_log_capacity=*/64 * kKiB); + + LLFS_VLOG(1) << "volume recovery status=" << recovered_volume.status(); + + ASSERT_TRUE(recovered_volume.ok()) << recovered_volume.status(); + + llfs::Volume& volume = **recovered_volume; + + llfs::SlotRange slot_range; + constexpr u32 max_page_count = 10; + constexpr u32 num_pages_to_trim = 7; + for (u32 page = 1; page <= max_page_count; ++page) { + this->alloc_one_page_and_commit(state, sim, volume, slot_range); + + if (page == num_pages_to_trim) { + saved_slot_range = slot_range; + } + } + + auto status = volume.trim(saved_slot_range.upper_bound); + ASSERT_TRUE(status.ok()) << "trim failed..."; + + batt_task_yield(yield_count); + + display_metric_data(volume); + + funct(volume, saved_slot_range); +} + +//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - +// +void VolumeSimTest::post_halt_processing(RecoverySimState& state, llfs::StorageSimulation& sim, + const u32 yield_count, + const std::function& funct) +{ + LLFS_VLOG(1) << "About to do recovery()" << BATT_INSPECT(state.seed); + + // Recover the Volume. + // + batt::StatusOr> recovered_volume = + sim.get_volume("TestVolume", llfs::TypedSlotReader::make_slot_visitor( + state.get_slot_visitor_for_first_page_only())); + + ASSERT_TRUE(recovered_volume.ok()) << recovered_volume.status(); + + llfs::Volume& volume = **recovered_volume; + batt_task_yield(yield_count); + + display_metric_data(volume); + funct(volume); +} + +//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - +// +/** + * @brief This test function is crafted to recreate a scenario where dead-page recycling happens + * around DB halt/shut-down. + * + * In this simulation test 10 pages are written to the Volume-Log and then first 7 pages are + * trimmed. Before halting the system we make sure volume-trim is completed. + * + * Following are the sequence of events + * 1. Write few pages (10) to the log. + * 2. Trim few couple (7) of pages from the log (trim). + * 3. Page recycler does the recycling for the pages (batch creation, commit etc). + * 4. We do await_trim() on the volume log before halting. + * 4. DB is halted. + * 5. DB is restarted. + * 6. Recycler finds a pending batch to recyle and hands over that to recycler main_task. + * 7. Recycler successfully processes the recycling task by ignoring it. + * + * @param seed for the simulation. + * @param yield_count for number of times we want to yield within the main test flow. + * @return NONE. + */ +void VolumeSimTest::run_dead_page_recovery_test(u32 seed, u32 yield_count) +{ + RecoverySimState state; + state.seed = seed; + + std::mt19937 rng{state.seed}; + + llfs::StorageSimulation sim{batt::StateMachineEntropySource{ + /*entropy_fn=*/[&rng](usize min_value, usize max_value) -> usize { + std::uniform_int_distribution pick_value{min_value, max_value}; + return pick_value(rng); + }}}; + + // Add pages here. + // + sim.add_page_arena(this->pages_per_device, llfs::PageSize{1 * kKiB}); + sim.register_page_reader(llfs::PageGraphNodeView::page_layout_id(), __FILE__, __LINE__, + llfs::PageGraphNodeView::page_reader()); + + const auto main_simulation_task = [&] { + sim.set_inject_failures_mode(false); + + LLFS_VLOG(1) << "This is the main task: " << BATT_INSPECT(seed) + << BATT_INSPECT(sim.is_running()); + + yield_count = seed; + // Create the simulated Volume and write to volume before initiating halt. + // + { + this->pre_halt_processing( + state, sim, yield_count, [](llfs::Volume& volume, llfs::SlotRange& slot_range) { + LLFS_VLOG(1) << "await_trim() start at offset " << BATT_INSPECT(slot_range.upper_bound); + EXPECT_TRUE(volume.await_trim(slot_range.upper_bound).ok()); + LLFS_VLOG(1) << "await_trim() end"; + + volume.halt(); + volume.join(); + }); + } + + EXPECT_TRUE(state.first_page_id.is_valid()) << BATT_INSPECT(state.seed); + + // After the crash recover DB state. + // + { + this->post_halt_processing(state, sim, yield_count, [](llfs::Volume& volume) { + // Check and make sure we have no drop errors. + // + EXPECT_EQ(volume.page_recycler_metrics().page_drop_error_count, 0); + }); + } + }; + + sim.run_main_task(main_simulation_task); +} + +//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - +// +/** + * @brief This test function is crafted to recreate a scenario where dead-page recycling happens + * around DB halt/shut-down. + * + * In this simulation test 10 pages are written to the Volume-Log and then first 7 pages are + * trimmed. Before halting the system we make sure volume-trim is completed. + * + * + * Following are the sequence of events + * 1. Write few pages to the log. + * 2. Trim first few pages from the log (trim). + * 3. Page recycler initiates the recycling for the pages (batch creation etc). + * 4. We want to make sure we halt the DB before trimmer trimes the log. + * 4. DB is halted. + * 5. DB is restarted. + * 6. Recycler finds a pending batch to recyle and hands over that to recycler main_task. + * 7. Recycler successfully processes the recycling task and decrements the ref count. + * 8. In the mean time volume-trimmer also post a dead-page recycling request to Recycler. + * 9. Trimmer request gets us into the situation where Recycler find the Page ref count to be + * already 0 and panics. + * + * @param seed for the simulation. + * @param yield_count_pre_halt for number of times we want to yield within the main test flow + * pre-halt. + * @param yield_count_post_halt for number of times we want to yield within the main test flow + * post-halt. + * @return NONE. + */ +void VolumeSimTest::run_dead_page_recovery_test_variant(u32 seed, u32 yield_count_pre_halt, + u32 yield_count_post_halt) +{ + RecoverySimState state; + std::mt19937 rng{state.seed}; + + state.seed = seed; + + llfs::StorageSimulation sim{batt::StateMachineEntropySource{ + /*entropy_fn=*/[&rng](usize min_value, usize max_value) -> usize { + std::uniform_int_distribution pick_value{min_value, max_value}; + return pick_value(rng); + }}}; + + // Add pages here. + // + sim.add_page_arena(this->pages_per_device, llfs::PageSize{1 * kKiB}); + sim.register_page_reader(llfs::PageGraphNodeView::page_layout_id(), __FILE__, __LINE__, + llfs::PageGraphNodeView::page_reader()); + + const auto main_simulation_task = [&] { + sim.set_inject_failures_mode(false); + + LLFS_VLOG(1) << "This is the main task: " << BATT_INSPECT(seed) + << BATT_INSPECT(sim.is_running()); + + // Create the simulated Volume. + // + { + this->pre_halt_processing(state, sim, yield_count_pre_halt, + [](llfs::Volume& volume, llfs::SlotRange& slot_range) { + LLFS_VLOG(1) << "Offset " << BATT_INSPECT(slot_range); + + volume.halt(); + volume.join(); + }); + } + EXPECT_TRUE(state.first_page_id.is_valid()) << BATT_INSPECT(state.seed); + + // After the crash recover DB state. + // + { + this->post_halt_processing(state, sim, yield_count_pre_halt, [&](llfs::Volume& volume) { + // Make sure we leave enough time for VolumeRecycler and Trimmer to run through. + // + batt_task_yield(yield_count_post_halt); + + // Check and make sure we have no drop errors. + // + EXPECT_EQ(volume.page_recycler_metrics().page_drop_error_count, 0); + }); + } + }; + + sim.run_main_task(main_simulation_task); +} + } // namespace diff --git a/src/llfs/volume_trimmer.cpp b/src/llfs/volume_trimmer.cpp index 72b3e64..68c5fe3 100644 --- a/src/llfs/volume_trimmer.cpp +++ b/src/llfs/volume_trimmer.cpp @@ -224,12 +224,13 @@ Status VolumeTrimmer::run() << BATT_INSPECT(this->trim_control_.debug_info()) << BATT_INSPECT(loop_counter) << BATT_INSPECT(this->trim_control_.is_closed()) << BATT_INSPECT(this->trim_delay_)); + LLFS_VLOG(1) << "VolumeTrimmer::run > await_trim_target: " << BATT_INSPECT(least_upper_bound); StatusOr trim_upper_bound = this->await_trim_target(least_upper_bound); BATT_REQUIRE_OK(trim_upper_bound) << this->error_log_level_.load(); LLFS_VLOG(1) << BATT_INSPECT(trim_upper_bound) << BATT_INSPECT(this->trim_control_.is_closed()) - << BATT_INSPECT_STR(this->name_); + << BATT_INSPECT_STR(this->name_) << BATT_INSPECT(least_upper_bound); // Whenever we get a new trim_upper_bound, always check first to see if we are shutting down. If // so, don't start a new trim, as this creates race conditions with the current/former holders diff --git a/src/llfs/volume_trimmer_recovery_visitor.cpp b/src/llfs/volume_trimmer_recovery_visitor.cpp index c90f66a..9bec841 100644 --- a/src/llfs/volume_trimmer_recovery_visitor.cpp +++ b/src/llfs/volume_trimmer_recovery_visitor.cpp @@ -100,7 +100,7 @@ Status VolumeTrimmerRecoveryVisitor::on_volume_trim(const SlotParse& slot, LLFS_VLOG(1) << "VolumeTrimmerRecoveryVisitor::on_volume_trim(slot=" << slot.offset << ") trimmed_region == " << SlotRange{trim_event.old_trim_pos, trim_event.new_trim_pos} - << BATT_INSPECT(is_pending) << trim_event; + << BATT_INSPECT(is_pending) << BATT_INSPECT(this->log_trim_pos_) << trim_event; if (is_pending) { if (this->trim_event_info_ != None) {