From e80df9438e5f8fe18a19df4f599208662885821b Mon Sep 17 00:00:00 2001 From: Julius Goryavsky Date: Sat, 17 Dec 2016 00:14:24 -0500 Subject: [PATCH] - #PXC-347: Non-PRIM during membership change Timers in gcomm (which implemented using ASIO library) can be triggered even during normal shutdown sequence - even after interruption of the main event loop in gcomm subsystem and completion of the corresponding thread. For example, during normal shutdown sequence and after termination of the main event loop in gcomm subsystem and even after completion of the corresponding thread by the close() function (from the gcs_gcomm.cpp file), the following call path still possible: handle_timers -> handle_inactivity_timer -> check_inactive() Function handle_install_timer() also may called from the "gcomm::evs::Proto::handle_timers()" (from evs_proto.cpp module). Also, gcomm::GMCast::reconnect() function (from gmcast.cpp file) may be called from the gcomm::GMCast::handle_timers(). Thus, we have a 100% accurate evidence that after the termination of the main gcomm message loop (using the close() method from the gcs_gcomm.cpp file), we still continue to handle events associated with the timers. Perhaps this is not a direct bug of the server, but an unexpected feature of the implementation of the timers in ASIO, which allows posing the events associated with timers even after an interruption of the main event loop (using "terminate" function). For example, we see the following lines in the "epoll_reactor::run(...)" method from the epoll_reactor.ipp file, which is used in most of our test systems: ``` if (ptr == &interrupter_) { // No need to reset the interrupter since we're leaving the descriptor // in a ready-to-read state and relying on edge-triggered notifications // to make it so that we only get woken up when the descriptor's epoll // registration is updated. #if defined(ASIO_HAS_TIMERFD) if (timer_fd_ == -1) check_timers = true; #else // defined(ASIO_HAS_TIMERFD) check_timers = true; #endif // defined(ASIO_HAS_TIMERFD) } #if defined(ASIO_HAS_TIMERFD) else if (ptr == &timer_fd_) { ... check_timers = true; } #endif // defined(ASIO_HAS_TIMERFD){code} ``` As we can see, even after interruption of the main loop through the "interrupter_" handler, the ASIO code sets the "check_timers" variable to "true", and then it can add all the events associated with the timers to the queue: ``` if (check_timers) { mutex::scoped_lock common_lock(mutex_); timer_queues_.get_ready_timers(ops); ... ``` Later, the timer handlers, which called after the termination of main loop during normal shutdown sequence, can form false "view", in which all the nodes are declared as "partitioned". I think that it is sending this "view" over the network, and this leads to the transfer of entire cluster into NON-PRIMARY state. Therefore, to fix this, I propose to make two changes: 1) We need to stop responding to the timers after termination of the main gcomm-related event loop (by the close() function from the gcs_gcomm.cpp file). 2) I offer completely disable support of the timerfd feature in ASIO, which has already proved to be a source of error. I have already told about one of these problems with ASIO developers (by publishing PR in their GitHub) and I even got a positive reaction to the pull request in their GitHub, but yet there is no official release of the ASIO, which fixes all the problems related to timerfd feature. Shutting down the timerfd feature does not affect the performance of real applications - I was not able to fix the difference in the tests. Unfortunately, I cannot attach TC to the patch, because the only way to make TC for it (that I came up with) requires DEBUG_SYNC, but during normal shutdown sequence, it stops working, as management of the DEBUG_SYNC in MTR is actually carried out through conventional SQL operators, and the system is already in the process of shutdown. --- asio/asio/detail/config.hpp | 14 +++++++++++++- asio/asio/detail/impl/epoll_reactor.ipp | 15 ++++++++++++--- gcomm/src/evs_proto.cpp | 9 +++++++++ gcomm/src/gmcast.cpp | 5 +++-- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/asio/asio/detail/config.hpp b/asio/asio/detail/config.hpp index 9d7bd526a..80ff503e5 100644 --- a/asio/asio/detail/config.hpp +++ b/asio/asio/detail/config.hpp @@ -654,7 +654,19 @@ # if !defined(ASIO_HAS_TIMERFD) # if defined(ASIO_HAS_EPOLL) # if (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 8) -# define ASIO_HAS_TIMERFD 1 +// +// Currently timerfd is a source of unidentified problems +// that break GCS unit-tests on some systems during testing +// on the Jenkins. One of these problems has been found and +// corrected: https://github.com/chriskohlhoff/asio/pull/145, +// but there is another problem, because of which some of +// compilations (on the systems with timerfd feature enabled) +// occasionally lead to failures in the GCS unit-tests, that +// may happen even after merging of the proposed correction +// (i.e. problem, which is shown above is not unique one, +// nevertheless after applying the correction probability +// of failures in the GCS unit-test sharply decreased): +//# define ASIO_HAS_TIMERFD 1 # endif // (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 8) # endif // defined(ASIO_HAS_EPOLL) # endif // !defined(ASIO_HAS_TIMERFD) diff --git a/asio/asio/detail/impl/epoll_reactor.ipp b/asio/asio/detail/impl/epoll_reactor.ipp index 7d6080901..92e0c7c83 100644 --- a/asio/asio/detail/impl/epoll_reactor.ipp +++ b/asio/asio/detail/impl/epoll_reactor.ipp @@ -429,10 +429,19 @@ void epoll_reactor::run(bool block, op_queue& ops) { // If timeout was expired, we should read the expiration counter: uint64_t count; - if (read(timer_fd_, &count, sizeof(count))) + ssize_t len = sizeof(count); + char * buf = reinterpret_cast(&count); + for(;;) { - // Just to ignore return value of the read() function - // without compiler warning... + ssize_t ret = read(timer_fd_, buf, len); + if(ret == len) break; + if (ret < 0) + { + if (EINTR != errno) break; + continue; + } + buf += ret; + len -= ret; } } check_timers = true; diff --git a/gcomm/src/evs_proto.cpp b/gcomm/src/evs_proto.cpp index 1d72cf79d..ce8578be6 100644 --- a/gcomm/src/evs_proto.cpp +++ b/gcomm/src/evs_proto.cpp @@ -883,6 +883,15 @@ gu::datetime::Date gcomm::evs::Proto::handle_timers() { Timer t(TimerList::value(timers_.begin())); timers_.erase(timers_.begin()); + if (state() == S_LEAVING) + { + // Due to the implementation details of the ASIO, + // timers can be triggered during the normal shutdown + // sequence, which leads to problems. We may ignore + // timers, if we are in the process of completing + // the work: + continue; + } switch (t) { case T_INACTIVITY: diff --git a/gcomm/src/gmcast.cpp b/gcomm/src/gmcast.cpp index 6914b1491..cf0bfbe4f 100644 --- a/gcomm/src/gmcast.cpp +++ b/gcomm/src/gmcast.cpp @@ -998,8 +998,9 @@ void gcomm::GMCast::reconnect() { if (ae.retry_cnt() > ae.max_retries()) { - log_info << " cleaning up " << remote_uuid << " (" - << remote_addr << ")"; + log_info << self_string() + << " cleaning up " << remote_uuid + << " (" << remote_addr << ")"; remote_addrs_.erase(i); continue;//no reference to remote_addr or remote_uuid after this }