-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve error detection during server termination #358
Comments
It would be nice to catch an exit code/a signal number. I've experimented around a bit. It seems, we can't just use OTOH, we can handle SIGCHLD using libev from tarantool sources. diff --git a/src/lua/popen.c b/src/lua/popen.c
index 942f99d083..9fdd4c2e7a 100644
--- a/src/lua/popen.c
+++ b/src/lua/popen.c
@@ -37,6 +37,8 @@
#include <small/region.h>
+#include "tarantool_ev.h"
+
#include "diag.h"
#include "core/popen.h"
#include "core/fiber.h"
@@ -2410,6 +2412,59 @@ lbox_popen_gc(struct lua_State *L)
return 0;
}
+struct ww {
+ pid_t pid;
+ bool done;
+ int wstatus;
+ ev_child ev_sigchld;
+};
+
+static void
+sigchld_handler(EV_P_ ev_child *w, int revents)
+{
+ (void)revents;
+
+ ev_child_stop(EV_A_ w);
+
+ struct ww *ww = container_of(w, struct ww, ev_sigchld);
+ assert(w->rpid == ww->pid);
+ ww->wstatus = w->rstatus;
+ ww->done = true;
+}
+
+static int
+lbox_popen_w(struct lua_State *L)
+{
+ struct ww ww;
+
+ ww.pid = lua_tointeger(L, 1);
+ ww.done = false;
+ ww.wstatus = 0;
+
+ ev_child_init(&ww.ev_sigchld, sigchld_handler, ww.pid, 0);
+ ev_child_start(EV_DEFAULT_ &ww.ev_sigchld);
+
+ while (!ww.done) {
+ fiber_sleep(0.1);
+ }
+
+ lua_createtable(L, 0, 2);
+
+ if (WIFEXITED(ww.wstatus)) {
+ lua_pushliteral(L, "exited");
+ lua_setfield(L, -2, "state");
+ lua_pushinteger(L, WEXITSTATUS(ww.wstatus));
+ lua_setfield(L, -2, "exit_code");
+ } else {
+ lua_pushliteral(L, "signaled");
+ lua_setfield(L, -2, "state");
+ lua_pushinteger(L, WTERMSIG(ww.wstatus));
+ lua_setfield(L, -2, "signo");
+ }
+
+ return 1;
+}
+
/* }}} */
/* {{{ Module initialization */
@@ -2453,6 +2508,7 @@ tarantool_lua_popen_init(struct lua_State *L)
static const struct luaL_Reg popen_methods[] = {
{"new", lbox_popen_new, },
{"shell", lbox_popen_shell, },
+ {"w", lbox_popen_w, },
{NULL, NULL},
};
luaT_newmodule(L, "popen", popen_methods); And use this function from diff --git a/luatest/process.lua b/luatest/process.lua
index 9d447b3..2084dec 100644
--- a/luatest/process.lua
+++ b/luatest/process.lua
@@ -4,6 +4,7 @@ local fun = require('fun')
local ffi = require('ffi')
local fio = require('fio')
local log = require('log')
+local fiber = require('fiber')
local Class = require('luatest.class')
local OutputBeautifier = require('luatest.output_beautifier')
@@ -55,7 +56,15 @@ function Process:start(path, args, env, options)
if output_beautifier then
output_beautifier:enable({track_pid = pid})
end
- return self:from({pid = pid, ignore_gc = options.ignore_gc, output_beautifier = output_beautifier})
+ local x = self:from({pid = pid, ignore_gc = options.ignore_gc, output_beautifier = output_beautifier})
+ x._wait_cond = fiber.cond()
+ x._wait_cond_signaled = false
+ fiber.create(function()
+ x.wstatus = require('popen').w(pid)
+ x._wait_cond_signaled = true
+ x._wait_cond:broadcast()
+ end)
+ return x
end
-- luacov: disable
if options.chdir then
@@ -91,6 +100,13 @@ function Process.mt:is_alive()
return self.pid ~= nil and self.class.is_pid_alive(self.pid)
end
+function Process.mt:wait()
+ if not self._wait_cond_signaled then
+ self._wait_cond:wait()
+ end
+ return self.wstatus
+end
+
function Process.kill_pid(pid, signal, options)
checks('number|string', '?number|string', {quiet = '?boolean'})
-- Signal values are platform-dependent so we can not use ffi here
diff --git a/luatest/server.lua b/luatest/server.lua
index 5506871..d5abcb5 100644
--- a/luatest/server.lua
+++ b/luatest/server.lua
@@ -440,15 +440,15 @@ function Server:stop()
self.net_box = nil
end
- if self.process and self.process:is_alive() then
- self.process:kill()
- local ok, err = pcall(wait_for_condition, 'process is terminated', self, function()
- return not self.process:is_alive()
- end)
- if not ok and not err:find('Process is terminated when waiting for') then
- error(err)
+ if self.process then
+ if self.process:is_alive() then
+ self.process:kill()
end
- if self.process.output_beautifier.stderr:find('Segmentation fault') then
+
+ local wstatus = self.process:wait()
+ local ok = wstatus.state == 'exited' and wstatus.exit_code == 0
+
+ if not ok and self.process.output_beautifier.stderr:find('Segmentation fault') then
error(
('Segmentation fault during process termination (alias: %s, workdir: %s, pid: %d)\n%s')
:format(
@@ -459,6 +459,9 @@ function Server:stop()
)
)
end
+ if not ok then
+ error(('wstatus: %s'):format(json.encode(wstatus)))
+ end
log.debug('Killed server process PID ' .. self.process.pid)
self.process = nil
end (BTW, the built-in It is interesting that I see the exit code 0, when tarantool is terminated by SIGTERM. It is a bit surprising, but I guess that it is implemented deliberately. |
We added default conditions, while working on #349 and #252:
Segmentation fault:
Memory leak from sanitizer:
Obviously, if we add error handling they will extend the server:stop() method.
It is necessary to think about some kind of uniform approach.
Error codes in Tarantool source code.
The text was updated successfully, but these errors were encountered: