From db7e451c49fc8a3d45929c832a9b606f5deb7899 Mon Sep 17 00:00:00 2001 From: Parshintsev Anatoly Date: Tue, 14 May 2024 15:34:31 +0300 Subject: [PATCH] debug: workaround for sporadic failures of some tests due to unexpected data present in pexpect match Problem was observed on UnavailableMultiTest - this test was sporadically failing. When the failure was observed the log of the failing test looked as follows: ``` File "/whatever/RISCVTests/debug/testlib.py", line 504, in if all(targets[hart.id]["State"] == "running" for hart in harts): ~~~~~~~~~~~~~~~~^^^^^^^^^ KeyError: 'State' ``` Adding this modification to testlib.py ``` --- a/debug/testlib.py +++ b/debug/testlib.py @@ -498,6 +498,10 @@ class Openocd: for line in lines[2:]: if line.strip(): data.append(dict(zip(headers, line.split()[1:]))) + str_data = str(data) + sys.stdout.flush() + sys.stdout.write(f"parsed targets:\n{result}\n===\n{str_data}\n---\n") + sys.stdout.flush() return data ``` Allowed me to root cause the issue. Namely we have the following situation: ``` parsed targets: Exception ignored in: Traceback (most recent call last): File "/usr/local/lib/python3.11/tempfile.py", line 450, in __del__ self.close() File "/usr/local/lib/python3.11/tempfile.py", line 446, in close unlink(self.name) FileNotFoundError: [Errno 2] No such file or directory: '/tmp/gdb@38873-8s6ud03x.log' ... TargetName Type Endian TapName State -- ------------------ ---------- ------ ------------------ ------------ 0 riscv.cpu0 riscv little riscv.cpu running 1* riscv.cpu1 riscv little riscv.cpu running === [{'Exception': '"/usr/local/lib/python3.11/tempfile.py",', 'ignored': 'line', 'in:': '450,', ... ``` The only reasonable (to me) explanation for the observed behavior is below. Here is how we connect to TCL-RPC server: ``` self.openocd_cli = pexpect.spawn(f"nc localhost {self.tclrpc_port}") tty.setraw(self.openocd_cli.child_fd) ``` Later we request target list by issuing "targets" command: ``` self.command("targets") ``` Internally, pexpect.spawn implemented as follows: - we fork the process - set up pty and then call execve - all these steps are written in python "Exception ignored" messages are result of exceptions thrown from finalizers of NamedTemporaryFile objects. When exception is thrown from the finalizer - python unconditionally prints a "warning" to stderr. It seems that these messages are polluting our output from "nc" since python GC can be invoked before the execve syscall. The workaround is just to make sure that execve was executed before we rely on the format of command output. To have such a guarantee we just issue a dummy "echo" command and check that we have a proper reply in the output stream. While this explanation looks convincing, the behavior above still looks strange, given that we have https://bugs.python.org/issue14548 which was resolved long ago. However, the proposed workaround fixes the issue. --- debug/testlib.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/debug/testlib.py b/debug/testlib.py index d4826bbdc..1f107be3f 100644 --- a/debug/testlib.py +++ b/debug/testlib.py @@ -367,10 +367,15 @@ def __init__(self, server_cmd=None, config=None, debug=False, timeout=60, self.tclrpc_port = None self.start(cmd, logfile, extra_env) - self.openocd_cli = pexpect.spawn(f"nc localhost {self.tclrpc_port}") + self.openocd_cli = pexpect.spawn(f"nc localhost {self.tclrpc_port}", + echo=False) # TCL-RPC uses \x1a as a watermark for end of message. We set raw # pty mode to disable translation of \x1a to EOF tty.setraw(self.openocd_cli.child_fd) + hello_string = self.command( + "capture { echo \"Hello TCL-RPC!\" }").decode() + if not "Hello TCL-RPC!" in hello_string: + raise RuntimeError(f"TCL-RPC - unexpected reply:\n{hello_string}") def start(self, cmd, logfile, extra_env): combined_env = {**os.environ, **extra_env}