diff --git a/CHANGELOG.md b/CHANGELOG.md index f2959ea96..215955e3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ * `$t` symbols in ARM binaries now force creation of Thumb-mode code blocks. * In PE binaries, duplicate imports no longer create duplicate symbols. * Added pattern to match missed symbolic data in pointer arrays. +* Fixed symbols associated with functions (Auxdata functionNames) for PE binaries + when Ddisasm is run with option `-F`. * Requires gtirb >=1.12.1, gtirb-pprinter >=2.0.0 # 1.7.0 diff --git a/examples/ex_ml_sym_mangling/Makefile.windows b/examples/ex_ml_sym_mangling/Makefile.windows index 1737a3f9f..69b3dcc05 100644 --- a/examples/ex_ml_sym_mangling/Makefile.windows +++ b/examples/ex_ml_sym_mangling/Makefile.windows @@ -1,4 +1,4 @@ -CC= +CC=cl CFLAGS= EXEC= @@ -16,7 +16,6 @@ clean: rm -f ex *.dll out.txt *.s *.lib *.exp *.o *.err *.obj *.exe check: - $(CC) ex.c foo.lib baz.lib @ ex > check.txt @ FC out.txt check.txt && echo TEST OK diff --git a/src/datalog/symbols.dl b/src/datalog/symbols.dl index 6a0b36768..b1452d3e5 100644 --- a/src/datalog/symbols.dl +++ b/src/datalog/symbols.dl @@ -203,13 +203,26 @@ elf_avoid_symbols("__TMC_END__"). // and no pre-existing function symbols. best_func_symbol(EA,SymbolName):- - symbol_score(EA,SymbolName,"FUNC","Beg",Score), - Score = max S: {symbol_score(EA,_,"FUNC","Beg",S)}. + binary_format("ELF"), + symbol_score(EA,SymbolName,"FUNC","Beg",Score), + Score = max S: {symbol_score(EA,_,"FUNC","Beg",S)}. + +// For non-ELF (e.g. PE), symbols might not have a type +best_func_symbol(EA,SymbolName):- + !binary_format("ELF"), + symbol_score(EA,SymbolName,_,"Beg",Score), + Score = max S: {symbol_score(EA,_,_,"Beg",S)}. 
inferred_symbol(EA,SymbolName,Scope,"DEFAULT","FUNC","Beg"), best_func_symbol(EA,SymbolName):- function_inference.function_entry(EA), - !symbol_score(EA,_,"FUNC","Beg",_), + ( + binary_format("ELF"), + !symbol_score(EA,_,"FUNC","Beg",_) + ; + !binary_format("ELF"), + !symbol_score(EA,_,_,"Beg",_) + ), SymbolName = cat("FUN_",@to_string_hex(EA)), ( binary_isa("MIPS"), Scope = "GLOBAL"; diff --git a/tests/misc_test.py b/tests/misc_test.py index 866efe91f..753814cdf 100644 --- a/tests/misc_test.py +++ b/tests/misc_test.py @@ -627,6 +627,40 @@ def test_symbol_selection(self): self.assertIn("fun", fun_names) self.assertNotIn("_fun", fun_names) + @unittest.skipUnless( + platform.system() == "Windows", "This test is Windows only" + ) + def test_pe_function_symbol_selection(self): + """ + Test that function names are correctly selected + in PE binaries. + """ + library = "baz.dll" + with cd(ex_dir / "ex_ml_sym_mangling"): + proc = subprocess.run(make("clean"), stdout=subprocess.DEVNULL) + self.assertEqual(proc.returncode, 0) + proc = subprocess.run(make("all"), stdout=subprocess.DEVNULL) + self.assertEqual(proc.returncode, 0) + for extra_args in ([], ["-F"]): + with self.subTest(extra_args=extra_args): + self.assertTrue( + disassemble( + library, format="--ir", extra_args=extra_args + )[0] + ) + + ir_library = gtirb.IR.load_protobuf(library + ".gtirb") + m = ir_library.modules[0] + + # check chosen function names + fun_names = { + sym.name + for sym in m.aux_data["functionNames"].data.values() + } + self.assertIn("Baz", fun_names) + self.assertIn("_Baz", fun_names) + self.assertIn("__Baz", fun_names) + @unittest.skipUnless( platform.system() == "Linux", "This test is linux only." )