From 9f8dae8f2f52beaa8e7216dc8ccd8f94e48c8a36 Mon Sep 17 00:00:00 2001 From: Sfinktah Bungholio Date: Sat, 20 Mar 2021 19:26:32 +1100 Subject: [PATCH 1/2] now you can load structures and adjust them --- HexRaysPyTools/callbacks/scanners.py | 2 +- HexRaysPyTools/core/helper.py | 2 + HexRaysPyTools/core/temporary_structure.py | 135 +++++++++++++++++++-- HexRaysPyTools/core/type_library.py | 2 + HexRaysPyTools/forms.py | 3 + 5 files changed, 131 insertions(+), 13 deletions(-) diff --git a/HexRaysPyTools/callbacks/scanners.py b/HexRaysPyTools/callbacks/scanners.py index c32a948..66aa6a5 100644 --- a/HexRaysPyTools/callbacks/scanners.py +++ b/HexRaysPyTools/callbacks/scanners.py @@ -44,7 +44,7 @@ def activate(self, ctx): class DeepScanVariable(Scanner): description = "Deep Scan Variable" - hotkey = "shift+F" + hotkey = "Shift+Alt+F" def __init__(self): super(DeepScanVariable, self).__init__() diff --git a/HexRaysPyTools/core/helper.py b/HexRaysPyTools/core/helper.py index 20275d3..b8b6f25 100644 --- a/HexRaysPyTools/core/helper.py +++ b/HexRaysPyTools/core/helper.py @@ -352,6 +352,8 @@ def create_padding_udt_member(offset, size): if size == 1: udt_member.type = const.BYTE_TINFO else: + if size < 1 or size > 0xffffffff: + print("HexRaysPyTools::core::helper::create_padding_udt_member: size is out of uint32 range (offset:{} size:{})".format(offset, size)) array_data = idaapi.array_type_data_t() array_data.base = 0 array_data.elem_type = const.BYTE_TINFO diff --git a/HexRaysPyTools/core/temporary_structure.py b/HexRaysPyTools/core/temporary_structure.py index 806c6bc..8e7ff0f 100644 --- a/HexRaysPyTools/core/temporary_structure.py +++ b/HexRaysPyTools/core/temporary_structure.py @@ -2,8 +2,10 @@ import itertools from PyQt5 import QtCore, QtGui, QtWidgets +import ida_name import idaapi import idc +import sys from . import common from . import const @@ -11,14 +13,89 @@ import HexRaysPyTools.api as api from HexRaysPyTools.forms import MyChoose +def get_type_size(type): + sid = idc.get_struc_id(type) + if sid != idc.BADADDR: + return idc.get_struc_size(sid) + + try: + name, tp, fld = idc.parse_decl(type, 1) + if tp: + return idc.SizeOf(tp) + except: + return 0 + +def get_type_tinfo(t): + type_tuple = idaapi.get_named_type(None, t, 1) + tif = idaapi.tinfo_t() + try: + tif.deserialize(None, type_tuple[1], type_tuple[2]) + return tif + except TypeError: + return None -SCORE_TABLE = dict((v, k) for k, v in enumerate( - ['unsigned __int8 *', 'unsigned __int8', '__int8 *', '__int8', '_BYTE', '_BYTE *', '_BYTE **', 'const char **', - 'signed __int16', 'unsigned __int16', '__int16', 'signed __int16 *', 'unsigned __int16 *', '__int16 *', - '_WORD *', '_WORD **', '_QWORD', '_QWORD *', - 'signed int*', 'signed int', 'unsigned int *', 'unsigned int', 'int **', 'char **', 'int *', 'void **', - 'int', '_DWORD *', 'char', '_DWORD', '_WORD', 'void *', 'char *'] -)) +def score_table(type, offset): + alignment = offset % 8 + size = get_type_size(type) + # the pythonic solution escape me, so we will do this by the numbers + # and optimise later. + + score = 0 + + # alignment shows us unlikely possibility like __int64 at offset 5. + # often struct elements are cast to large types for zero-init. there- + # fore we prioritise smaller and correctly aligned data types, with + # (in future) consideration for neighbouring data types and (possibly) + # repeated indications of a given data type. + # + # it might be good to weight how many other vars are disabled depending + # on which choice is made -- though a struct would hide a lot of variables + # (and we would still want the struct) -- we wouldn't necessarily want a + # QWORD if there were 4 x WORDS that wanted to fill up the space + # + # we should also prioritise reads over writes (that is to say, structs are + # often initialised (and copied) with over-large casts). + # + # it would also be useful to see where the data was sourced from -- actually + # i believe that is a "hidden feature" :) + # + # umm... also __int64 should have a very low priority since it's the IDA goto + # type, same with _DWORD (anything starting with _) vs int, and again vs int32_t. + # + # shouldn't really trust the sizes in function definitions if they are default types. + # + # in terms of whether the var is signed or not, that can often be hard to + # tell even when analysing by hand. + # + # and a vauge note that in my own struct maker, i found it easiest to assume QWORD + # first, then just keep going to smaller types as warranted. it was just a clearer + # process that usually worked well. you'll note that smaller types are preferred here + # too. + + if alignment == 0: # 8 + if size in (8, 4, 2, 1): + score += 8 // size + elif alignment == 4: # 8 + if size in (4, 2, 1): + score += 8 // size + elif alignment in (2, 6): + if size in (2, 1): + score += 8 // size + elif alignment in (1, 3, 5, 7): + if size == 1: + score += 8 // size + + tif = get_type_tinfo(type) + if tif is None: + name = "__something_lame" + else: + name = tif.dstr() + if name.startswith("_"): + score >>= 1 + score -= 1 + if score < 0: + score = 0 + return score def parse_vtable_name(address): @@ -77,7 +154,7 @@ def has_collision(self, other): def score(self): """ More score of the member - it better suits as candidate for this offset """ try: - return SCORE_TABLE[self.type_name] + return score_table(self.type_name, self.offset) except KeyError: if self.tinfo and self.tinfo.is_funcptr(): return 0x1000 + len(self.tinfo.dstr()) @@ -141,11 +218,13 @@ def get_information(self): @property def name(self): - name = idaapi.get_name(self.address) - if idaapi.is_valid_typename(name): + name = idc.get_name(self.address) + if ida_name.is_valid_typename(name): return name - name = idc.demangle_name(name, idc.get_inf_attr(idc.INF_SHORT_DN)) - return common.demangled_name_to_c_str(name) + demangled_name = idc.demangle_name(name, idc.get_inf_attr(idc.INF_SHORT_DN)) + if not demangled_name: + raise ValueError("Couldn't demangle name: {} at 0x{:x}".format(name, self.address)) + return common.demangled_name_to_c_str(demangled_name) @property def tinfo(self): @@ -744,6 +823,38 @@ def unpack_substructure(self, indices): member.name = udt_item.name self.add_row(member) + def load_struct(self): + + name = "" + while True: + name = idaapi.ask_str(name, idaapi.HIST_TYPE, "Enter type:") + if name is None: + return + sid = idc.get_struc_id(name) + if sid != idc.BADADDR: + break + + self.default_name = name + + sid = idc.get_struc_id(name) + if sid == idc.BADADDR: + print(("Invalid Struct Name: %s" % name)) + return + + tif = get_type_tinfo(name) + sys.modules["__main__"].tif = tif + nmembers = tif.get_udt_nmembers() + for index in range(nmembers): + u = idaapi.udt_member_t() + u.offset = index + if tif.find_udt_member(u, idaapi.STRMEM_INDEX) != -1: + sys.modules["__main__"].udt = u + member = Member(u.offset // 8, u.type, None) + member.name = u.name + self.add_row(member) + + + def resolve_types(self): current_item = None current_item_score = 0 diff --git a/HexRaysPyTools/core/type_library.py b/HexRaysPyTools/core/type_library.py index e9ed24f..fe6630d 100644 --- a/HexRaysPyTools/core/type_library.py +++ b/HexRaysPyTools/core/type_library.py @@ -31,6 +31,8 @@ def _enable_library_ordinals(library_num): print("[ERROR] Failed to enable ordinals") return + print("HexRaysPyTools DLL: {}".format(dll)) + dll.get_idati.restype = ctypes.POINTER(til_t) idati = dll.get_idati() dll.enable_numbered_types(idati.contents.base[library_num], True) diff --git a/HexRaysPyTools/forms.py b/HexRaysPyTools/forms.py index 1d78d55..a8ff5d0 100644 --- a/HexRaysPyTools/forms.py +++ b/HexRaysPyTools/forms.py @@ -47,6 +47,7 @@ def init_ui(self): btn_unpack = QtWidgets.QPushButton("&Unpack") btn_remove = QtWidgets.QPushButton("&Remove") btn_resolve = QtWidgets.QPushButton("Resolve") + btn_load = QtWidgets.QPushButton("Load") btn_clear = QtWidgets.QPushButton("Clear") # Clear button doesn't have shortcut because it can fuck up all work btn_recognize = QtWidgets.QPushButton("Recognize Shape") btn_recognize.setStyleSheet("QPushButton {width: 100px; height: 20px;}") @@ -81,6 +82,7 @@ def init_ui(self): grid_box.addWidget(btn_unpack, 1, 2) grid_box.addWidget(btn_remove, 1, 3) grid_box.addWidget(btn_resolve, 0, 4) + grid_box.addWidget(btn_load, 1, 4) grid_box.addItem(QtWidgets.QSpacerItem(20, 20, QtWidgets.QSizePolicy.Expanding), 1, 5) grid_box.addWidget(btn_recognize, 0, 6) grid_box.addWidget(btn_clear, 1, 6) @@ -99,6 +101,7 @@ def init_ui(self): btn_unpack.clicked.connect(lambda: self.structure_model.unpack_substructure(struct_view.selectedIndexes())) btn_remove.clicked.connect(lambda: self.structure_model.remove_items(struct_view.selectedIndexes())) btn_resolve.clicked.connect(lambda: self.structure_model.resolve_types()) + btn_load.clicked.connect(lambda: self.structure_model.load_struct()) btn_clear.clicked.connect(lambda: self.structure_model.clear()) btn_recognize.clicked.connect(lambda: self.structure_model.recognize_shape(struct_view.selectedIndexes())) struct_view.activated[QtCore.QModelIndex].connect(self.structure_model.activated) From c695392804ad51db4932fb99235e70c56c7ec6b8 Mon Sep 17 00:00:00 2001 From: Sfinktah Bungholio Date: Thu, 20 May 2021 22:14:43 +1000 Subject: [PATCH 2/2] fixed sized based struct member naming --- HexRaysPyTools/core/temporary_structure.py | 119 +++++++++++++++------ HexRaysPyTools/forms.py | 3 +- 2 files changed, 88 insertions(+), 34 deletions(-) diff --git a/HexRaysPyTools/core/temporary_structure.py b/HexRaysPyTools/core/temporary_structure.py index 8e7ff0f..402037b 100644 --- a/HexRaysPyTools/core/temporary_structure.py +++ b/HexRaysPyTools/core/temporary_structure.py @@ -6,6 +6,7 @@ import idaapi import idc import sys +import re from . import common from . import const @@ -13,6 +14,34 @@ import HexRaysPyTools.api as api from HexRaysPyTools.forms import MyChoose +def log2(v): + """ + http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogObvious + """ + a = [0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9] + n = ((v * 0x077CB531) & 0xffffffff) >> 27 + r = a[n]; + return r + +def get_operand_size_type(tif): + if tif.is_complex(): + return 'field' + if tif.is_floating(): + sizes = ['byte', 'word', 'float', 'double', 'ddouble'] + elif tif.is_integral(): + sizes = ['byte', 'word', 'dword', 'qword', 'dqword', 'tword'] + else: + return 'field' + + size = tif.get_size() + + n = log2(size) # // 8); + # n = log2(size // 8); + try: + return sizes[n] + except IndexError: + return 'field' + def get_type_size(type): sid = idc.get_struc_id(type) if sid != idc.BADADDR: @@ -25,14 +54,14 @@ def get_type_size(type): except: return 0 -def get_type_tinfo(t): - type_tuple = idaapi.get_named_type(None, t, 1) - tif = idaapi.tinfo_t() - try: - tif.deserialize(None, type_tuple[1], type_tuple[2]) - return tif - except TypeError: - return None +def get_tinfo(name): + idati = idaapi.get_idati() + ti = idaapi.tinfo_t() + + for ordinal in range(1, idaapi.get_ordinal_qty(idati)+1): + if ti.get_numbered_type(idati, ordinal) and ti.dstr() == name: + return ti + return None def score_table(type, offset): alignment = offset % 8 @@ -85,7 +114,7 @@ def score_table(type, offset): if size == 1: score += 8 // size - tif = get_type_tinfo(type) + tif = get_tinfo(type) if tif is None: name = "__something_lame" else: @@ -235,7 +264,7 @@ def tinfo(self): return const.DUMMY_FUNC except idaapi.DecompilationFailure: pass - print("[ERROR] Failed to decompile function at 0x{0:08X}".format(self.address)) + print("[ERROR] Failed to decompile function at 0x{:08X}".format(self.address)) return const.DUMMY_FUNC def show_location(self): @@ -248,7 +277,7 @@ def __init__(self, address, offset): @property def tinfo(self): - print("[INFO] Ignoring import function at 0x{0:08X}".format(self.address)) + print("[INFO] Ignoring import function at 0x{:08X}".format(self.address)) tinfo = idaapi.tinfo_t() if idaapi.guess_tinfo(tinfo, self.address): return tinfo @@ -290,7 +319,7 @@ def __init__(self, offset, address, scanned_variable=None, origin=0): AbstractMember.__init__(self, offset + origin, scanned_variable, origin) self.address = address self.virtual_functions = [] - self.name = "__vftable" + ("_{0:X}".format(self.offset) if self.offset else "") + self.name = "__vftable" + ("_{:X}".format(self.offset) if self.offset else "") self.vtable_name, self.have_nice_name = parse_vtable_name(address) self.populate() @@ -310,7 +339,7 @@ def populate(self): break def create_tinfo(self): - # print "(Virtual table) at address: 0x{0:08X} name: {1}".format(self.address, self.name) + # print "(Virtual table) at address: 0x{:08X} name: {}".format(self.address, self.name) udt_data = idaapi.udt_type_data_t() for function in self.virtual_functions: udt_data.push_back(function.get_udt_member()) @@ -319,7 +348,7 @@ def create_tinfo(self): first_entry_idx = duplicates.pop(0) print("[Warning] Found duplicate virtual functions", udt_data[first_entry_idx].name) for num, dup in enumerate(duplicates): - udt_data[dup].name = "duplicate_{0}_{1}".format(first_entry_idx, num + 1) + udt_data[dup].name = "duplicate_{}_{}".format(first_entry_idx, num + 1) tinfo = idaapi.tinfo_t() tinfo.create_ptr(const.DUMMY_FUNC) udt_data[dup].type = tinfo @@ -371,23 +400,23 @@ def show_virtual_functions(self, temp_struct): def scan_virtual_function(self, index, temp_struct): if helper.is_imported_ea(self.virtual_functions[index].address): - print("[INFO] Ignoring import function at 0x{0:08X}".format(self.address)) + print("[INFO] Ignoring import function at 0x{:08X}".format(self.address)) return try: function = idaapi.decompile(self.virtual_functions[index].address) except idaapi.DecompilationFailure: - print("[ERROR] Failed to decompile function at 0x{0:08X}".format(self.address)) + print("[ERROR] Failed to decompile function at 0x{:08X}".format(self.address)) return if helper.FunctionTouchVisitor(function).process(): function = idaapi.decompile(self.virtual_functions[index].address) if function.arguments and function.arguments[0].is_arg_var and helper.is_legal_type(function.arguments[0].tif): from . import variable_scanner - print("[Info] Scanning virtual function at 0x{0:08X}".format(function.entry_ea)) + print("[Info] Scanning virtual function at 0x{:08X}".format(function.entry_ea)) obj = api.VariableObject(function.get_lvars()[0], 0) scanner = variable_scanner.NewDeepSearchVisitor(function, self.offset, obj, temp_struct) scanner.process() else: - print("[Warning] Bad type of first argument in virtual function at 0x{0:08X}".format(function.entry_ea)) + print("[Warning] Bad type of first argument in virtual function at 0x{:08X}".format(function.entry_ea)) def scan_virtual_functions(self, temp_struct): for idx in range(len(self.virtual_functions)): @@ -431,7 +460,7 @@ def check_address(address): functions_count = 0 while True: func_address = helper.get_ptr(address) - # print "[INFO] Address 0x{0:08X}".format(func_address) + # print "[INFO] Address 0x{:08X}".format(func_address) if helper.is_code_ea(func_address) or helper.is_imported_ea(func_address): functions_count += 1 address += const.EA_SIZE @@ -459,6 +488,10 @@ def type_name(self): def font(self): return QtGui.QFont("Consolas", 10, QtGui.QFont.Bold) + @property + def cmt(self): + return '' + @property def size(self): return const.EA_SIZE @@ -468,11 +501,13 @@ class Member(AbstractMember): def __init__(self, offset, tinfo, scanned_variable, origin=0): AbstractMember.__init__(self, offset + origin, scanned_variable, origin) self.tinfo = tinfo - self.name = "field_{0:X}".format(self.offset) + self.name = "{}_{:x}".format(get_operand_size_type(self.tinfo), self.offset) + self.cmt = '' def get_udt_member(self, array_size=0, offset=0): udt_member = idaapi.udt_member_t() - udt_member.name = "field_{0:X}".format(self.offset - offset) if self.name[:6] == "field_" else self.name + udt_member.name = "{}_{:x}".format(get_operand_size_type(self.tinfo), + self.offset - offset) if re.match(r'(byte|(d|q|t|dq|)word|float|(d|dd)ouble)_', self.name) else self.name udt_member.type = self.tinfo if array_size: tmp = idaapi.tinfo_t(self.tinfo) @@ -529,7 +564,7 @@ def __init__(self, *args): """ super(TemporaryStructureModel, self).__init__(*args) self.main_offset = 0 - self.headers = ["Offset", "Type", "Name"] + self.headers = ["Offset", "Type", "Name", "Comment"] self.items = [] self.collisions = [] @@ -546,7 +581,7 @@ def data(self, index, role): item = self.items[row] if role == QtCore.Qt.DisplayRole: if col == 0: - return "0x{0:08X}".format(item.offset) + return "0x{:04X} [{}]".format(item.offset, item.offset) elif col == 1: if item.is_array and item.size > 0: array_size = self.calculate_array_size(row) @@ -555,6 +590,8 @@ def data(self, index, role): return item.type_name elif col == 2: return item.name + elif col == 3: + return item.cmt elif role == QtCore.Qt.ToolTipRole: if col == 0: return self.items[row].offset @@ -563,6 +600,8 @@ def data(self, index, role): elif role == QtCore.Qt.EditRole: if col == 2: return self.items[row].name + if col == 3: + return self.items[row].cmt elif role == QtCore.Qt.FontRole: if col == 1: return item.font @@ -580,10 +619,16 @@ def data(self, index, role): def setData(self, index, value, role): row, col = index.row(), index.column() - if role == QtCore.Qt.EditRole and idaapi.is_ident(str(value)): - self.items[row].name = str(value) - self.dataChanged.emit(index, index) - return True + if col == 2: + if role == QtCore.Qt.EditRole and idaapi.is_ident(str(value)): + self.items[row].name = str(value) + self.dataChanged.emit(index, index) + return True + if col == 3: + if role == QtCore.Qt.EditRole: + self.items[row].cmt = str(value) + self.dataChanged.emit(index, index) + return True return False def headerData(self, section, orientation, role): @@ -591,7 +636,7 @@ def headerData(self, section, orientation, role): return self.headers[section] def flags(self, index): - if index.column() == 2: + if index.column() in (2, 3): return super(TemporaryStructureModel, self).flags(index) | QtWidgets.QAbstractItemView.DoubleClicked return super(TemporaryStructureModel, self).flags(index) @@ -653,8 +698,9 @@ def pack(self, start=0, stop=None): return else: ordinal = idaapi.idc_set_local_type(-1, cdecl, idaapi.PT_TYP) + # TODO: save comments if ordinal: - print("[Info] New type {0} was added to Local Types".format(structure_name)) + print("[Info] New type {} was added to Local Types".format(structure_name)) tid = idaapi.import_type(idaapi.cvar.idati, -1, structure_name) if tid: tinfo = idaapi.create_typedef(structure_name) @@ -664,7 +710,7 @@ def pack(self, start=0, stop=None): scanned_var.apply_type(ptr_tinfo) return tinfo else: - print("[ERROR] Structure {0} probably already exist".format(structure_name)) + print("[ERROR] Structure {} probably already exist".format(structure_name)) def have_member(self, member): if self.items: @@ -756,7 +802,7 @@ def get_recognized_shape(self, start=0, stop=-1): if is_found: result.append((ordinal, idaapi.tinfo_t(tinfo))) chooser = MyChoose( - [[str(x), "0x{0:08X}".format(y.get_size()), y.dstr()] for x, y in result], + [[str(x), "0x{:08X}".format(y.get_size()), y.dstr()] for x, y in result], "Select Structure", [["Ordinal", 5], ["Size", 10], ["Structure name", 50]] ) @@ -821,6 +867,7 @@ def unpack_substructure(self, indices): for udt_item in udt_data: member = Member(offset + udt_item.offset // 8, udt_item.type, None) member.name = udt_item.name + member.cmt = udt_item.cmt self.add_row(member) def load_struct(self): @@ -841,7 +888,7 @@ def load_struct(self): print(("Invalid Struct Name: %s" % name)) return - tif = get_type_tinfo(name) + tif = get_tinfo(name) sys.modules["__main__"].tif = tif nmembers = tif.get_udt_nmembers() for index in range(nmembers): @@ -851,8 +898,14 @@ def load_struct(self): sys.modules["__main__"].udt = u member = Member(u.offset // 8, u.type, None) member.name = u.name - self.add_row(member) + # member.cmt = u.cmt + # u.cmt doesn't work, so we will do something ugly + _typename = tif.get_type_name() + name_sid = idc.get_struc_id(_typename) + member.cmt = idc.get_member_cmt(name_sid, u.offset // 8, 0) or "imported from {}".format(name) + + self.add_row(member) def resolve_types(self): diff --git a/HexRaysPyTools/forms.py b/HexRaysPyTools/forms.py index a8ff5d0..05aa8e6 100644 --- a/HexRaysPyTools/forms.py +++ b/HexRaysPyTools/forms.py @@ -47,7 +47,7 @@ def init_ui(self): btn_unpack = QtWidgets.QPushButton("&Unpack") btn_remove = QtWidgets.QPushButton("&Remove") btn_resolve = QtWidgets.QPushButton("Resolve") - btn_load = QtWidgets.QPushButton("Load") + btn_load = QtWidgets.QPushButton("&Load") btn_clear = QtWidgets.QPushButton("Clear") # Clear button doesn't have shortcut because it can fuck up all work btn_recognize = QtWidgets.QPushButton("Recognize Shape") btn_recognize.setStyleSheet("QPushButton {width: 100px; height: 20px;}") @@ -60,6 +60,7 @@ def init_ui(self): btn_pack.setShortcut("p") btn_unpack.setShortcut("u") btn_remove.setShortcut("r") + btn_load.setShortcut("l") struct_view = QtWidgets.QTableView() struct_view.setModel(self.structure_model)