diff --git a/src/thumbs_up/analyzer_utils.py b/src/thumbs_up/analyzer_utils.py index ca5c4b0..fce0032 100644 --- a/src/thumbs_up/analyzer_utils.py +++ b/src/thumbs_up/analyzer_utils.py @@ -47,10 +47,16 @@ def cleanStart(analyzer, scs, undef=False): if undef: analyzer.logger.info("Undefining code segment: 0x%x - 0x%x", sc.start_ea, sc.end_ea) sark.data.undefine(sc.start_ea, sc.end_ea) - analyzer.logger.info("Marking all known switch tables in the segment") - analyzer.switch_identifier.markSwitchTables(sc) - analyzer.logger.info("Marking all known fptr functions") - analyzer.fptr_identifier.makePointedFunctions() + if analyzer.switch_identifier.hasSwithTables(sc): + analyzer.logger.info("Marking all known switch tables in the segment") + analyzer.switch_identifier.markSwitchTables(sc) + else: + analyzer.logger.debug("No known switch tables in the segment") + if analyzer.fptr_identifier.hasKnownFptrs(): + analyzer.logger.info("Marking all known fptr functions") + analyzer.fptr_identifier.makePointedFunctions() + else: + analyzer.logger.debug("No known fptr functions") for sc in scs: analyzer.logger.info("Re-Analyzing code segment: 0x%x - 0x%x", sc.start_ea, sc.end_ea) idc.plan_and_wait(sc.start_ea, sc.end_ea) @@ -65,7 +71,7 @@ def convertRegion(analyzer, start_ea, end_ea): end_ea (int): effective end address of the region """ wanted_code_type = analyzer.codeType(end_ea) - analyzer.logger.info("Converting code region of type %d to %d: 0x%x - 0x%x (%d)", analyzer.codeType(start_ea), wanted_code_type, start_ea, end_ea, end_ea - start_ea) + analyzer.logger.info("Converting code region of type %d to %d: 0x%x - 0x%x (%d bytes)", analyzer.codeType(start_ea), wanted_code_type, start_ea, end_ea, end_ea - start_ea) # Make sure it will be treated as code ida_bytes.del_items(start_ea, 0, end_ea - start_ea) # manually set the wanted value over the entire region @@ -127,7 +133,7 @@ def functionScan(analyzer, scs): 2. Unknown after a previous function - and it looks like the beginning of a function of the estimated code type """ for sc in scs: - analyzer.logger.debug("Function scanning code segment: 0x%x - 0x%x", sc.start_ea, sc.end_ea) + analyzer.logger.info("Function scanning code segment: 0x%x - 0x%x", sc.start_ea, sc.end_ea) search_func = False just_started = True line = sark.Line(sc.start_ea) @@ -192,7 +198,7 @@ def aggressiveFunctionScan(analyzer, scs): scs (list): list of (sark) code segments """ for sc in scs: - analyzer.logger.debug("Aggressively scanning code segment: 0x%x - 0x%x", sc.start_ea, sc.end_ea) + analyzer.logger.info("Aggressively scanning code segment: 0x%x - 0x%x", sc.start_ea, sc.end_ea) search_func = False just_started = True line = sark.Line(sc.start_ea) diff --git a/src/thumbs_up/analyzers/analyzer.py b/src/thumbs_up/analyzers/analyzer.py index cd48994..dd42322 100644 --- a/src/thumbs_up/analyzers/analyzer.py +++ b/src/thumbs_up/analyzers/analyzer.py @@ -78,6 +78,10 @@ def linkSwitchIdentifier(self): """Link a switch tables identifier to our analyzer.""" raise NotImplementedError("Subclasses should implement this!") + def presentCodeTypes(self): + """Print a list of the supported code types.""" + raise NotImplementedError("Subclasses should implement this!") + def addressSize(self): """Address size in bytes, according to the CPU's bitness. diff --git a/src/thumbs_up/analyzers/arm.py b/src/thumbs_up/analyzers/arm.py index 1640fc2..cdde547 100644 --- a/src/thumbs_up/analyzers/arm.py +++ b/src/thumbs_up/analyzers/arm.py @@ -67,6 +67,13 @@ def linkSwitchIdentifier(self): """Link a switch tables identifier to our analyzer.""" self.switch_identifier = SwitchIdentifier(self) + # Overridden base function + def presentCodeTypes(self): + """Print a list of the supported code types.""" + self.logger.info("Supported ARM code types:") + self.logger.info("0 - ARM") + self.logger.info("1 - THUMB") + # Overridden base function def isCodeContainsData(self): """Check if the code might contain data constants. diff --git a/src/thumbs_up/analyzers/intel.py b/src/thumbs_up/analyzers/intel.py index 7b0cdda..de2ec5e 100644 --- a/src/thumbs_up/analyzers/intel.py +++ b/src/thumbs_up/analyzers/intel.py @@ -62,6 +62,12 @@ def linkSwitchIdentifier(self): """Link a switch tables identifier to our analyzer.""" self.switch_identifier = SwitchIdentifier(self) + # Overridden base function + def presentCodeTypes(self): + """Print a list of the supported code types.""" + self.logger.info("Supported Intel code types:") + self.logger.info("0 - Intel (Only a single code type is supported)") + # Overridden base function def isCodeAligned(self, ea, code_type=None): """Check if the code is aligned according to the given code type. diff --git a/src/thumbs_up/analyzers/mips.py b/src/thumbs_up/analyzers/mips.py index f064c6a..b19572f 100644 --- a/src/thumbs_up/analyzers/mips.py +++ b/src/thumbs_up/analyzers/mips.py @@ -67,6 +67,13 @@ def linkSwitchIdentifier(self): """Link a switch tables identifier to our analyzer.""" self.switch_identifier = SwitchIdentifier(self) + # Overridden base function + def presentCodeTypes(self): + """Print a list of the supported code types.""" + self.logger.info("Supported MIPS code types:") + self.logger.info("0 - MIPS") + self.logger.info("1 - Mips16") + # Overridden base function def isCodeContainsData(self): """Check if the code might contain data constants. diff --git a/src/thumbs_up/thumbs_up_ELF.py b/src/thumbs_up/thumbs_up_ELF.py index 6fe6e83..ac1c179 100644 --- a/src/thumbs_up/thumbs_up_ELF.py +++ b/src/thumbs_up/thumbs_up_ELF.py @@ -155,6 +155,7 @@ def main(): logger.error("Failed to find any code segment, can't continue...") return # Notify the user about our segment decisions + logger.info("Segments, as marked in the ELF:") for sc in code_segments: logger.info("Code Segment: 0x%x - 0x%x", sc.start_ea, sc.end_ea) for sd in data_segments: @@ -172,6 +173,9 @@ def main(): analyzer.linkLocalsIdentifier() analyzer.linkSwitchIdentifier() + # Notify the user about the code types + analyzer.presentCodeTypes() + # Start the analysis logger.info("Starting the analysis") result = analysisStart(analyzer, code_segments, data_segments) diff --git a/src/thumbs_up/thumbs_up_firmware.py b/src/thumbs_up/thumbs_up_firmware.py index 360fd17..18ce453 100644 --- a/src/thumbs_up/thumbs_up_firmware.py +++ b/src/thumbs_up/thumbs_up_firmware.py @@ -102,11 +102,12 @@ def analysisStart(analyzer, scs, sds): analyzer.logger.info("Phase #%d", phase_counter) phase_counter += 1 analyzer.logger.info("Tell IDA to re-analyze all of the code segments, using the added features") - finished = False - while not finished: + num_false_fptrs = -1 + while num_false_fptrs != 0: cleanStart(analyzer, scs, undef=True) # Remove false fptrs - finished = analyzer.fptr_identifier.checkPointedFunctions() == 0 + num_false_fptrs = analyzer.fptr_identifier.checkPointedFunctions() + analyzer.logger.info("Removed %d possibly wrong fptrs", num_false_fptrs) ########################### # 6. Define the functions # @@ -195,6 +196,7 @@ def main(): logger.error("Failed to find any data segment, can't continue...") return # Notify the user about our segment decisions + logger.info("Segments, as marked by the disassembler:") for sc in code_segments: logger.info("Code Segment: 0x%x - 0x%x", sc.start_ea, sc.end_ea) for sd in data_segments: @@ -213,6 +215,9 @@ def main(): analyzer.linkLocalsIdentifier() analyzer.linkSwitchIdentifier() + # Notify the user about the code types + analyzer.presentCodeTypes() + # Start the analysis logger.info("Starting the analysis") result = analysisStart(analyzer, code_segments, data_segments) diff --git a/src/thumbs_up/utils/fptr.py b/src/thumbs_up/utils/fptr.py index bfbbd7f..cec9c3d 100644 --- a/src/thumbs_up/utils/fptr.py +++ b/src/thumbs_up/utils/fptr.py @@ -94,6 +94,16 @@ def isPrintableAddress(self, ea): """ return len(list(filter(lambda x: chr(x) in string.printable, struct.pack("!%s" % (self._analyzer.address_pack_format), ea)))) == self._analyzer.addressSize() + def hasKnownFptrs(self): + """Check if there are known fptrs. + + Return Value: + True iff there are known fptrs we've seen before + """ + for func_ea, code_type in self._ref_ptrs.items(): + return True + return False + def makePointedFunctions(self): """Modify the code and tell IDA that our code fptrs should point to the beginning of functions.""" # We want the list in descending function order diff --git a/src/thumbs_up/utils/function.py b/src/thumbs_up/utils/function.py index 40523fb..885cfa6 100644 --- a/src/thumbs_up/utils/function.py +++ b/src/thumbs_up/utils/function.py @@ -153,7 +153,6 @@ def trainFunctionClassifier(self, scs): Training must happen *after* the calibration phase """ functions = [] - # TODO: check if the loss of samples is worth the risk of training on questionable fptr data for sc in scs: functions += list(filter(lambda func: not self._analyzer.fptr_identifier.isPointedFunction(func.start_ea), sc.functions)) # Each code type is trained on it's own @@ -203,7 +202,6 @@ def calibrateFunctionClassifier(self, scs): True iff the calibration passed and the accuracy is above the minimal threshold """ functions = [] - # TODO: check if the loss of samples is worth the risk of training on questionable fptr data for sc in scs: functions += list(filter(lambda func: not self._analyzer.fptr_identifier.isPointedFunction(func.start_ea), sc.functions)) for code_type in self._analyzer.activeCodeTypes(): @@ -280,6 +278,7 @@ def calibrateFunctionClassifier(self, scs): # ValueError when we only have a single sample and we call fit() except ValueError: self._analyzer.logger.warning("Not enough functions to calibrate the classifier for code type %d", code_type) + self._analyzer.logger.warning("Disabling heuristics for code type %d", code_type) self._analyzer.disableCodeType(code_type) # If reached this point it means that all was OK, if we have some code types left diff --git a/src/thumbs_up/utils/switch_table.py b/src/thumbs_up/utils/switch_table.py index 59ef95f..84a53cf 100644 --- a/src/thumbs_up/utils/switch_table.py +++ b/src/thumbs_up/utils/switch_table.py @@ -67,6 +67,19 @@ def load(self): self._switch_case_cases = [] return False + def hasSwithTables(self, sc): + """Check if there are known switch tables in the code segment. + + Args: + sc (segment): (sark) code segment in which we are interested right now + + Return Value: + True iff the code segment contains a known switch table + """ + for switch_instr, table_start, table_end in filter(lambda x: sc.start_ea <= x[0] and x[1] < sc.end_ea, self._switch_case_entries): + return True + return False + def markSwitchTables(self, sc, aggressive=True): """Help IDA by marking all of the needed information from the observed switch tables. @@ -148,7 +161,7 @@ def observeSwitchTableFeatures(self, scs): continue except Exception: continue - # IDA recognized the switch table exactly at the last code instruction before it + # IDA recognized the switch table exactly at the last code instruction before it begins observer.add(line) # Going to use the easy case