mirror of
https://github.com/isledecomp/isle.git
synced 2026-01-23 16:21:15 +00:00
Brute force string search for BETA10
This commit is contained in:
parent
f242130382
commit
9545493bc9
@ -465,6 +465,22 @@ def _populate_exports(self, export_rva: int, _: int):
|
||||
for (func_addr, name_addr) in combined
|
||||
]
|
||||
|
||||
def iter_string(self, encoding: str = "ascii") -> Iterator[Tuple[int, str]]:
    """Search for possible strings at each relocated address in .data.

    Walks ``self._relocated_addrs`` and, for every address that the
    ``.data`` section reports as contained, reads the bytes via
    ``self.read_string`` and tries to decode them.

    Args:
        encoding: Codec name passed to ``bytes.decode``. Defaults to
            ``"ascii"``.

    Yields:
        ``(addr, string)`` tuples for each address whose data was read
        and decoded successfully.

    Addresses are skipped silently when they fall outside ``.data``,
    when ``read_string`` returns ``None``, or when the bytes cannot be
    decoded with ``encoding``.
    """
    section = self.get_section_by_name(".data")

    for addr in self._relocated_addrs:
        # Only addresses that land inside .data are string candidates.
        if not section.contains_vaddr(addr):
            continue

        raw = self.read_string(addr)
        if raw is None:
            continue

        try:
            string = raw.decode(encoding)
        except UnicodeDecodeError:
            # Not valid text in the requested encoding; not a candidate.
            continue

        yield (addr, string)
|
||||
|
||||
def get_section_by_name(self, name: str) -> Section:
|
||||
section = next(
|
||||
filter(lambda section: section.match_name(name), self.sections),
|
||||
|
||||
@ -82,8 +82,9 @@ def __init__(
|
||||
|
||||
self._load_cvdump()
|
||||
self._load_markers()
|
||||
self._find_original_strings()
|
||||
# Detect floats first to eliminate potential overlap with string data
|
||||
self._find_float_const()
|
||||
self._find_original_strings()
|
||||
self._match_imports()
|
||||
self._match_exports()
|
||||
self._match_thunks()
|
||||
@ -314,7 +315,7 @@ def _find_original_strings(self):
|
||||
"""Go to the original binary and look for the specified string constants
|
||||
to find a match. This is a (relatively) expensive operation so we only
|
||||
look at strings that we have not already matched via a STRING annotation."""
|
||||
|
||||
# Release builds give each de-duped string a symbol so they are easy to find and match.
|
||||
for string in self._db.get_unmatched_strings():
|
||||
addr = self.orig_bin.find_string(string.encode("latin1"))
|
||||
if addr is None:
|
||||
@ -324,6 +325,24 @@ def _find_original_strings(self):
|
||||
|
||||
self._db.match_string(addr, string)
|
||||
|
||||
# Debug builds do not de-dupe the strings, so we need to find them via brute force scan.
|
||||
# We could try to match the string addrs if there is only one in orig and recomp.
|
||||
# When we sanitize the asm, the result is the same regardless.
|
||||
if self.orig_bin.is_debug:
|
||||
for addr, string in self.orig_bin.iter_string("latin1"):
|
||||
# Arbitrary cutoff: only keep strings longer than 4 characters. I think this is what Ghidra does too
|
||||
if len(string) > 4 and string[0].isalnum():
|
||||
self._db.set_orig_symbol(
|
||||
addr, SymbolType.STRING, string, len(string)
|
||||
)
|
||||
|
||||
if self.recomp_bin.is_debug:
|
||||
for addr, string in self.recomp_bin.iter_string("latin1"):
|
||||
if len(string) > 4 and string[0].isalnum():
|
||||
self._db.set_recomp_symbol(
|
||||
addr, SymbolType.STRING, string, None, len(string)
|
||||
)
|
||||
|
||||
def _find_float_const(self):
|
||||
"""Add floating point constants in each binary to the database.
|
||||
We are not matching anything right now because these values are not
|
||||
|
||||
Loading…
Reference in New Issue
Block a user