mirror of
https://github.com/isledecomp/isle.git
synced 2026-01-23 16:21:15 +00:00
improved string check
This commit is contained in:
parent
9545493bc9
commit
e8944116e7
@ -325,20 +325,38 @@ def _find_original_strings(self):
|
|||||||
|
|
||||||
self._db.match_string(addr, string)
|
self._db.match_string(addr, string)
|
||||||
|
|
||||||
|
def is_real_string(s: str) -> bool:
    """Heuristic to ignore values that only look like strings.

    This is mostly about short strings (len <= 4) that could be byte or
    word values picked up by a brute-force scan of the binary.

    Args:
        s: Candidate string (the visible callers decode it as latin1).

    Returns:
        False if the value is empty, contains the byte 0x10, is a single
        character other than "0", or is a short string (len <= 4) holding
        a non-printable character other than tab, newline or carriage
        return. True otherwise.
    """
    # 0x10 is the MSB of the address space for DLLs (LEGO1), so this is a pointer
    if not s or "\x10" in s:
        return False

    # assert(0) is common, so "0" is the only single character we accept.
    if len(s) == 1:
        return s == "0"

    # str.isprintable() on the whole string would reject newlines and tabs,
    # so test character by character and allow common whitespace escapes.
    # Checking for "\\x" in repr(s) is not reliable here: a literal backslash
    # followed by "x" in a printable string would be mistaken for an escape.
    if len(s) <= 4:
        return all(ch.isprintable() or ch in "\t\n\r" for ch in s)

    return True
|
||||||
|
|
||||||
# Debug builds do not de-dupe the strings, so we need to find them via brute force scan.
|
# Debug builds do not de-dupe the strings, so we need to find them via brute force scan.
|
||||||
# We could try to match the string addrs if there is only one in orig and recomp.
|
# We could try to match the string addrs if there is only one in orig and recomp.
|
||||||
# When we sanitize the asm, the result is the same regardless.
|
# When we sanitize the asm, the result is the same regardless.
|
||||||
if self.orig_bin.is_debug:
|
if self.orig_bin.is_debug:
|
||||||
for addr, string in self.orig_bin.iter_string("latin1"):
|
for addr, string in self.orig_bin.iter_string("latin1"):
|
||||||
# Arbitrary threshold of 4, but I think this is what Ghidra does too
|
# Arbitrary threshold of 4, but I think this is what Ghidra does too
|
||||||
if len(string) > 4 and string[0].isalnum():
|
if is_real_string(string):
|
||||||
self._db.set_orig_symbol(
|
self._db.set_orig_symbol(
|
||||||
addr, SymbolType.STRING, string, len(string)
|
addr, SymbolType.STRING, string, len(string)
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.recomp_bin.is_debug:
|
if self.recomp_bin.is_debug:
|
||||||
for addr, string in self.recomp_bin.iter_string("latin1"):
|
for addr, string in self.recomp_bin.iter_string("latin1"):
|
||||||
if len(string) > 4 and string[0].isalnum():
|
if is_real_string(string):
|
||||||
self._db.set_recomp_symbol(
|
self._db.set_recomp_symbol(
|
||||||
addr, SymbolType.STRING, string, None, len(string)
|
addr, SymbolType.STRING, string, None, len(string)
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user