Change thunk match strategy

This commit is contained in:
disinvite 2024-04-18 17:30:16 -04:00
parent 9c6120fc37
commit b2a0638f93
2 changed files with 47 additions and 21 deletions

View File

@ -79,8 +79,8 @@ def __init__(
self._load_markers() self._load_markers()
self._find_original_strings() self._find_original_strings()
self._match_imports() self._match_imports()
self._match_thunks()
self._match_exports() self._match_exports()
self._match_thunks()
self._find_vtordisp() self._find_vtordisp()
def _load_cvdump(self): def _load_cvdump(self):
@ -307,9 +307,18 @@ def _match_thunks(self):
func_addr: thunk_addr for (thunk_addr, func_addr) in self.recomp_bin.thunks func_addr: thunk_addr for (thunk_addr, func_addr) in self.recomp_bin.thunks
} }
# Mark all recomp thunks first. This allows us to use their names
# when we sanitize the asm.
for recomp_thunk, recomp_addr in self.recomp_bin.thunks:
recomp_func = self._db.get_by_recomp(recomp_addr)
if recomp_func is None:
continue
self._db.create_recomp_thunk(recomp_thunk, recomp_func.name)
for orig_thunk, orig_addr in self.orig_bin.thunks: for orig_thunk, orig_addr in self.orig_bin.thunks:
orig_func = self._db.get_by_orig(orig_addr) orig_func = self._db.get_by_orig(orig_addr)
if orig_func is None or orig_func.recomp_addr is None: if orig_func is None:
continue continue
# Check whether the thunk destination is a matched symbol # Check whether the thunk destination is a matched symbol
@ -317,10 +326,7 @@ def _match_thunks(self):
if recomp_thunk is None: if recomp_thunk is None:
continue continue
# The thunk symbol should already exist if it is the thunk of an self._db.set_function_pair(orig_thunk, recomp_thunk)
# imported function. Incremental build thunks have no symbol,
# so we need to give it a name for the asm diff output.
self._db.register_thunk(orig_thunk, recomp_thunk, orig_func.name)
# Don't compare thunk functions for now. The comparison isn't # Don't compare thunk functions for now. The comparison isn't
# "useful" in the usual sense. We are only looking at the # "useful" in the usual sense. We are only looking at the
@ -336,9 +342,31 @@ def _match_exports(self):
for recomp_addr, export_name in self.recomp_bin.exports: for recomp_addr, export_name in self.recomp_bin.exports:
orig_addr = orig_exports.get(export_name) orig_addr = orig_exports.get(export_name)
if orig_addr is not None and self._db.set_pair_tentative( if orig_addr is None:
orig_addr, recomp_addr continue
):
try:
# Check whether either of the addresses is actually a thunk.
# This is a quirk of the debug builds. Technically the export
# *is* the thunk, but it's more helpful to mark the actual function.
# It could be the case that only one side is a thunk, but we can
# deal with that.
(opcode, rel_addr) = struct.unpack(
"<Bl", self.recomp_bin.read(recomp_addr, 5)
)
if opcode == 0xE9:
recomp_addr += 5 + rel_addr
(opcode, rel_addr) = struct.unpack(
"<Bl", self.orig_bin.read(orig_addr, 5)
)
if opcode == 0xE9:
orig_addr += 5 + rel_addr
except ValueError:
# Bail out if there's a problem with struct.unpack
continue
if self._db.set_pair_tentative(orig_addr, recomp_addr):
logger.debug("Matched export %s", repr(export_name)) logger.debug("Matched export %s", repr(export_name))
def _find_vtordisp(self): def _find_vtordisp(self):

View File

@ -221,25 +221,23 @@ def set_function_pair(self, orig: int, recomp: int) -> bool:
"""For lineref match or _entry""" """For lineref match or _entry"""
return self.set_pair(orig, recomp, SymbolType.FUNCTION) return self.set_pair(orig, recomp, SymbolType.FUNCTION)
def register_thunk(self, orig: int, recomp: int, name: str) -> bool: def create_recomp_thunk(self, addr: int, name: str) -> bool:
"""orig/recomp are an address pair of a thunk to some other function. """Create a thunk function reference using the recomp address.
We may or may not already have this function tracked in the db. We start from the recomp side for this because we are guaranteed
If not, we need to create it, and we will use the name to have full information from the PDB. We can use a regular function
(of the function being thunked, presumably) to mock up a name for match later to pull in the orig address."""
this symbol."""
# Start by assuming the row exists if self._recomp_used(addr):
if self.set_function_pair(orig, recomp): return False
return True
thunk_name = f"Thunk of '{name}'" thunk_name = f"Thunk of '{name}'"
# Assuming relative jump instruction for thunks (5 bytes) # Assuming relative jump instruction for thunks (5 bytes)
cur = self._db.execute( cur = self._db.execute(
"""INSERT INTO `symbols` """INSERT INTO `symbols`
(orig_addr, recomp_addr, compare_type, name, size) (recomp_addr, compare_type, name, size)
VALUES (?,?,?,?,?)""", VALUES (?,?,?,?)""",
(orig, recomp, SymbolType.FUNCTION.value, thunk_name, 5), (addr, SymbolType.FUNCTION.value, thunk_name, 5),
) )
return cur.rowcount > 0 return cur.rowcount > 0