Change thunk match strategy

This commit is contained in:
disinvite 2024-04-18 17:30:16 -04:00
parent 9c6120fc37
commit b2a0638f93
2 changed files with 47 additions and 21 deletions

View File

@ -79,8 +79,8 @@ def __init__(
self._load_markers()
self._find_original_strings()
self._match_imports()
self._match_thunks()
self._match_exports()
self._match_thunks()
self._find_vtordisp()
def _load_cvdump(self):
@ -307,9 +307,18 @@ def _match_thunks(self):
func_addr: thunk_addr for (thunk_addr, func_addr) in self.recomp_bin.thunks
}
# Mark all recomp thunks first. This allows us to use their names
# when we sanitize the asm.
for recomp_thunk, recomp_addr in self.recomp_bin.thunks:
recomp_func = self._db.get_by_recomp(recomp_addr)
if recomp_func is None:
continue
self._db.create_recomp_thunk(recomp_thunk, recomp_func.name)
for orig_thunk, orig_addr in self.orig_bin.thunks:
orig_func = self._db.get_by_orig(orig_addr)
if orig_func is None or orig_func.recomp_addr is None:
if orig_func is None:
continue
# Check whether the thunk destination is a matched symbol
@ -317,10 +326,7 @@ def _match_thunks(self):
if recomp_thunk is None:
continue
# The thunk symbol should already exist if it is the thunk of an
# imported function. Incremental build thunks have no symbol,
# so we need to give them a name for the asm diff output.
self._db.register_thunk(orig_thunk, recomp_thunk, orig_func.name)
self._db.set_function_pair(orig_thunk, recomp_thunk)
# Don't compare thunk functions for now. The comparison isn't
# "useful" in the usual sense. We are only looking at the
@ -336,9 +342,31 @@ def _match_exports(self):
for recomp_addr, export_name in self.recomp_bin.exports:
orig_addr = orig_exports.get(export_name)
if orig_addr is not None and self._db.set_pair_tentative(
orig_addr, recomp_addr
):
if orig_addr is None:
continue
try:
# Check whether either of the addresses is actually a thunk.
# This is a quirk of the debug builds. Technically the export
# *is* the thunk, but it's more helpful to mark the actual function.
# It could be the case that only one side is a thunk, but we can
# deal with that.
(opcode, rel_addr) = struct.unpack(
"<Bl", self.recomp_bin.read(recomp_addr, 5)
)
if opcode == 0xE9:
recomp_addr += 5 + rel_addr
(opcode, rel_addr) = struct.unpack(
"<Bl", self.orig_bin.read(orig_addr, 5)
)
if opcode == 0xE9:
orig_addr += 5 + rel_addr
except ValueError:
# Bail out if there's a problem with struct.unpack
continue
if self._db.set_pair_tentative(orig_addr, recomp_addr):
logger.debug("Matched export %s", repr(export_name))
def _find_vtordisp(self):

View File

@ -221,25 +221,23 @@ def set_function_pair(self, orig: int, recomp: int) -> bool:
"""For lineref match or _entry"""
return self.set_pair(orig, recomp, SymbolType.FUNCTION)
def register_thunk(self, orig: int, recomp: int, name: str) -> bool:
"""orig/recomp are an address pair of a thunk to some other function.
We may or may not already have this function tracked in the db.
If not, we need to create it, and we will use the name
(of the function being thunked, presumably) to mock up a name for
this symbol."""
def create_recomp_thunk(self, addr: int, name: str) -> bool:
"""Create a thunk function reference using the recomp address.
We start from the recomp side for this because we are guaranteed
to have full information from the PDB. We can use a regular function
match later to pull in the orig address."""
# Start by assuming the row exists
if self.set_function_pair(orig, recomp):
return True
if self._recomp_used(addr):
return False
thunk_name = f"Thunk of '{name}'"
# Assuming relative jump instruction for thunks (5 bytes)
cur = self._db.execute(
"""INSERT INTO `symbols`
(orig_addr, recomp_addr, compare_type, name, size)
VALUES (?,?,?,?,?)""",
(orig, recomp, SymbolType.FUNCTION.value, thunk_name, 5),
(recomp_addr, compare_type, name, size)
VALUES (?,?,?,?)""",
(addr, SymbolType.FUNCTION.value, thunk_name, 5),
)
return cur.rowcount > 0