From bc0df62e20f77f411bc971c2c7b2e0994a271b06 Mon Sep 17 00:00:00 2001
From: jonschz
Date: Thu, 11 Jul 2024 19:34:31 +0200
Subject: [PATCH] Spike to fix array comparisons (needs refactor)

---
Reviewer notes (not part of the commit message):

* core.py: once match_variable() succeeds for a non-static variable, the
  recomp address is read back via get_by_orig() and the cvdump node at that
  address is looked up. If the node's type record is an LF_ARRAY, elements
  1..N-1 are paired through the new _add_match_in_array() helper: once per
  element, or once per member when the element type has members.
* _add_match_in_array() passes a literal 4 as the last argument of
  set_recomp_symbol() -- presumably a size in bytes; confirm before the
  refactor.
* analysis.py: CvdumpNode gains a data_type attribute, filled from
  parser.types.get(glo.type) next to confirmed_size.

 tools/isledecomp/isledecomp/compare/core.py  | 70 ++++++++++++++++++-
 .../isledecomp/isledecomp/cvdump/analysis.py |  5 +-
 2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/tools/isledecomp/isledecomp/compare/core.py b/tools/isledecomp/isledecomp/compare/core.py
index 1587ef81..d7f99d58 100644
--- a/tools/isledecomp/isledecomp/compare/core.py
+++ b/tools/isledecomp/isledecomp/compare/core.py
@@ -8,6 +8,7 @@
 from isledecomp.bin import Bin as IsleBin, InvalidVirtualAddressError
 from isledecomp.cvdump.demangler import demangle_string_const
 from isledecomp.cvdump import Cvdump, CvdumpAnalysis
+from isledecomp.cvdump.types import scalar_type_pointer
 from isledecomp.parser import DecompCodebase
 from isledecomp.dir import walk_source_dir
 from isledecomp.types import SymbolType
@@ -220,7 +221,62 @@ def orig_bin_checker(addr: int) -> bool:
                     var.offset, var.name, var.parent_function
                 )
             else:
-                self._db.match_variable(var.offset, var.name)
+                if not self._db.match_variable(var.offset, var.name):
+                    continue
+                # retrieve compare address
+                matchinfo = self._db.get_by_orig(var.offset)
+                if matchinfo is None or matchinfo.recomp_addr is None:
+                    continue
+
+                var_recomp_addr = matchinfo.recomp_addr
+
+                node = next(
+                    (
+                        x
+                        for x in self.cvdump_analysis.nodes
+                        if x.addr == var_recomp_addr
+                    ),
+                    None,
+                )
+                if node is None or node.data_type is None:
+                    continue
+
+                if not node.data_type.key.startswith("0x"):
+                    # scalar type, no further processing needed
+                    continue
+
+                data_type = self.cv.types.keys[node.data_type.key.lower()]
+
+                if data_type["type"] == "LF_ARRAY":
+                    array_size_bytes = data_type["size"]
+
+                    array_type = self.cv.types.get(data_type["array_type"])
+
+                    assert array_type.size is not None
+                    array_type_size = array_type.size
+
+                    array_length, modulus = divmod(array_size_bytes, array_type_size)
+                    assert modulus == 0
+
+                    # 0 is already matched when we get here
+                    for i in range(1, array_length):
+                        orig_element_base_addr = var.offset + i * array_type_size
+                        recomp_element_base_addr = var_recomp_addr + i * array_type_size
+                        if array_type.members is None:
+                            self._add_match_in_array(
+                                f"{var.name}[{i}]",
+                                array_type.key,
+                                orig_element_base_addr,
+                                recomp_element_base_addr,
+                            )
+                        else:
+                            for member in array_type.members:
+                                self._add_match_in_array(
+                                    f"{var.name}[{i}].{member.name}",
+                                    array_type.key,
+                                    orig_element_base_addr + member.offset,
+                                    recomp_element_base_addr + member.offset,
+                                )
 
         for tbl in codebase.iter_vtables():
             self._db.match_vtable(tbl.offset, tbl.name, tbl.base_class)
@@ -245,6 +301,18 @@ def orig_bin_checker(addr: int) -> bool:
 
             self._db.match_string(string.offset, string.name)
 
+    def _add_match_in_array(
+        self, name: str, type_id: str, orig_addr: int, recomp_addr: int
+    ):
+        self._db.set_recomp_symbol(
+            recomp_addr,
+            SymbolType.POINTER if scalar_type_pointer(type_id) else SymbolType.DATA,
+            name,
+            name,
+            4,
+        )
+        self._db.set_pair(orig_addr, recomp_addr)
+
     def _find_original_strings(self):
         """Go to the original binary and look for the specified string constants
         to find a match. This is a (relatively) expensive operation so we only
diff --git a/tools/isledecomp/isledecomp/cvdump/analysis.py b/tools/isledecomp/isledecomp/cvdump/analysis.py
index 40ef292e..e030035e 100644
--- a/tools/isledecomp/isledecomp/cvdump/analysis.py
+++ b/tools/isledecomp/isledecomp/cvdump/analysis.py
@@ -5,7 +5,7 @@
 from isledecomp.types import SymbolType
 from .parser import CvdumpParser
 from .demangler import demangle_string_const, demangle_vtable
-from .types import CvdumpKeyError, CvdumpIntegrityError
+from .types import CvdumpKeyError, CvdumpIntegrityError, TypeInfo
 
 
 class CvdumpNode:
@@ -35,6 +35,8 @@ class CvdumpNode:
     section_contribution: Optional[int] = None
     addr: Optional[int] = None
     symbol_entry: Optional[SymbolsEntry] = None
+    # Preliminary - only used for non-static variables at the moment
+    data_type: Optional[TypeInfo] = None
 
     def __init__(self, section: int, offset: int) -> None:
         self.section = section
@@ -127,6 +129,7 @@ def __init__(self, parser: CvdumpParser):
                 # get information for built-in "T_" types.
                 g_info = parser.types.get(glo.type)
                 node_dict[key].confirmed_size = g_info.size
+                node_dict[key].data_type = g_info
                 # Previously we set the symbol type to POINTER here if
                 # the variable was known to be a pointer. We can derive this
                 # information later when it's time to compare the variable,