From 59fd1e55b19cc2e0ad76f3fe836beecca4bb0767 Mon Sep 17 00:00:00 2001 From: jonschz Date: Sat, 8 Jun 2024 22:11:32 +0200 Subject: [PATCH] Fix stack layout offset error --- .../lego_util/pdb_extraction.py | 8 +++- tools/isledecomp/isledecomp/cvdump/symbols.py | 44 ++++++++++++++----- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/tools/ghidra_scripts/lego_util/pdb_extraction.py b/tools/ghidra_scripts/lego_util/pdb_extraction.py index 6e5dae12..aaecc32d 100644 --- a/tools/ghidra_scripts/lego_util/pdb_extraction.py +++ b/tools/ghidra_scripts/lego_util/pdb_extraction.py @@ -30,6 +30,7 @@ class CppRegisterSymbol(CppStackOrRegisterSymbol): @dataclass class FunctionSignature: + original_function_symbol: SymbolsEntry call_type: str arglist: list[str] return_type: str @@ -86,6 +87,10 @@ def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]: assert arg_list_type["argcount"] == len(arg_list_pdb_types) stack_symbols: list[CppStackOrRegisterSymbol] = [] + + # for some unexplained reason, the reported stack is offset by 4 when this flag is set + stack_offset_delta = -4 if fn.frame_pointer_present else 0 + for symbol in fn.stack_symbols: if symbol.symbol_type == "S_REGISTER": stack_symbols.append( @@ -101,13 +106,14 @@ def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]: CppStackSymbol( symbol.name, symbol.data_type, - stack_offset, + stack_offset + stack_offset_delta, ) ) call_type = self._call_type_map[function_type["call_type"]] return FunctionSignature( + original_function_symbol=fn, call_type=call_type, arglist=arg_list_pdb_types, return_type=function_type["return_type"], diff --git a/tools/isledecomp/isledecomp/cvdump/symbols.py b/tools/isledecomp/isledecomp/cvdump/symbols.py index 27935de5..bd9ca2f3 100644 --- a/tools/isledecomp/isledecomp/cvdump/symbols.py +++ b/tools/isledecomp/isledecomp/cvdump/symbols.py @@ -1,5 +1,7 @@ +from dataclasses import dataclass, field import logging import re +from re import 
Match from typing import NamedTuple, Optional @@ -15,15 +17,18 @@ class StackOrRegisterSymbol(NamedTuple): # S_GPROC32 = functions -class SymbolsEntry(NamedTuple): +@dataclass +class SymbolsEntry: + # pylint: disable=too-many-instance-attributes type: str section: int offset: int size: int func_type: str name: str - stack_symbols: list[StackOrRegisterSymbol] - addr: Optional[int] # absolute address, to be set later + stack_symbols: list[StackOrRegisterSymbol] = field(default_factory=list) + frame_pointer_present: bool = False + addr: Optional[int] = None # Absolute address. Will be set later, if at all class CvdumpSymbolsParser: @@ -55,6 +60,16 @@ class CvdumpSymbolsParser: `esi, Type: 0x1E14, this` """ + _debug_start_end_regex = re.compile( + r"^\s*Debug start: (?P<debug_start>\w+), Debug end: (?P<debug_end>\w+)$" + ) + + _parent_end_next_regex = re.compile( + r"\s*Parent: (?P<parent_addr>\w+), End: (?P<end_addr>\w+), Next: (?P<next_addr>\w+)$" + ) + + _flags_frame_pointer_regex = re.compile(r"\s*Flags: Frame Ptr Present$") + _register_stack_symbols = ["S_BPREL32", "S_REGISTER"] # List the unhandled types so we can check exhaustiveness @@ -72,16 +87,27 @@ class CvdumpSymbolsParser: def __init__(self): self.symbols: list[SymbolsEntry] = [] - self.current_function = None + self.current_function: Optional[SymbolsEntry] = None def read_line(self, line: str): - if (match := self._symbol_line_generic_regex.match(line)) is None: + if (match := self._symbol_line_generic_regex.match(line)) is not None: + self._parse_generic_case(line, match) + elif (match := self._parent_end_next_regex.match(line)) is not None: + # We do not need this info at the moment, might be useful in the future + pass + elif (match := self._debug_start_end_regex.match(line)) is not None: + # We do not need this info at the moment, might be useful in the future + pass + elif (match := self._flags_frame_pointer_regex.match(line)) is not None: + assert self.current_function is not None + self.current_function.frame_pointer_present = True + else: # Most of
these are either `** Module: [...]` or data we do not care about logger.debug("Unhandled line: %s", line[:-1]) - return - symbol_type: str = match.group("symbol_type") - second_part: Optional[str] = match.group("second_part") + def _parse_generic_case(self, line, line_match: Match[str]): + symbol_type: str = line_match.group("symbol_type") + second_part: Optional[str] = line_match.group("second_part") if symbol_type == "S_GPROC32": assert second_part is not None @@ -95,8 +121,6 @@ def read_line(self, line: str): size=int(match.group("size"), 16), func_type=match.group("func_type"), name=match.group("name"), - stack_symbols=[], - addr=None, # will be set later, if at all ) self.symbols.append(self.current_function)