diff --git a/tools/ghidra_scripts/lego_util/function_importer.py b/tools/ghidra_scripts/lego_util/function_importer.py index a09ad190..7e2cd12c 100644 --- a/tools/ghidra_scripts/lego_util/function_importer.py +++ b/tools/ghidra_scripts/lego_util/function_importer.py @@ -10,6 +10,12 @@ from ghidra.program.flatapi import FlatProgramAPI from ghidra.program.model.listing import ParameterImpl from ghidra.program.model.symbol import SourceType +from ghidra.program.model.data import ( + TypeDef, + TypedefDataType, + Pointer, + ComponentOffsetSettingsDefinition, +) from lego_util.pdb_extraction import ( PdbFunction, @@ -17,12 +23,13 @@ CppStackSymbol, ) from lego_util.ghidra_helper import ( + add_data_type_or_reuse_existing, get_or_add_pointer_type, get_ghidra_namespace, sanitize_name, ) -from lego_util.exceptions import StackOffsetMismatchError +from lego_util.exceptions import StackOffsetMismatchError, Lego1Exception from lego_util.type_importer import PdbTypeImporter @@ -106,19 +113,22 @@ def matches_ghidra_function(self, ghidra_function: Function) -> bool: ) return_type_match = True - # match arguments: decide if thiscall or not + # match arguments: decide if thiscall or not, and whether the `this` type matches thiscall_matches = ( self.signature.call_type == ghidra_function.getCallingConventionName() ) + ghidra_params_without_this = list(ghidra_function.getParameters()) + + if thiscall_matches and self.signature.call_type == "__thiscall": + this_argument = ghidra_params_without_this.pop(0) + thiscall_matches = self._this_type_match(this_argument) + if self.is_stub: # We do not import the argument list for stubs, so it should be excluded in matches args_match = True elif thiscall_matches: - if self.signature.call_type == "__thiscall": - args_match = self._matches_thiscall_parameters(ghidra_function) - else: - args_match = self._matches_non_thiscall_parameters(ghidra_function) + args_match = self._parameter_lists_match(ghidra_params_without_this) else: args_match = False @@ -139,16 +149,22 @@ def matches_ghidra_function(self, ghidra_function: Function) -> bool: and args_match ) - def _matches_non_thiscall_parameters(self, ghidra_function: Function) -> bool: - return self._parameter_lists_match(ghidra_function.getParameters()) + def _this_type_match(self, this_parameter: Parameter) -> bool: + if this_parameter.getName() != "this": + logger.info("Expected first argument to be `this` in __thiscall") + return False - def _matches_thiscall_parameters(self, ghidra_function: Function) -> bool: - ghidra_params = list(ghidra_function.getParameters()) + if self.signature.this_adjust != 0: + # In this case, the `this` argument should be custom defined + if not isinstance(this_parameter.getDataType(), TypeDef): + logger.info( + "`this` argument is not a typedef while `this adjust` = %d", + self.signature.this_adjust, + ) + return False + # We are not checking for the _correct_ `this` type here, which we could do in the future - # remove the `this` argument which we don't generate ourselves - ghidra_params.pop(0) - - return self._parameter_lists_match(ghidra_params) + return True def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool: # Remove return storage pointer from comparison if present. @@ -197,6 +213,25 @@ def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool: def overwrite_ghidra_function(self, ghidra_function: Function): """Replace the function declaration in Ghidra by the one derived from C++.""" + + if ghidra_function.hasCustomVariableStorage(): + # Unfortunately, Ghidra has a bug where setting custom variable storage back to `False` + # leads to two `this` parameters. Therefore, we first need to remove all `this` parameters + # and then re-generate a new one + ghidra_function.replaceParameters( + Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, # this implicitly sets custom variable storage to False + True, + SourceType.USER_DEFINED, + [ + param + for param in ghidra_function.getParameters() + if param.getName() != "this" + ], + ) + + if ghidra_function.hasCustomVariableStorage(): + raise Lego1Exception("Failed to disable custom variable storage.") + ghidra_function.setName(self.name, SourceType.USER_DEFINED) ghidra_function.setParentNamespace(self.namespace) ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED) @@ -206,16 +241,18 @@ def overwrite_ghidra_function(self, ghidra_function: Function): logger.debug( "%s is a stub, skipping parameter import", self.get_full_name() ) - return + else: + ghidra_function.replaceParameters( + Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, + True, # force + SourceType.USER_DEFINED, + self.arguments, + ) + self._import_parameter_names(ghidra_function) - ghidra_function.replaceParameters( - Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, - True, # force - SourceType.USER_DEFINED, - self.arguments, - ) - - self._import_parameter_names(ghidra_function) + # Special handling for `this adjust` and virtual inheritance + if self.signature.this_adjust != 0: + self._set_this_adjust(ghidra_function) def _import_parameter_names(self, ghidra_function: Function): # When we call `ghidra_function.replaceParameters`, Ghidra will generate the layout. @@ -287,3 +324,50 @@ def get_matching_register_symbol( ), None, ) + + def _set_this_adjust( + self, + ghidra_function: Function, + ): + """ + Then `this adjust` is non-zero, the pointer type of `this` needs to be replaced by an offset version. + The offset can only be set on a typedef on the pointer. We also must enable custom storage so we can modify + the auto-generated `this` parameter. + """ + + # Necessary in order to overwite the auto-generated `this` + ghidra_function.setCustomVariableStorage(True) + + this_parameter = next( + ( + param + for param in ghidra_function.getParameters() + if param.isRegisterVariable() and param.getName() == "this" + ), + None, + ) + + if this_parameter is None: + logger.error( + "Failed to find `this` parameter in a function with `this adjust = %d`", + self.signature.this_adjust, + ) + else: + current_ghidra_type = this_parameter.getDataType() + assert isinstance(current_ghidra_type, Pointer) + class_name = current_ghidra_type.getDataType().getName() + typedef_name = f"{class_name}PtrOffset0x{self.signature.this_adjust:x}" + + typedef_ghidra_type = TypedefDataType( + current_ghidra_type.getCategoryPath(), + typedef_name, + current_ghidra_type, + ) + ComponentOffsetSettingsDefinition.DEF.setValue( + typedef_ghidra_type.getDefaultSettings(), self.signature.this_adjust + ) + typedef_ghidra_type = add_data_type_or_reuse_existing( + self.api, typedef_ghidra_type + ) + + this_parameter.setDataType(typedef_ghidra_type, SourceType.USER_DEFINED) diff --git a/tools/ghidra_scripts/lego_util/pdb_extraction.py b/tools/ghidra_scripts/lego_util/pdb_extraction.py index 0c2ef7dc..4ba1ac71 100644 --- a/tools/ghidra_scripts/lego_util/pdb_extraction.py +++ b/tools/ghidra_scripts/lego_util/pdb_extraction.py @@ -36,6 +36,8 @@ class FunctionSignature: return_type: str class_type: Optional[str] stack_symbols: list[CppStackOrRegisterSymbol] + # if non-zero: an offset to the `this` parameter in a __thiscall + this_adjust: int @dataclass @@ -119,6 +121,9 @@ def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]: call_type = self._call_type_map[function_type["call_type"]] + # parse as hex number, default to 0 + this_adjust = int(function_type.get("this_adjust", "0"), 16) + return FunctionSignature( original_function_symbol=fn, call_type=call_type, @@ -126,6 +131,7 @@ def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]: return_type=function_type["return_type"], class_type=class_type, stack_symbols=stack_symbols, + this_adjust=this_adjust, ) def get_function_list(self) -> list[PdbFunction]: diff --git a/tools/ghidra_scripts/lego_util/type_importer.py b/tools/ghidra_scripts/lego_util/type_importer.py index bf882463..ecc4e96e 100644 --- a/tools/ghidra_scripts/lego_util/type_importer.py +++ b/tools/ghidra_scripts/lego_util/type_importer.py @@ -262,19 +262,18 @@ def _import_class_or_struct( if slim_for_vbase: # Make a "slim" version: shrink the size to the fields that are actually present. # This makes a difference when the current class uses virtual inheritance - assert ( len(components) > 0 ), f"Error: {class_name_with_namespace} should not be empty. There must be at least one direct or indirect vbase pointer." last_component = components[-1] class_size = last_component["offset"] + last_component["type"].getLength() - self._overwrite_struct( - class_name_with_namespace, - new_ghidra_struct, - class_size, - components, - ) + self._overwrite_struct( + class_name_with_namespace, + new_ghidra_struct, + class_size, + components, + ) logger.info("Finished importing class %s", class_name_with_namespace) @@ -352,7 +351,9 @@ def _import_vbaseptr( # Set a default value of -4 for the pointer offset. While this appears to be correct in many cases, # it does not always lead to the best decompile. It can be fine-tuned by hand; the next function call # makes sure that we don't overwrite this value on re-running the import. - ComponentOffsetSettingsDefinition.DEF.setValue(vbase_ghidra_pointer_typedef.getDefaultSettings(), -4) + ComponentOffsetSettingsDefinition.DEF.setValue( + vbase_ghidra_pointer_typedef.getDefaultSettings(), -4 + ) vbase_ghidra_pointer_typedef = add_data_type_or_reuse_existing( self.api, vbase_ghidra_pointer_typedef @@ -400,7 +401,6 @@ def _overwrite_struct( ) for component in components: - offset: int = component["offset"] logger.debug( "Adding component %s to class: %s", component, class_name_with_namespace diff --git a/tools/isledecomp/isledecomp/cvdump/types.py b/tools/isledecomp/isledecomp/cvdump/types.py index f00f1ea1..42a9e985 100644 --- a/tools/isledecomp/isledecomp/cvdump/types.py +++ b/tools/isledecomp/isledecomp/cvdump/types.py @@ -230,7 +230,7 @@ class CvdumpTypesParser: re.compile(r"\s*Arg list type = (?P[\w()]+)$"), re.compile( r"\s*This adjust = (?P[\w()]+)$" - ), # TODO: figure out the meaning + ), # By how much the incoming pointers are shifted in virtual inheritance; hex value without `0x` prefix re.compile( r"\s*Func attr = (?P[\w()]+)$" ), # Only for completeness, is always `none` @@ -559,7 +559,6 @@ def read_fieldlist_line(self, line: str): # virtual base class (direct or indirect) elif (match := self.VBCLASS_RE.match(line)) is not None: - virtual_base_pointer = self.keys[self.last_key].setdefault( "vbase", VirtualBasePointer(