diff --git a/tools/ghidra_scripts/lego_util/function_importer.py b/tools/ghidra_scripts/lego_util/function_importer.py index 80176fc5..a09ad190 100644 --- a/tools/ghidra_scripts/lego_util/function_importer.py +++ b/tools/ghidra_scripts/lego_util/function_importer.py @@ -17,7 +17,7 @@ CppStackSymbol, ) from lego_util.ghidra_helper import ( - add_pointer_type, + get_or_add_pointer_type, get_ghidra_namespace, sanitize_name, ) @@ -91,7 +91,10 @@ def matches_ghidra_function(self, ghidra_function: Function) -> bool: if ( (not return_type_match) and (self.return_type.getLength() > 4) - and (add_pointer_type(self.api, self.return_type) == ghidra_return_type) + and ( + get_or_add_pointer_type(self.api, self.return_type) + == ghidra_return_type + ) and any( param for param in ghidra_function.getParameters() diff --git a/tools/ghidra_scripts/lego_util/ghidra_helper.py b/tools/ghidra_scripts/lego_util/ghidra_helper.py index f7ea4ec7..f6726482 100644 --- a/tools/ghidra_scripts/lego_util/ghidra_helper.py +++ b/tools/ghidra_scripts/lego_util/ghidra_helper.py @@ -11,10 +11,8 @@ # Disable spurious warnings in vscode / pylance # pyright: reportMissingModuleSource=false -from ghidra.program.model.data import PointerDataType -from ghidra.program.model.data import DataTypeConflictHandler from ghidra.program.flatapi import FlatProgramAPI -from ghidra.program.model.data import DataType +from ghidra.program.model.data import DataType, DataTypeConflictHandler, PointerDataType from ghidra.program.model.symbol import Namespace logger = logging.getLogger(__name__) @@ -37,9 +35,15 @@ def get_ghidra_type(api: FlatProgramAPI, type_name: str): raise MultipleTypesFoundInGhidraError(type_name, result) -def add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType: - new_data_type = PointerDataType(pointee) - new_data_type.setCategoryPath(pointee.getCategoryPath()) +def get_or_add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType: + new_pointer_data_type = PointerDataType(pointee) + new_pointer_data_type.setCategoryPath(pointee.getCategoryPath()) + return add_data_type_or_reuse_existing(api, new_pointer_data_type) + + +def add_data_type_or_reuse_existing( + api: FlatProgramAPI, new_data_type: DataType +) -> DataType: result_data_type = ( api.getCurrentProgram() .getDataTypeManager() @@ -47,7 +51,7 @@ def add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType: ) if result_data_type is not new_data_type: logger.debug( - "New pointer replaced by existing one. Fresh pointer: %s (class: %s)", + "Reusing existing data type instead of new one: %s (class: %s)", result_data_type, result_data_type.__class__, ) diff --git a/tools/ghidra_scripts/lego_util/type_importer.py b/tools/ghidra_scripts/lego_util/type_importer.py index 87d725ef..bf882463 100644 --- a/tools/ghidra_scripts/lego_util/type_importer.py +++ b/tools/ghidra_scripts/lego_util/type_importer.py @@ -16,7 +16,8 @@ StructModificationError, ) from lego_util.ghidra_helper import ( - add_pointer_type, + add_data_type_or_reuse_existing, + get_or_add_pointer_type, create_ghidra_namespace, get_ghidra_namespace, get_ghidra_type, @@ -34,6 +35,8 @@ EnumDataType, StructureDataType, StructureInternal, + TypedefDataType, + ComponentOffsetSettingsDefinition, ) from ghidra.util.task import ConsoleTaskMonitor @@ -91,7 +94,7 @@ def import_pdb_type_into_ghidra( ) if type_category == "LF_POINTER": - return add_pointer_type( + return get_or_add_pointer_type( self.api, self.import_pdb_type_into_ghidra( type_pdb["element_type"], slim_for_vbase @@ -308,7 +311,7 @@ def _get_components_from_vbase( vbasepointer: Optional[VirtualBasePointer] = field_list.get("vbase", None) if vbasepointer is not None and any(x.direct for x in vbasepointer.bases): - vbaseptr_type = add_pointer_type( + vbaseptr_type = get_or_add_pointer_type( self.api, self._import_vbaseptr( current_type, class_name_with_namespace, vbasepointer @@ -326,26 +329,43 @@ def _import_vbaseptr( class_name_with_namespace: str, vbasepointer: VirtualBasePointer, ) -> StructureInternal: - pointer_size = 4 + pointer_size = 4 # hard-code to 4 because of 32 bit components = [ { "offset": 0, - "type": add_pointer_type(self.api, current_type), + "type": get_or_add_pointer_type(self.api, current_type), "name": "o_self", } ] for vbase in vbasepointer.bases: vbase_ghidra_type = self.import_pdb_type_into_ghidra(vbase.type) + type_name = vbase_ghidra_type.getName() + + vbase_ghidra_pointer = get_or_add_pointer_type(self.api, vbase_ghidra_type) + vbase_ghidra_pointer_typedef = TypedefDataType( + vbase_ghidra_pointer.getCategoryPath(), + f"{type_name}PtrOffset", + vbase_ghidra_pointer, + ) + # Set a default value of -4 for the pointer offset. While this appears to be correct in many cases, + # it does not always lead to the best decompile. It can be fine-tuned by hand; the next function call + # makes sure that we don't overwrite this value on re-running the import. + ComponentOffsetSettingsDefinition.DEF.setValue(vbase_ghidra_pointer_typedef.getDefaultSettings(), -4) + + vbase_ghidra_pointer_typedef = add_data_type_or_reuse_existing( + self.api, vbase_ghidra_pointer_typedef + ) + components.append( { "offset": vbase.index * pointer_size, - "type": add_pointer_type(self.api, vbase_ghidra_type), - "name": f"o_{vbase_ghidra_type.getName()}", + "type": vbase_ghidra_pointer_typedef, + "name": f"o_{type_name}", } ) - + size = len(components) * pointer_size new_ghidra_struct = self._get_or_create_struct_data_type(