Support offsets in vbase pointers

This commit is contained in:
jonschz 2024-07-30 09:23:40 +02:00
parent 683260935e
commit e4bc8960ca
3 changed files with 44 additions and 17 deletions

View File

@ -17,7 +17,7 @@
CppStackSymbol, CppStackSymbol,
) )
from lego_util.ghidra_helper import ( from lego_util.ghidra_helper import (
add_pointer_type, get_or_add_pointer_type,
get_ghidra_namespace, get_ghidra_namespace,
sanitize_name, sanitize_name,
) )
@ -91,7 +91,10 @@ def matches_ghidra_function(self, ghidra_function: Function) -> bool:
if ( if (
(not return_type_match) (not return_type_match)
and (self.return_type.getLength() > 4) and (self.return_type.getLength() > 4)
and (add_pointer_type(self.api, self.return_type) == ghidra_return_type) and (
get_or_add_pointer_type(self.api, self.return_type)
== ghidra_return_type
)
and any( and any(
param param
for param in ghidra_function.getParameters() for param in ghidra_function.getParameters()

View File

@ -11,10 +11,8 @@
# Disable spurious warnings in vscode / pylance # Disable spurious warnings in vscode / pylance
# pyright: reportMissingModuleSource=false # pyright: reportMissingModuleSource=false
from ghidra.program.model.data import PointerDataType
from ghidra.program.model.data import DataTypeConflictHandler
from ghidra.program.flatapi import FlatProgramAPI from ghidra.program.flatapi import FlatProgramAPI
from ghidra.program.model.data import DataType from ghidra.program.model.data import DataType, DataTypeConflictHandler, PointerDataType
from ghidra.program.model.symbol import Namespace from ghidra.program.model.symbol import Namespace
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -37,9 +35,15 @@ def get_ghidra_type(api: FlatProgramAPI, type_name: str):
raise MultipleTypesFoundInGhidraError(type_name, result) raise MultipleTypesFoundInGhidraError(type_name, result)
def add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType: def get_or_add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType:
new_data_type = PointerDataType(pointee) new_pointer_data_type = PointerDataType(pointee)
new_data_type.setCategoryPath(pointee.getCategoryPath()) new_pointer_data_type.setCategoryPath(pointee.getCategoryPath())
return add_data_type_or_reuse_existing(api, new_pointer_data_type)
def add_data_type_or_reuse_existing(
api: FlatProgramAPI, new_data_type: DataType
) -> DataType:
result_data_type = ( result_data_type = (
api.getCurrentProgram() api.getCurrentProgram()
.getDataTypeManager() .getDataTypeManager()
@ -47,7 +51,7 @@ def add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType:
) )
if result_data_type is not new_data_type: if result_data_type is not new_data_type:
logger.debug( logger.debug(
"New pointer replaced by existing one. Fresh pointer: %s (class: %s)", "Reusing existing data type instead of new one: %s (class: %s)",
result_data_type, result_data_type,
result_data_type.__class__, result_data_type.__class__,
) )

View File

@ -16,7 +16,8 @@
StructModificationError, StructModificationError,
) )
from lego_util.ghidra_helper import ( from lego_util.ghidra_helper import (
add_pointer_type, add_data_type_or_reuse_existing,
get_or_add_pointer_type,
create_ghidra_namespace, create_ghidra_namespace,
get_ghidra_namespace, get_ghidra_namespace,
get_ghidra_type, get_ghidra_type,
@ -34,6 +35,8 @@
EnumDataType, EnumDataType,
StructureDataType, StructureDataType,
StructureInternal, StructureInternal,
TypedefDataType,
ComponentOffsetSettingsDefinition,
) )
from ghidra.util.task import ConsoleTaskMonitor from ghidra.util.task import ConsoleTaskMonitor
@ -91,7 +94,7 @@ def import_pdb_type_into_ghidra(
) )
if type_category == "LF_POINTER": if type_category == "LF_POINTER":
return add_pointer_type( return get_or_add_pointer_type(
self.api, self.api,
self.import_pdb_type_into_ghidra( self.import_pdb_type_into_ghidra(
type_pdb["element_type"], slim_for_vbase type_pdb["element_type"], slim_for_vbase
@ -308,7 +311,7 @@ def _get_components_from_vbase(
vbasepointer: Optional[VirtualBasePointer] = field_list.get("vbase", None) vbasepointer: Optional[VirtualBasePointer] = field_list.get("vbase", None)
if vbasepointer is not None and any(x.direct for x in vbasepointer.bases): if vbasepointer is not None and any(x.direct for x in vbasepointer.bases):
vbaseptr_type = add_pointer_type( vbaseptr_type = get_or_add_pointer_type(
self.api, self.api,
self._import_vbaseptr( self._import_vbaseptr(
current_type, class_name_with_namespace, vbasepointer current_type, class_name_with_namespace, vbasepointer
@ -326,23 +329,40 @@ def _import_vbaseptr(
class_name_with_namespace: str, class_name_with_namespace: str,
vbasepointer: VirtualBasePointer, vbasepointer: VirtualBasePointer,
) -> StructureInternal: ) -> StructureInternal:
pointer_size = 4 pointer_size = 4 # hard-code to 4 because of 32 bit
components = [ components = [
{ {
"offset": 0, "offset": 0,
"type": add_pointer_type(self.api, current_type), "type": get_or_add_pointer_type(self.api, current_type),
"name": "o_self", "name": "o_self",
} }
] ]
for vbase in vbasepointer.bases: for vbase in vbasepointer.bases:
vbase_ghidra_type = self.import_pdb_type_into_ghidra(vbase.type) vbase_ghidra_type = self.import_pdb_type_into_ghidra(vbase.type)
type_name = vbase_ghidra_type.getName()
vbase_ghidra_pointer = get_or_add_pointer_type(self.api, vbase_ghidra_type)
vbase_ghidra_pointer_typedef = TypedefDataType(
vbase_ghidra_pointer.getCategoryPath(),
f"{type_name}PtrOffset",
vbase_ghidra_pointer,
)
# Set a default value of -4 for the pointer offset. While this appears to be correct in many cases,
# it does not always lead to the best decompilation. The value can be fine-tuned by hand; the call to
# add_data_type_or_reuse_existing() below makes sure that we don't overwrite it on re-running the import.
ComponentOffsetSettingsDefinition.DEF.setValue(vbase_ghidra_pointer_typedef.getDefaultSettings(), -4)
vbase_ghidra_pointer_typedef = add_data_type_or_reuse_existing(
self.api, vbase_ghidra_pointer_typedef
)
components.append( components.append(
{ {
"offset": vbase.index * pointer_size, "offset": vbase.index * pointer_size,
"type": add_pointer_type(self.api, vbase_ghidra_type), "type": vbase_ghidra_pointer_typedef,
"name": f"o_{vbase_ghidra_type.getName()}", "name": f"o_{type_name}",
} }
) )