Support this adjust

This commit is contained in:
jonschz 2024-07-30 13:22:54 +02:00
parent e4bc8960ca
commit 983715e3aa
4 changed files with 123 additions and 34 deletions

View File

@ -10,6 +10,12 @@
from ghidra.program.flatapi import FlatProgramAPI from ghidra.program.flatapi import FlatProgramAPI
from ghidra.program.model.listing import ParameterImpl from ghidra.program.model.listing import ParameterImpl
from ghidra.program.model.symbol import SourceType from ghidra.program.model.symbol import SourceType
from ghidra.program.model.data import (
TypeDef,
TypedefDataType,
Pointer,
ComponentOffsetSettingsDefinition,
)
from lego_util.pdb_extraction import ( from lego_util.pdb_extraction import (
PdbFunction, PdbFunction,
@ -17,12 +23,13 @@
CppStackSymbol, CppStackSymbol,
) )
from lego_util.ghidra_helper import ( from lego_util.ghidra_helper import (
add_data_type_or_reuse_existing,
get_or_add_pointer_type, get_or_add_pointer_type,
get_ghidra_namespace, get_ghidra_namespace,
sanitize_name, sanitize_name,
) )
from lego_util.exceptions import StackOffsetMismatchError from lego_util.exceptions import StackOffsetMismatchError, Lego1Exception
from lego_util.type_importer import PdbTypeImporter from lego_util.type_importer import PdbTypeImporter
@ -106,19 +113,22 @@ def matches_ghidra_function(self, ghidra_function: Function) -> bool:
) )
return_type_match = True return_type_match = True
# match arguments: decide if thiscall or not # match arguments: decide if thiscall or not, and whether the `this` type matches
thiscall_matches = ( thiscall_matches = (
self.signature.call_type == ghidra_function.getCallingConventionName() self.signature.call_type == ghidra_function.getCallingConventionName()
) )
ghidra_params_without_this = list(ghidra_function.getParameters())
if thiscall_matches and self.signature.call_type == "__thiscall":
this_argument = ghidra_params_without_this.pop(0)
thiscall_matches = self._this_type_match(this_argument)
if self.is_stub: if self.is_stub:
# We do not import the argument list for stubs, so it should be excluded in matches # We do not import the argument list for stubs, so it should be excluded in matches
args_match = True args_match = True
elif thiscall_matches: elif thiscall_matches:
if self.signature.call_type == "__thiscall": args_match = self._parameter_lists_match(ghidra_params_without_this)
args_match = self._matches_thiscall_parameters(ghidra_function)
else:
args_match = self._matches_non_thiscall_parameters(ghidra_function)
else: else:
args_match = False args_match = False
@ -139,16 +149,22 @@ def matches_ghidra_function(self, ghidra_function: Function) -> bool:
and args_match and args_match
) )
def _matches_non_thiscall_parameters(self, ghidra_function: Function) -> bool: def _this_type_match(self, this_parameter: Parameter) -> bool:
return self._parameter_lists_match(ghidra_function.getParameters()) if this_parameter.getName() != "this":
logger.info("Expected first argument to be `this` in __thiscall")
return False
def _matches_thiscall_parameters(self, ghidra_function: Function) -> bool: if self.signature.this_adjust != 0:
ghidra_params = list(ghidra_function.getParameters()) # In this case, the `this` argument should be custom defined
if not isinstance(this_parameter.getDataType(), TypeDef):
logger.info(
"`this` argument is not a typedef while `this adjust` = %d",
self.signature.this_adjust,
)
return False
# We are not checking for the _correct_ `this` type here, which we could do in the future
# remove the `this` argument which we don't generate ourselves return True
ghidra_params.pop(0)
return self._parameter_lists_match(ghidra_params)
def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool: def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool:
# Remove return storage pointer from comparison if present. # Remove return storage pointer from comparison if present.
@ -197,6 +213,25 @@ def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool:
def overwrite_ghidra_function(self, ghidra_function: Function): def overwrite_ghidra_function(self, ghidra_function: Function):
"""Replace the function declaration in Ghidra by the one derived from C++.""" """Replace the function declaration in Ghidra by the one derived from C++."""
if ghidra_function.hasCustomVariableStorage():
# Unfortunately, Ghidra has a bug where setting custom variable storage back to `False`
# leads to two `this` parameters. Therefore, we first need to remove all `this` parameters
# and then re-generate a new one
ghidra_function.replaceParameters(
Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, # this implicitly sets custom variable storage to False
True,
SourceType.USER_DEFINED,
[
param
for param in ghidra_function.getParameters()
if param.getName() != "this"
],
)
if ghidra_function.hasCustomVariableStorage():
raise Lego1Exception("Failed to disable custom variable storage.")
ghidra_function.setName(self.name, SourceType.USER_DEFINED) ghidra_function.setName(self.name, SourceType.USER_DEFINED)
ghidra_function.setParentNamespace(self.namespace) ghidra_function.setParentNamespace(self.namespace)
ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED) ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED)
@ -206,17 +241,19 @@ def overwrite_ghidra_function(self, ghidra_function: Function):
logger.debug( logger.debug(
"%s is a stub, skipping parameter import", self.get_full_name() "%s is a stub, skipping parameter import", self.get_full_name()
) )
return else:
ghidra_function.replaceParameters( ghidra_function.replaceParameters(
Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS,
True, # force True, # force
SourceType.USER_DEFINED, SourceType.USER_DEFINED,
self.arguments, self.arguments,
) )
self._import_parameter_names(ghidra_function) self._import_parameter_names(ghidra_function)
# Special handling for `this adjust` and virtual inheritance
if self.signature.this_adjust != 0:
self._set_this_adjust(ghidra_function)
def _import_parameter_names(self, ghidra_function: Function): def _import_parameter_names(self, ghidra_function: Function):
# When we call `ghidra_function.replaceParameters`, Ghidra will generate the layout. # When we call `ghidra_function.replaceParameters`, Ghidra will generate the layout.
# Now we read the parameters again and match them against the stack layout in the PDB, # Now we read the parameters again and match them against the stack layout in the PDB,
@ -287,3 +324,50 @@ def get_matching_register_symbol(
), ),
None, None,
) )
def _set_this_adjust(
self,
ghidra_function: Function,
):
"""
Then `this adjust` is non-zero, the pointer type of `this` needs to be replaced by an offset version.
The offset can only be set on a typedef on the pointer. We also must enable custom storage so we can modify
the auto-generated `this` parameter.
"""
# Necessary in order to overwite the auto-generated `this`
ghidra_function.setCustomVariableStorage(True)
this_parameter = next(
(
param
for param in ghidra_function.getParameters()
if param.isRegisterVariable() and param.getName() == "this"
),
None,
)
if this_parameter is None:
logger.error(
"Failed to find `this` parameter in a function with `this adjust = %d`",
self.signature.this_adjust,
)
else:
current_ghidra_type = this_parameter.getDataType()
assert isinstance(current_ghidra_type, Pointer)
class_name = current_ghidra_type.getDataType().getName()
typedef_name = f"{class_name}PtrOffset0x{self.signature.this_adjust:x}"
typedef_ghidra_type = TypedefDataType(
current_ghidra_type.getCategoryPath(),
typedef_name,
current_ghidra_type,
)
ComponentOffsetSettingsDefinition.DEF.setValue(
typedef_ghidra_type.getDefaultSettings(), self.signature.this_adjust
)
typedef_ghidra_type = add_data_type_or_reuse_existing(
self.api, typedef_ghidra_type
)
this_parameter.setDataType(typedef_ghidra_type, SourceType.USER_DEFINED)

View File

@ -36,6 +36,8 @@ class FunctionSignature:
return_type: str return_type: str
class_type: Optional[str] class_type: Optional[str]
stack_symbols: list[CppStackOrRegisterSymbol] stack_symbols: list[CppStackOrRegisterSymbol]
# if non-zero: an offset to the `this` parameter in a __thiscall
this_adjust: int
@dataclass @dataclass
@ -119,6 +121,9 @@ def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]:
call_type = self._call_type_map[function_type["call_type"]] call_type = self._call_type_map[function_type["call_type"]]
# parse as hex number, default to 0
this_adjust = int(function_type.get("this_adjust", "0"), 16)
return FunctionSignature( return FunctionSignature(
original_function_symbol=fn, original_function_symbol=fn,
call_type=call_type, call_type=call_type,
@ -126,6 +131,7 @@ def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]:
return_type=function_type["return_type"], return_type=function_type["return_type"],
class_type=class_type, class_type=class_type,
stack_symbols=stack_symbols, stack_symbols=stack_symbols,
this_adjust=this_adjust,
) )
def get_function_list(self) -> list[PdbFunction]: def get_function_list(self) -> list[PdbFunction]:

View File

@ -262,7 +262,6 @@ def _import_class_or_struct(
if slim_for_vbase: if slim_for_vbase:
# Make a "slim" version: shrink the size to the fields that are actually present. # Make a "slim" version: shrink the size to the fields that are actually present.
# This makes a difference when the current class uses virtual inheritance # This makes a difference when the current class uses virtual inheritance
assert ( assert (
len(components) > 0 len(components) > 0
), f"Error: {class_name_with_namespace} should not be empty. There must be at least one direct or indirect vbase pointer." ), f"Error: {class_name_with_namespace} should not be empty. There must be at least one direct or indirect vbase pointer."
@ -352,7 +351,9 @@ def _import_vbaseptr(
# Set a default value of -4 for the pointer offset. While this appears to be correct in many cases, # Set a default value of -4 for the pointer offset. While this appears to be correct in many cases,
# it does not always lead to the best decompile. It can be fine-tuned by hand; the next function call # it does not always lead to the best decompile. It can be fine-tuned by hand; the next function call
# makes sure that we don't overwrite this value on re-running the import. # makes sure that we don't overwrite this value on re-running the import.
ComponentOffsetSettingsDefinition.DEF.setValue(vbase_ghidra_pointer_typedef.getDefaultSettings(), -4) ComponentOffsetSettingsDefinition.DEF.setValue(
vbase_ghidra_pointer_typedef.getDefaultSettings(), -4
)
vbase_ghidra_pointer_typedef = add_data_type_or_reuse_existing( vbase_ghidra_pointer_typedef = add_data_type_or_reuse_existing(
self.api, vbase_ghidra_pointer_typedef self.api, vbase_ghidra_pointer_typedef
@ -400,7 +401,6 @@ def _overwrite_struct(
) )
for component in components: for component in components:
offset: int = component["offset"] offset: int = component["offset"]
logger.debug( logger.debug(
"Adding component %s to class: %s", component, class_name_with_namespace "Adding component %s to class: %s", component, class_name_with_namespace

View File

@ -230,7 +230,7 @@ class CvdumpTypesParser:
re.compile(r"\s*Arg list type = (?P<arg_list_type>[\w()]+)$"), re.compile(r"\s*Arg list type = (?P<arg_list_type>[\w()]+)$"),
re.compile( re.compile(
r"\s*This adjust = (?P<this_adjust>[\w()]+)$" r"\s*This adjust = (?P<this_adjust>[\w()]+)$"
), # TODO: figure out the meaning ), # By how much the incoming pointers are shifted in virtual inheritance; hex value without `0x` prefix
re.compile( re.compile(
r"\s*Func attr = (?P<func_attr>[\w()]+)$" r"\s*Func attr = (?P<func_attr>[\w()]+)$"
), # Only for completeness, is always `none` ), # Only for completeness, is always `none`
@ -559,7 +559,6 @@ def read_fieldlist_line(self, line: str):
# virtual base class (direct or indirect) # virtual base class (direct or indirect)
elif (match := self.VBCLASS_RE.match(line)) is not None: elif (match := self.VBCLASS_RE.match(line)) is not None:
virtual_base_pointer = self.keys[self.last_key].setdefault( virtual_base_pointer = self.keys[self.last_key].setdefault(
"vbase", "vbase",
VirtualBasePointer( VirtualBasePointer(