mirror of
https://github.com/isledecomp/isle.git
synced 2026-01-24 00:31:16 +00:00
451 lines
15 KiB
Python
451 lines
15 KiB
Python
# Experiments for PDB imports.
|
|
#
|
|
# Note that the virtual environment must be set up beforehand, and all packages must be installed.
|
|
# Also, the Python version of the virtual environment must probably match the Python version used for Ghidrathon.
|
|
|
|
# @author J. Schulz
|
|
# @category LEGO1
|
|
# @keybinding
|
|
# @menupath
|
|
# @toolbar
|
|
|
|
from dataclasses import dataclass, field
|
|
import sys
|
|
import logging
|
|
from pathlib import Path
|
|
import traceback
|
|
from typing import TYPE_CHECKING
|
|
|
|
from lego_util.exceptions import Lego1Exception
|
|
from lego_util.statistics import Statistics
|
|
|
|
# pylint: disable=undefined-variable # need to disable this one globally because pylint does not understand e.g. askYesNo()
|
|
if TYPE_CHECKING:
|
|
import ghidra
|
|
from lego_util.headers import * # pylint: disable=wildcard-import
|
|
|
|
# Module-level logger shared by all functions in this script.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def setup_logging():
    """Route INFO-and-above log records to stdout and announce the start of the run."""
    basic_config = {
        "format": "%(levelname)-8s %(message)s",
        "stream": sys.stdout,
        "level": logging.INFO,
        # replace any handlers that are already attached to the root logger
        "force": True,
    }
    logging.basicConfig(**basic_config)
    logger.info("Starting...")
|
|
|
|
|
|
@dataclass
class Globals:
    """Settings and counters that are shared by the functions of this script."""

    # When False, main() raises the level of two isledecomp loggers to CRITICAL.
    verbose: bool
    # Set to True once the Ghidra imports and prompts at module level succeeded.
    running_from_ghidra: bool = False
    # False means dry run: functions are parsed but Ghidra is not modified.
    make_changes: bool = False
    # When True, the user is asked to confirm every single function change.
    prompt_before_changes: bool = True
    # statistics
    statistics: Statistics = field(default_factory=Statistics)
|
|
|
|
|
|
# hard-coded settings that we don't want to prompt in Ghidra every time
# (the remaining fields are filled in below, depending on whether Ghidra is available)
GLOBALS = Globals(verbose=False)
|
|
|
|
|
|
# Disable spurious warnings in vscode / pylance
|
|
# pyright: reportMissingModuleSource=false
|
|
|
|
# This script can be run both from Ghidra and as a standalone.
|
|
# In the latter case, only the C++ parser can be used.
|
|
setup_logging()
try:
    # Project helper first: unlike the plain Ghidra imports below it contains actual code.
    from lego_util.ghidra_helper import (
        get_ghidra_namespace,
        get_ghidra_type,
    )

    # Ghidra API classes; these are only importable when the script is run by Ghidra.
    from ghidra.program.model.listing import Function, Parameter, ParameterImpl
    from ghidra.program.flatapi import FlatProgramAPI
    from ghidra.program.model.symbol import SourceType
    from ghidra.util.exception import CancelledException

    # Ask once up front whether this is a dry run ...
    GLOBALS.make_changes = askYesNo(
        "Make changes?", "Select 'Yes' to apply changes, select 'No' to do a dry run."
    )
    # ... and, if it is not, whether every change needs an individual confirmation.
    if GLOBALS.make_changes:
        GLOBALS.prompt_before_changes = askYesNo(
            "Prompt before changes?", "Should each change be confirmed by a prompt?"
        )

    GLOBALS.running_from_ghidra = True
except ImportError:
    logger.error(
        "Failed to import Ghidra functions, doing a dry run for the source code parser. "
        "Has this script been launched from Ghidra?"
    )
    GLOBALS.running_from_ghidra = False
    # Placeholder so that later `CancelledException is not None` checks work standalone.
    CancelledException = None
|
|
|
|
|
|
def get_repository_root():
    """Return the absolute directory three levels above this script file."""
    location = Path(__file__).absolute()
    # step up: containing folder -> its parent -> repository root
    for _ in range(3):
        location = location.parent
    return location
|
|
|
|
|
|
def add_python_path(path: str):
    """Make a directory of the repository importable.

    Args:
        path: Directory to add, relative to the repository root.

    Raises:
        FileNotFoundError: If the resulting directory does not exist.
    """
    absolute_path = get_repository_root().joinpath(path)
    logger.info("Adding %s to Python Path", absolute_path)
    # Explicit check instead of `assert`, which is skipped when Python runs with -O.
    if not absolute_path.exists():
        raise FileNotFoundError(
            f"Cannot add {absolute_path} to the Python path: it does not exist"
        )
    # Index 1 keeps the first entry of sys.path in front of the new one.
    sys.path.insert(1, str(absolute_path))
|
|
|
|
|
|
class PdbFunctionWithGhidraObjects:
    """A representation of a function from the PDB with each type replaced by a Ghidra type instance.

    The constructor resolves the namespace, the return type and all argument
    types through the Ghidra helpers, so creating an instance already verifies
    that every type of the signature can be looked up.
    """

    def __init__(
        self,
        fpapi: "FlatProgramAPI",
        match_info: "MatchInfo",
        signature: "FunctionSignature",
    ):
        """Resolve all parts of `signature` to Ghidra objects.

        Args:
            fpapi: Ghidra flat API used for every namespace and type lookup.
            match_info: Match entry of the function; its `name` may be qualified with `::`.
            signature: Function signature extracted from the PDB.
        """
        self.api = fpapi
        self.match_info = match_info
        self.signature = signature

        assert match_info.name is not None
        # The last `::` separated component is the function name,
        # everything in front of it is the namespace path.
        namespace_hierarchy = match_info.name.split("::")
        self.name = namespace_hierarchy.pop()
        self.namespace = get_ghidra_namespace(fpapi, namespace_hierarchy)

        self.return_type = get_ghidra_type(fpapi, signature.return_type)
        # Placeholder names; stack parameters are renamed in overwrite_ghidra_function().
        self.arguments = [
            ParameterImpl(
                f"param{index}",
                get_ghidra_type(fpapi, type_name),
                fpapi.getCurrentProgram(),
            )
            for index, type_name in enumerate(signature.arglist)
        ]

    @property
    def call_type(self):
        """Calling convention of the function as stored in the signature."""
        return self.signature.call_type

    @property
    def stack_symbols(self):
        """Symbols of the function as stored in the signature."""
        return self.signature.stack_symbols

    def get_full_name(self) -> str:
        """Return the function name prefixed by the name of its namespace."""
        return f"{self.namespace.getName()}::{self.name}"

    def format_proposed_change(self) -> str:
        """Return a one-line, human readable declaration for the confirmation prompt."""
        argument_list = ", ".join(self.signature.arglist)
        return (
            f"{self.return_type} {self.call_type} {self.get_full_name()}"
            + f"({argument_list})"
        )

    def matches_ghidra_function(self, ghidra_function: "Function") -> bool:
        """Checks whether this function declaration already matches the description in Ghidra"""
        name_match = self.name == ghidra_function.getName(False)
        namespace_match = self.namespace == ghidra_function.getParentNamespace()
        return_type_match = self.return_type == ghidra_function.getReturnType()
        # Despite the name this compares the calling convention in general.
        thiscall_matches = (
            self.signature.call_type == ghidra_function.getCallingConventionName()
        )

        # The arguments are only compared when the calling convention agrees,
        # because the convention decides whether Ghidra lists a `this` parameter.
        if not thiscall_matches:
            args_match = False
        elif self.signature.call_type == "__thiscall":
            args_match = self._matches_thiscall_parameters(ghidra_function)
        else:
            args_match = self._matches_non_thiscall_parameters(ghidra_function)

        logger.debug(
            "Matches: namespace=%s name=%s return_type=%s thiscall=%s args=%s",
            namespace_match,
            name_match,
            return_type_match,
            thiscall_matches,
            args_match,
        )

        return (
            name_match
            and namespace_match
            and return_type_match
            and thiscall_matches
            and args_match
        )

    def _matches_non_thiscall_parameters(self, ghidra_function: "Function") -> bool:
        """Compare our arguments against all parameters of the Ghidra function."""
        return self._parameter_lists_match(ghidra_function.getParameters())

    def _matches_thiscall_parameters(self, ghidra_function: "Function") -> bool:
        """Compare our arguments against the Ghidra parameters without the leading `this`."""
        ghidra_params = list(ghidra_function.getParameters())

        if not ghidra_params:
            # Not even a `this` parameter is present, so there is nothing that could match.
            # The result is included in the debug output of matches_ghidra_function().
            return False

        # remove the `this` argument which we don't generate ourselves
        return self._parameter_lists_match(ghidra_params[1:])

    def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool:
        """Return True if count, types and names of `ghidra_params` agree with the PDB."""
        if len(self.arguments) != len(ghidra_params):
            logger.info("Mismatching argument count")
            return False

        for this_arg, ghidra_arg in zip(self.arguments, ghidra_params):
            # compare argument types
            if this_arg.getDataType() != ghidra_arg.getDataType():
                logger.debug(
                    "Mismatching arg type: expected %s, found %s",
                    this_arg.getDataType(),
                    ghidra_arg.getDataType(),
                )
                return False

            # compare argument names; the PDB symbol is looked up by stack offset
            stack_match = self.get_matching_stack_symbol(ghidra_arg.getStackOffset())
            if stack_match is None:
                logger.debug("Not found on stack: %s", ghidra_arg)
                return False

            # "__formal" is the placeholder for arguments without a name
            if stack_match.name not in ["__formal", ghidra_arg.getName()]:
                logger.debug(
                    "Argument name mismatch: expected %s, found %s",
                    stack_match.name,
                    ghidra_arg.getName(),
                )
                return False

        return True

    def overwrite_ghidra_function(self, ghidra_function: "Function") -> None:
        """Replace the function declaration in Ghidra by the one derived from C++.

        Raises:
            StackOffsetMismatchError: If a stack parameter generated by Ghidra
                has no PDB symbol at the same stack offset.
        """
        ghidra_function.setName(self.name, SourceType.USER_DEFINED)
        ghidra_function.setParentNamespace(self.namespace)
        ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED)
        ghidra_function.setCallingConvention(self.call_type)

        ghidra_function.replaceParameters(
            Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS,
            True,
            SourceType.USER_DEFINED,
            self.arguments,
        )

        # When we set the parameters, Ghidra will generate the layout.
        # Now we read them again and match them against the stack layout in the PDB,
        # both to verify and to set the parameter names.
        ghidra_parameters: "list[ghidra.program.model.listing.Parameter]" = ghidra_function.getParameters()  # type: ignore

        for param in ghidra_parameters:
            if param.isStackVariable():
                self._rename_stack_parameter(param)
            elif param.getName() != "this":
                # 'this' parameters are auto-generated and cannot be changed,
                # so only other non-stack parameters are reported.
                # TODO: Does this ever happen? If it does, the register name could be
                # looked up with get_matching_register_symbol() to rename the parameter.
                logger.warning(
                    "Unhandled register variable in %s", self.get_full_name()
                )

    def _rename_stack_parameter(self, param: "Parameter"):
        """Give a stack parameter the name of the PDB symbol at the same stack offset.

        The parameter is left unchanged (and an error is logged) if the types differ.

        Raises:
            StackOffsetMismatchError: If the PDB has no symbol at the parameter's offset.
        """
        match = self.get_matching_stack_symbol(param.getStackOffset())
        if match is None:
            raise StackOffsetMismatchError(
                f"Could not find a matching symbol at offset {param.getStackOffset()} in {self.get_full_name()}"
            )

        if param.getDataType() != get_ghidra_type(self.api, match.data_type):
            logger.error(
                "Type mismatch for parameter: %s in Ghidra, %s in PDB", param, match
            )
            return

        param.setName(match.name, SourceType.USER_DEFINED)

    def get_matching_stack_symbol(self, stack_offset: int) -> "CppStackSymbol | None":
        """Return the first stack symbol located at `stack_offset`, or None."""
        return next(
            (
                symbol
                for symbol in self.stack_symbols
                if isinstance(symbol, CppStackSymbol)
                and symbol.stack_offset == stack_offset
            ),
            None,
        )

    def get_matching_register_symbol(self, register: str) -> "CppRegisterSymbol | None":
        """Return the first register symbol stored in `register`, or None."""
        return next(
            (
                symbol
                for symbol in self.stack_symbols
                if isinstance(symbol, CppRegisterSymbol) and symbol.register == register
            ),
            None,
        )
|
|
|
|
|
|
def handle_function_in_ghidra(match_info: "MatchInfo", signature: "FunctionSignature"):
    """Bring the Ghidra function at the original address in line with the PDB signature.

    Does nothing when the script is not run from Ghidra. In a dry run the
    signature is still converted to Ghidra objects, but Ghidra is not modified.

    Raises:
        SystemExit: If the user picks "Abort" in the confirmation prompt.
    """
    if not GLOBALS.running_from_ghidra:
        return

    # Find the Ghidra address that belongs to the original address
    address_as_hex = f"{match_info.orig_addr:x}"
    ghidra_address = getAddressFactory().getAddress(address_as_hex)  # type: ignore

    flat_api = FlatProgramAPI(currentProgram())  # type: ignore

    # Built before the dry-run check, so the lookups also run when nothing is changed.
    typed_pdb_function = PdbFunctionWithGhidraObjects(flat_api, match_info, signature)

    if not GLOBALS.make_changes:
        return

    ghidra_function = getFunctionAt(ghidra_address)
    if ghidra_function is None:
        # The placeholder name is replaced when the function is overwritten below.
        ghidra_function = createFunction(ghidra_address, "temp")
        assert (
            ghidra_function is not None
        ), f"Failed to create function at {ghidra_address}"
        logger.info("Created new function at %s", ghidra_address)

    if typed_pdb_function.matches_ghidra_function(ghidra_function):
        logger.info(
            "Skipping function '%s', matches already",
            typed_pdb_function.get_full_name(),
        )
        return

    # Navigate Ghidra to the current function
    state().setCurrentAddress(ghidra_address)

    ask_user = GLOBALS.prompt_before_changes
    if ask_user:
        answer = askChoice(
            "Change function?",
            f"Change to: {typed_pdb_function.format_proposed_change()}",
            ["Yes", "No", "Abort"],
            "Yes",
        )
        if answer != "Yes":
            if answer == "No":
                # skip only this function
                return
            logger.critical("User quit, terminating")
            raise SystemExit(1)

    typed_pdb_function.overwrite_ghidra_function(ghidra_function)

    GLOBALS.statistics.functions_changed += 1

    # Re-read the setting, exactly like the first check did.
    if GLOBALS.prompt_before_changes:
        # Add a prompt so we can verify the result immediately
        askChoice("", "Click 'OK' to continue", ["OK"], "OK")
|
|
|
|
|
|
def handle_function_list(isle_compare: "IsleCompare"):
    """Process every function of the comparison and keep going after failures.

    Each function is handled on its own. Failures are recorded in the global
    statistics, so that a single broken function does not end the import.

    Args:
        isle_compare: Finished comparison from which the function list is extracted.

    Raises:
        RuntimeError: Re-raised when it wraps Ghidra's CancelledException.
    """
    # try to acquire matched functions
    migration = PdbExtractionForGhidraMigration(isle_compare)
    func_signatures = migration.get_function_list()
    for match_info, signature in func_signatures:
        try:
            handle_function_in_ghidra(match_info, signature)
            GLOBALS.statistics.successes += 1
        except Lego1Exception as e:
            log_and_track_failure(e)
        except RuntimeError as e:
            # The first argument is inspected as the actual cause (presumably the
            # wrapped Java exception when running in Ghidra - TODO confirm).
            # A RuntimeError without arguments is its own cause; indexing the empty
            # args tuple would raise IndexError and abort the whole loop.
            cause = e.args[0] if e.args else e
            if CancelledException is not None and isinstance(cause, CancelledException):
                # let Ghidra's CancelledException pass through
                raise
            log_and_track_failure(cause, unexpected=True)
        except Exception as e:  # pylint: disable=broad-exception-caught
            log_and_track_failure(e, unexpected=True)
            logger.error(traceback.format_exc())
|
|
|
|
|
|
def log_and_track_failure(error: Exception, unexpected: bool = False):
    """Record `error` in the statistics and log it if the statistics report it as new.

    Args:
        error: The failure to record.
        unexpected: Prefix the log line with "Unexpected error: ".
    """
    is_new_failure = GLOBALS.statistics.track_failure_and_tell_if_new(error)
    if not is_new_failure:
        return

    prefix = "Unexpected error: " if unexpected else ""
    logger.error("%s%s", prefix, error)
|
|
|
|
|
|
def main():
    """Compare the original and the recompiled LEGO1.DLL, then process all functions.

    The statistics are logged even if processing the function list fails.
    """
    repo_root = get_repository_root()
    build_path = repo_root / "build"

    origfile_path = repo_root / "LEGO1.DLL"
    recompiledfile_path = build_path / "LEGO1.DLL"
    pdb_path = build_path / "LEGO1.pdb"

    if not GLOBALS.verbose:
        # Only let critical messages of these two isledecomp loggers through.
        for logger_name in ("isledecomp.compare.db", "isledecomp.compare.lines"):
            logging.getLogger(logger_name).setLevel(logging.CRITICAL)

    logger.info("Starting comparison")
    with Bin(str(origfile_path), find_str=True) as origfile:
        with Bin(str(recompiledfile_path)) as recompfile:
            isle_compare = IsleCompare(
                origfile, recompfile, str(pdb_path), str(repo_root)
            )

    logger.info("Comparison complete.")

    try:
        handle_function_list(isle_compare)
    finally:
        GLOBALS.statistics.log()

    logger.info("Done")
|
|
|
|
|
|
# Ghidra does not reset sys.path after running the script,
# so we keep a copy here and restore it at the very end.
sys_path_backup = sys.path.copy()
try:
    # make modules installed in the venv available in Ghidra
    add_python_path(".venv/Lib/site-packages")
    # needed when isledecomp is installed in editable mode in the venv
    add_python_path("tools/isledecomp")

    # These imports only work after the paths above have been added.
    import setuptools  # pylint: disable=unused-import # required to fix a distutils issue in Python 3.12
    from isledecomp import Bin
    from isledecomp.compare import Compare as IsleCompare
    from isledecomp.compare.db import MatchInfo
    from lego_util.pdb_extraction import (  # pylint: disable=ungrouped-imports # these must be imported
        PdbExtractionForGhidraMigration,
        FunctionSignature,
        CppRegisterSymbol,
        CppStackSymbol,
    )
    from lego_util.exceptions import StackOffsetMismatchError

    if __name__ == "__main__":
        main()
finally:
    sys.path = sys_path_backup
|