mirror of
https://github.com/isledecomp/isle.git
synced 2026-01-24 00:31:16 +00:00
301 lines
9.1 KiB
Python
301 lines
9.1 KiB
Python
# Synchronises the function signatures of LEGO1.dll to Ghidra.
|
|
# At startup there will be several prompts for different modes,
|
|
# including a read-only / dry run mode.
|
|
|
|
# @author J. Schulz
|
|
# @category LEGO1
|
|
# @keybinding
|
|
# @menupath
|
|
# @toolbar
|
|
|
|
|
|
# Disable spurious warnings in vscode / pylance
|
|
# pyright: reportMissingModuleSource=false
|
|
|
|
import sys
|
|
import os
|
|
import re
|
|
import traceback
|
|
import logging
|
|
|
|
from lego_util.cpp_parser import (
|
|
CppFunctionDeclaration,
|
|
function_regex,
|
|
class_regex,
|
|
struct_regex,
|
|
namespace_regex,
|
|
)
|
|
from lego_util.file_helper import iterate_dir
|
|
from lego_util.exceptions import (
|
|
Lego1Exception,
|
|
NamespaceNotFoundInGhidraError,
|
|
TypeNotFoundInGhidraError,
|
|
FunctionNotFoundInGhidraError,
|
|
)
|
|
|
|
# # no effect when no Ghidra is used
|
|
# READ_ONLY = False
|
|
# # READ_ONLY = True
|
|
|
|
|
|
# Type annotations are only available in Python 3.5 or later
|
|
if sys.version_info.major > 2:
    from typing import TYPE_CHECKING, TypeVar

    if TYPE_CHECKING:
        # This branch is only evaluated by static type checkers. It declares
        # the Ghidra types and the script-level helpers used further down so
        # that editors can resolve them.
        from ghidra.program.model.address import Address, AddressFactory
        from ghidra.program.model.listing import Program
        from ghidra.program.model.data import DataType
        from ghidra.program.model.symbol import Namespace
        from ghidra.app.script import GhidraScript
        from ghidra.app.script import GhidraState

        # Global stubs, Python 2 and 3 compatible
        T = TypeVar("T")

        def _get_state():  # type: () -> GhidraState
            return None  # type: ignore

        state = _get_state()

        def getDataTypes(name):  # type: (str) -> list[DataType]
            return None  # type: ignore

        def getCurrentProgram():  # type: () -> Program
            return None  # type: ignore

        def getFunctionAt(entryPoint):  # type: (Address) -> Function
            return None  # type: ignore

        def getAddressFactory():  # type: () -> AddressFactory
            return None  # type: ignore

        def getNamespace(parent, namespaceName):  # type: (Namespace, str) -> Namespace
            return None  # type: ignore

        def askYesNo(title, message):  # type: (str, str) -> bool
            return None  # type: ignore

        def askChoice(
            title, message, choices, defaultValue
        ):  # type: (str, str, list[T], T) -> T
            return None  # type: ignore
|
|
|
|
|
|
# This script can be run both from Ghidra and as a standalone.
|
|
# In the latter case, only the C++ parser can be used.
|
|
try:
    from ghidra.program.model.listing import Function
    from ghidra.program.flatapi import FlatProgramAPI

    from lego_util.ghidra_helper import CppFunctionWithGhidraTypes

    # Submodules need this handle for their Ghidra API calls
    API = FlatProgramAPI(state.getCurrentProgram())

    MAKE_CHANGES = askYesNo(
        "Make changes?", "Select 'Yes' to apply changes, select 'No' to do a dry run."
    )

    # In a dry run the value has no effect; it is only bound for the linter
    PROMPT_BEFORE_CHANGE = (
        askYesNo(
            "Prompt before changes?", "Should each change be confirmed by a prompt?"
        )
        if MAKE_CHANGES
        else True
    )

    RUNNING_FROM_GHIDRA = True
except ImportError:
    # Ghidra modules are unavailable: only the C++ parser can be exercised
    MAKE_CHANGES = False
    RUNNING_FROM_GHIDRA = False
|
|
|
|
|
|
# Names found in the source files by search_for_classes_and_structs()
CLASSES_AND_STRUCTS = set()  # type: set[str]
NAMESPACES = set()  # type: set[str]

# Counters that main() reports at the end of the run
SUCCESSES = 0
FUNCTIONS_CHANGED = 0
FAILURES = {}  # type: dict[str, int]
KNOWN_MISSING_TYPES = {}  # type: dict[str, int]

# Namespaces that have already been reported as missing
KNOWN_MISSING_NAMESPACES = set()  # type: set[str]
|
|
|
|
|
|
def main():
    """Walk the LEGO1 source tree twice and log a summary afterwards.

    The first pass collects class, struct and namespace names; the second
    pass processes the annotated functions. The summary is logged even when
    one of the passes raises.
    """
    logging.basicConfig(
        level=logging.INFO, stream=sys.stdout, format="%(levelname)-8s %(message)s"
    )
    if not RUNNING_FROM_GHIDRA:
        logging.error(
            "Failed to import Ghidra functions, doing a dry run for the source code parser. "
            "Has this script been launched from Ghidra?"
        )

    # The LEGO1 sources are located two directories above this script
    script_dir = os.path.dirname(__file__)
    root_dir = os.path.join(script_dir, "..", "..", "LEGO1")

    try:
        # Pass 1: gather the known classes and structs
        iterate_dir(root_dir, search_for_classes_and_structs)

        # Pass 2: the actual function processing
        iterate_dir(root_dir, search_and_process_functions)
    finally:
        # Emit the statistics even if the run was aborted
        most_frequent_first = sorted(
            KNOWN_MISSING_TYPES.items(), key=lambda item: item[1], reverse=True
        )
        missing_summary = ", ".join(
            "%s (%d)" % (type_name, count) for type_name, count in most_frequent_first
        )

        logging.info(
            "Missing types: (with number of occurences): %s",
            missing_summary,
        )
        logging.info("Successes: %d", SUCCESSES)
        logging.info("Failures: %s", FAILURES)
        logging.info("Functions changed: %d", FUNCTIONS_CHANGED)
|
|
|
|
|
|
def log_and_track_failure(
    file_path, error, unexpected=False
):  # type: (str, Exception, bool) -> None
    """Count a failure by exception class and log it unless it is a repeat.

    Every call increments the FAILURES entry for the exception's class name.
    A missing type or a missing namespace is only logged the first time it
    is seen; missing types are additionally counted in KNOWN_MISSING_TYPES.

    file_path  -- file in which the failure occurred (only its base name is logged)
    error      -- the exception to record
    unexpected -- prefixes the log message with "Unexpected error in "
    """
    failure_kind = error.__class__.__name__
    FAILURES[failure_kind] = FAILURES.get(failure_kind, 0) + 1

    if isinstance(error, TypeNotFoundInGhidraError):
        type_name = error.args[0]
        seen_before = KNOWN_MISSING_TYPES.get(type_name, 0)
        KNOWN_MISSING_TYPES[type_name] = seen_before + 1
        if seen_before > 0:
            # Already reported once, stay quiet to reduce log noise
            return

    if isinstance(error, NamespaceNotFoundInGhidraError):
        namespace = error.get_namespace_str()
        if namespace in KNOWN_MISSING_NAMESPACES:
            # Already reported once, stay quiet to reduce log noise
            return

        KNOWN_MISSING_NAMESPACES.add(namespace)

    prefix = "Unexpected error in " if unexpected else ""
    logging.error("%s%s: %s", prefix, os.path.basename(file_path), error)
|
|
|
|
|
|
# Optional whitespace followed by the start of a `//` comment
_LEADING_COMMENT_REGEX = re.compile(r"\s*//")


def handle_function(lines, startIndex, address):  # type: (str, int, str) -> None
    """Parse the C++ function declared at ``startIndex`` and sync it to Ghidra.

    lines      -- complete text of the source file
    startIndex -- offset in ``lines`` at which to look for the declaration;
                  ``//`` comment lines at that position are skipped first
    address    -- address string handed to Ghidra's address factory (logged
                  with a ``0x`` prefix)

    Returns without touching Ghidra when the return type is a known class or
    struct, when the script does not run inside Ghidra, when the Ghidra
    function already matches, when MAKE_CHANGES is off, or when the user
    answers "No" at the prompt.

    Raises FunctionNotFoundInGhidraError if Ghidra has no function at
    ``address`` and SystemExit if the user chooses "Abort" at the prompt.
    """
    global FUNCTIONS_CHANGED

    # Parse the C++ function
    # Matching at an offset avoids copying the remainder of the file for
    # every comment line that has to be skipped.
    while _LEADING_COMMENT_REGEX.match(lines, startIndex):
        line_end = lines.find("\n", startIndex + 1)
        if line_end == -1:
            # The comment runs to the end of the text: nothing is left to
            # parse, so point past the end instead of passing on -1
            startIndex = len(lines)
            break
        startIndex = line_end
    cpp_function = CppFunctionDeclaration(lines, startIndex, CLASSES_AND_STRUCTS)

    if cpp_function.return_type in CLASSES_AND_STRUCTS:
        # edge case handling - Ghidra does not understand what happens under the hood.
        # These must be set manually
        logging.error(
            "Unimplemented edge case at 0x%s: Return value is a non-referenced struct or class: %s",
            address,
            cpp_function,
        )
        return

    if not RUNNING_FROM_GHIDRA:
        return

    # Find the Ghidra function at that address
    ghidra_address = getAddressFactory().getAddress(address)
    ghidra_function = getFunctionAt(ghidra_address)
    if ghidra_function is None:
        raise FunctionNotFoundInGhidraError(address)

    # Convert the C++ data types to Ghidra data types
    typed_cpp_function = CppFunctionWithGhidraTypes(API, cpp_function)

    if typed_cpp_function.matches_ghidra_function(ghidra_function):
        logging.debug(
            "Skipping function '%s', matches already", cpp_function.full_name()
        )
        return

    if not MAKE_CHANGES:
        return

    # Navigate Ghidra to the current function
    state.setCurrentAddress(ghidra_address)

    if PROMPT_BEFORE_CHANGE:
        choice = askChoice(
            "Change function?",
            "Change to %s" % cpp_function,
            ["Yes", "No", "Abort"],
            "Yes",
        )
        if choice == "No":
            return
        if choice != "Yes":
            # Anything other than "Yes"/"No" is treated as a request to stop
            logging.critical("User quit, terminating")
            raise SystemExit(1)

    logging.info("Modifying function %s at 0x%s", cpp_function.full_name(), address)

    typed_cpp_function.overwrite_ghidra_function(ghidra_function)

    FUNCTIONS_CHANGED += 1

    if PROMPT_BEFORE_CHANGE:
        # Add a prompt so we can verify the result immediately
        askChoice("", "Click 'OK' to continue", ["OK"], "OK")
|
|
|
|
|
|
def search_for_classes_and_structs(header_file):  # type: (str) -> None
    """Add the class, struct and namespace names found in a file to the globals.

    Only paths ending in ``.h`` or ``.cpp`` are read. If reading fails, the
    error is logged together with its traceback and the file is skipped.
    """
    global CLASSES_AND_STRUCTS, NAMESPACES

    if not header_file.endswith((".h", ".cpp")):
        return

    try:
        with open(header_file) as infile:
            source_text = infile.read()
    except Exception:
        logging.error(
            "Error handling header file: %s\n%s", header_file, traceback.format_exc()
        )
        return

    # Classes and structs share one set, namespaces are tracked separately
    CLASSES_AND_STRUCTS = CLASSES_AND_STRUCTS.union(
        class_regex.findall(source_text), struct_regex.findall(source_text)
    )
    NAMESPACES = NAMESPACES.union(namespace_regex.findall(source_text))
|
|
|
|
|
|
def search_and_process_functions(path):  # type: (str) -> None
    """Run handle_function() for every function annotation in a ``.cpp`` file.

    Paths that do not end in ``.cpp`` are ignored. For each match of
    ``function_regex`` the text starting on the following line is handed to
    handle_function() together with the first regex group as the address.
    A call that returns normally increments SUCCESSES; any exception is
    recorded via log_and_track_failure() and the remaining annotations are
    still processed.
    """
    global SUCCESSES
    if not path.endswith(".cpp"):
        return

    with open(path, "r") as file:
        lines = file.read()

    # search for '// FUNCTION: LEGO1 0x[...]'
    for match in function_regex.finditer(lines):
        newline_index = lines.find("\n", match.end())
        if newline_index == -1:
            # The annotation sits on the last line without a trailing newline.
            # Point past the end of the text; `-1 + 1` would restart parsing
            # at the top of the file and could pick up an unrelated function.
            next_line_index = len(lines)
        else:
            next_line_index = newline_index + 1

        try:
            handle_function(lines, next_line_index, match.groups()[0])
            SUCCESSES += 1
        except Lego1Exception as e:
            log_and_track_failure(path, e)

        except Exception as e:
            # Anything that is not a Lego1Exception is logged with its traceback
            log_and_track_failure(path, e, unexpected=True)
            logging.error(traceback.format_exc())
|
|
|
|
|
|
# Entry point when the file is executed as a script
if __name__ == "__main__":
    main()
|