isle/tools/ghidra_scripts/SyncFunctionsToGhidra.py

301 lines
9.1 KiB
Python

# Synchronises the function signatures of LEGO1.dll to Ghidra.
# At startup there will be several prompts for different modes,
# including a read-only / dry run mode.
# @author J. Schulz
# @category LEGO1
# @keybinding
# @menupath
# @toolbar
# Disable spurious warnings in vscode / pylance
# pyright: reportMissingModuleSource=false
import sys
import os
import re
import traceback
import logging
from lego_util.cpp_parser import (
CppFunctionDeclaration,
function_regex,
class_regex,
struct_regex,
namespace_regex,
)
from lego_util.file_helper import iterate_dir
from lego_util.exceptions import (
Lego1Exception,
NamespaceNotFoundInGhidraError,
TypeNotFoundInGhidraError,
FunctionNotFoundInGhidraError,
)
# # no effect when no Ghidra is used
# READ_ONLY = False
# # READ_ONLY = True
# Type annotations are only available in Python 3.5 or later
if sys.version_info.major > 2:
    from typing import TYPE_CHECKING, TypeVar

    if TYPE_CHECKING:
        # Everything in this branch exists for static analysis only; none of
        # it runs, because TYPE_CHECKING is False at runtime.
        # NOTE(review): the real `state` object and the functions stubbed
        # below are presumably provided by Ghidra's script environment --
        # confirm against the Ghidra scripting documentation.
        from ghidra.program.model.address import Address, AddressFactory
        from ghidra.program.model.listing import Program
        from ghidra.program.model.data import DataType
        from ghidra.program.model.symbol import Namespace
        from ghidra.app.script import GhidraScript
        from ghidra.app.script import GhidraState

        # Global stubs, written with type comments so that the syntax stays
        # valid for both Python 2 and Python 3.

        def _get_state():
            # type: () -> GhidraState
            return None  # type: ignore

        state = _get_state()

        def getDataTypes(name):
            # type: (str) -> list[DataType]
            return None  # type: ignore

        def getCurrentProgram():
            # type: () -> Program
            return None  # type: ignore

        def getFunctionAt(entryPoint):
            # type: (Address) -> Function
            return None  # type: ignore

        def getAddressFactory():
            # type: () -> AddressFactory
            return None  # type: ignore

        def getNamespace(parent, namespaceName):
            # type: (Namespace, str) -> Namespace
            return None  # type: ignore

        def askYesNo(title, message):
            # type: (str, str) -> bool
            return None  # type: ignore

        # Ties the type of the selected choice to the type of the offered ones.
        T = TypeVar("T")

        def askChoice(title, message, choices, defaultValue):
            # type: (str, str, list[T], T) -> T
            return None  # type: ignore
# This script can be run both from Ghidra and as a standalone.
# In the latter case, only the C++ parser can be used.
try:
    from ghidra.program.model.listing import Function
    from ghidra.program.flatapi import FlatProgramAPI
    from lego_util.ghidra_helper import CppFunctionWithGhidraTypes

    # This is needed for Ghidra API calls in submodules
    API = FlatProgramAPI(state.getCurrentProgram())

    # First prompt: really modify the Ghidra project, or only do a dry run?
    MAKE_CHANGES = askYesNo(
        "Make changes?", "Select 'Yes' to apply changes, select 'No' to do a dry run."
    )

    # Second prompt, asked only when changes will be made. In a dry run the
    # value has no effect; it is set anyway so that linters see the name bound.
    PROMPT_BEFORE_CHANGE = (
        askYesNo(
            "Prompt before changes?", "Should each change be confirmed by a prompt?"
        )
        if MAKE_CHANGES
        else True
    )

    RUNNING_FROM_GHIDRA = True
except ImportError:
    # The Ghidra modules could not be imported: run standalone, which only
    # exercises the C++ parser and never modifies anything.
    MAKE_CHANGES = False
    RUNNING_FROM_GHIDRA = False
# Names found in the C++ sources; filled by search_for_classes_and_structs().
CLASSES_AND_STRUCTS = set()  # type: set[str]
NAMESPACES = set()  # type: set[str]

# Counters that main() reports at the end of the run.
SUCCESSES = 0
FUNCTIONS_CHANGED = 0

# Failure bookkeeping maintained by log_and_track_failure():
# exception class name -> number of occurrences
FAILURES = {}  # type: dict[str, int]
# missing type (first argument of the error) -> number of occurrences
KNOWN_MISSING_TYPES = {}  # type: dict[str, int]
# namespaces that have already been logged as missing
KNOWN_MISSING_NAMESPACES = set()  # type: set[str]
def main():
    """Scan the LEGO1 sources and process every annotated function.

    Logging goes to stdout at INFO level. The source tree is walked twice with
    `iterate_dir`: first to collect class, struct and namespace names, then to
    handle the annotated functions. The run statistics are logged in a
    `finally` clause, so they are printed even if the second pass is aborted.
    """
    logging.basicConfig(
        level=logging.INFO,
        stream=sys.stdout,
        format="%(levelname)-8s %(message)s",
    )

    if not RUNNING_FROM_GHIDRA:
        logging.error(
            "Failed to import Ghidra functions, doing a dry run for the source code parser. "
            "Has this script been launched from Ghidra?"
        )

    # Go two levels up from this script's directory to reach the repository
    # root, then down into the LEGO1 source directory.
    script_dir = os.path.dirname(__file__)
    root_dir = os.path.join(script_dir, "..", "..", "LEGO1")

    try:
        # Pass 1: the function parser needs to know all classes and structs.
        iterate_dir(root_dir, search_for_classes_and_structs)
        # Pass 2: the real work.
        iterate_dir(root_dir, search_and_process_functions)
    finally:
        # Most frequently missing types first.
        types_by_count = sorted(
            KNOWN_MISSING_TYPES.items(), key=lambda item: item[1], reverse=True
        )
        missing_summary = ", ".join(
            "%s (%d)" % (type_name, count) for type_name, count in types_by_count
        )
        logging.info(
            "Missing types: (with number of occurences): %s", missing_summary
        )
        logging.info("Successes: %d", SUCCESSES)
        logging.info("Failures: %s", FAILURES)
        logging.info("Functions changed: %d", FUNCTIONS_CHANGED)
def log_and_track_failure(file_path, error, unexpected=False):
    # type: (str, Exception, bool) -> None
    """Count a failure and log it, unless the same problem was logged before.

    Every call increments the counter for the error's class name in FAILURES.
    Missing types and missing namespaces are logged only the first time they
    are seen, to reduce log noise; missing types are still counted each time.

    :param file_path: Path of the file being processed; only its base name is logged.
    :param error: The exception to record.
    :param unexpected: If True, the log line is prefixed with "Unexpected error in ".
    """
    failure_kind = error.__class__.__name__
    FAILURES[failure_kind] = FAILURES.get(failure_kind, 0) + 1

    if isinstance(error, TypeNotFoundInGhidraError):
        type_name = error.args[0]
        times_seen_before = KNOWN_MISSING_TYPES.get(type_name, 0)
        KNOWN_MISSING_TYPES[type_name] = times_seen_before + 1
        if times_seen_before > 0:
            # Already logged once
            return

    if isinstance(error, NamespaceNotFoundInGhidraError):
        namespace = error.get_namespace_str()
        if namespace in KNOWN_MISSING_NAMESPACES:
            # Already logged once
            return
        KNOWN_MISSING_NAMESPACES.add(namespace)

    prefix = "Unexpected error in " if unexpected else ""
    logging.error("%s%s: %s", prefix, os.path.basename(file_path), error)
# Optional whitespace (which may include line breaks) followed by the start of
# a `//` comment. Compiled once so it can be matched at an offset.
_LINE_COMMENT_REGEX = re.compile(r"\s*//")


def handle_function(lines, startIndex, address):
    # type: (str, int, str) -> None
    """Parse one C++ function declaration and synchronise it to Ghidra.

    Outside of Ghidra only the parsing step is executed. Inside Ghidra the
    function at `address` is looked up and compared with the parsed
    declaration; it is overwritten only if it differs and changes are enabled
    (optionally after a confirmation prompt).

    :param lines: Text containing the declaration (the caller passes the
        contents of a whole source file).
    :param startIndex: Non-negative offset into `lines` at which to start
        looking for the declaration; `//` comment lines are skipped first.
    :param address: Address of the function as a string. It is logged with a
        "0x" prefix and passed to the Ghidra address factory.
    :raises FunctionNotFoundInGhidraError: If Ghidra has no function at `address`.
    :raises SystemExit: If the user picks "Abort" in the confirmation prompt.
    """
    global FUNCTIONS_CHANGED

    # Parse the C++ function. Comment lines in front of the declaration are
    # skipped first. The pattern is matched at an offset rather than against
    # `lines[startIndex:]`, because the slice would copy the rest of the file
    # on every check. `find` returns -1 when there is no further line break;
    # the loop must stop there, since a negative offset would make the match
    # start over at the beginning of the text.
    while startIndex >= 0 and _LINE_COMMENT_REGEX.match(lines, startIndex):
        startIndex = lines.find("\n", startIndex + 1)

    cpp_function = CppFunctionDeclaration(lines, startIndex, CLASSES_AND_STRUCTS)

    if cpp_function.return_type in CLASSES_AND_STRUCTS:
        # edge case handling - Ghidra does not understand what happens under the hood.
        # These must be set manually
        logging.error(
            "Unimplemented edge case at 0x%s: Return value is a non-referenced struct or class: %s",
            address,
            cpp_function,
        )
        return

    if not RUNNING_FROM_GHIDRA:
        # Standalone mode: parsing was all there is to do.
        return

    # Find the Ghidra function at that address
    ghidra_address = getAddressFactory().getAddress(address)
    ghidra_function = getFunctionAt(ghidra_address)
    if ghidra_function is None:
        raise FunctionNotFoundInGhidraError(address)

    # Convert the C++ data types to Ghidra data types
    typed_cpp_function = CppFunctionWithGhidraTypes(API, cpp_function)

    if typed_cpp_function.matches_ghidra_function(ghidra_function):
        logging.debug(
            "Skipping function '%s', matches already", cpp_function.full_name()
        )
        return

    if not MAKE_CHANGES:
        # Dry run: the mismatch was detected, but nothing is modified.
        return

    # Navigate Ghidra to the current function
    state.setCurrentAddress(ghidra_address)

    if PROMPT_BEFORE_CHANGE:
        choice = askChoice(
            "Change function?",
            "Change to %s" % cpp_function,
            ["Yes", "No", "Abort"],
            "Yes",
        )
        if choice == "No":
            return
        if choice != "Yes":
            # Anything other than "Yes" or "No" ends the whole run.
            logging.critical("User quit, terminating")
            raise SystemExit(1)

    logging.info("Modifying function %s at 0x%s", cpp_function.full_name(), address)

    typed_cpp_function.overwrite_ghidra_function(ghidra_function)

    FUNCTIONS_CHANGED += 1

    if PROMPT_BEFORE_CHANGE:
        # Add a prompt so we can verify the result immediately
        askChoice("", "Click 'OK' to continue", ["OK"], "OK")
def search_for_classes_and_structs(header_file):
    # type: (str) -> None
    """Collect class, struct and namespace names from one source file.

    Files that end in neither ".h" nor ".cpp" are ignored. If the file cannot
    be read, the error is logged with a traceback and the file is skipped.

    :param header_file: Path of the file to scan.
    """
    if not header_file.endswith((".h", ".cpp")):
        return

    try:
        with open(header_file) as infile:
            headers = infile.read()
    except Exception:
        logging.error(
            "Error handling header file: %s\n%s", header_file, traceback.format_exc()
        )
        return

    # Grow the module-level sets in place. Rebuilding them with `union` would
    # copy every name collected so far once per scanned file, and would leave
    # any earlier reference to the sets pointing at a stale object. Since the
    # names are not rebound, no `global` statement is needed.
    CLASSES_AND_STRUCTS.update(class_regex.findall(headers))
    CLASSES_AND_STRUCTS.update(struct_regex.findall(headers))
    NAMESPACES.update(namespace_regex.findall(headers))
def search_and_process_functions(path):
    # type: (str) -> None
    """Handle every annotated function in one ".cpp" file.

    Files with any other extension are ignored. Each annotation found by
    `function_regex` is passed to `handle_function`, together with the offset
    of the line that follows the annotation. A call that returns without
    raising increments SUCCESSES; exceptions are recorded with
    `log_and_track_failure` and do not stop the loop.

    :param path: Path of the file to process.
    """
    global SUCCESSES

    if not path.endswith(".cpp"):
        return

    with open(path, "r") as source_file:
        source = source_file.read()

    # search for '// FUNCTION: LEGO1 0x[...]'
    for annotation in function_regex.finditer(source):
        # Offset of the first character after the annotation's line
        declaration_start = source.find("\n", annotation.end()) + 1
        try:
            handle_function(source, declaration_start, annotation.group(1))
        except Lego1Exception as known_error:
            log_and_track_failure(path, known_error)
        except Exception as unexpected_error:
            log_and_track_failure(path, unexpected_error, unexpected=True)
            logging.error(traceback.format_exc())
        else:
            SUCCESSES += 1
# Start the run only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()