mirror of
https://github.com/isledecomp/isle.git
synced 2026-01-24 00:31:16 +00:00
Add draft for Ghidra function import script
This commit is contained in:
parent
45f9f54f21
commit
fd5e8f8d0c
1
.gitignore
vendored
1
.gitignore
vendored
@ -19,3 +19,4 @@ LEGO1.DLL
|
|||||||
LEGO1PROGRESS.*
|
LEGO1PROGRESS.*
|
||||||
ISLEPROGRESS.*
|
ISLEPROGRESS.*
|
||||||
*.pyc
|
*.pyc
|
||||||
|
*$py.class
|
||||||
12
tools/ghidra_scripts/README.md
Normal file
12
tools/ghidra_scripts/README.md
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
# Ghidra Scripts
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
- In Ghidra, _Open Window -> Script Manager_.
|
||||||
|
- Click the _Manage Script Directories_ button on the top right.
|
||||||
|
- Click the _Add_ button and select this file's parent directory.
|
||||||
|
- Close the window and click the _Refresh_ button.
|
||||||
|
- This script should now be available under the folder _LEGO1_.
|
||||||
|
|
||||||
|
## Development
|
||||||
|
- Type hints for Ghidra (optional): Download a recent release from https://github.com/VDOO-Connected-Trust/ghidra-pyi-generator,
|
||||||
|
unpack it somewhere, and `pip install` that directory in this virtual environment. This provides types and headers for Python.
|
||||||
300
tools/ghidra_scripts/SyncFunctionsToGhidra.py
Normal file
300
tools/ghidra_scripts/SyncFunctionsToGhidra.py
Normal file
@ -0,0 +1,300 @@
|
|||||||
|
# Synchronises the function signatures of LEGO1.dll to Ghidra.
|
||||||
|
# At startup there will be several prompts for different modes,
|
||||||
|
# including a read-only / dry run mode.
|
||||||
|
|
||||||
|
# @author J. Schulz
|
||||||
|
# @category LEGO1
|
||||||
|
# @keybinding
|
||||||
|
# @menupath
|
||||||
|
# @toolbar
|
||||||
|
|
||||||
|
|
||||||
|
# Disable spurious warnings in vscode / pylance
|
||||||
|
# pyright: reportMissingModuleSource=false
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import traceback
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from lego_util.cpp_parser import (
|
||||||
|
CppFunctionDeclaration,
|
||||||
|
function_regex,
|
||||||
|
class_regex,
|
||||||
|
struct_regex,
|
||||||
|
namespace_regex,
|
||||||
|
)
|
||||||
|
from lego_util.file_helper import iterate_dir
|
||||||
|
from lego_util.exceptions import (
|
||||||
|
Lego1Exception,
|
||||||
|
NamespaceNotFoundInGhidraError,
|
||||||
|
TypeNotFoundInGhidraError,
|
||||||
|
FunctionNotFoundInGhidraError,
|
||||||
|
)
|
||||||
|
|
||||||
|
# # no effect when no Ghidra is used
|
||||||
|
# READ_ONLY = False
|
||||||
|
# # READ_ONLY = True
|
||||||
|
|
||||||
|
|
||||||
|
# Type annotations are only available in Python 3.5 or later
|
||||||
|
if sys.version_info.major > 2:
    from typing import TYPE_CHECKING, TypeVar

    # NOTE(review): the nesting below is reconstructed on the assumption that
    # everything up to `askChoice` lives inside `if TYPE_CHECKING:` - confirm.
    # The stubs only describe names that Ghidra provides to scripts, so that
    # linters and type checkers know their signatures.
    if TYPE_CHECKING:
        from ghidra.program.model.address import Address, AddressFactory
        from ghidra.program.model.listing import Program
        from ghidra.program.model.data import DataType
        from ghidra.program.model.symbol import Namespace
        from ghidra.app.script import GhidraScript
        from ghidra.app.script import GhidraState

        # Global stubs, Python 2 and 3 compatible

        def _get_state():  # type: () -> GhidraState
            # Helper that gives the `state` stub below its declared type.
            return None  # type: ignore

        state = _get_state()

        def getDataTypes(name):  # type: (str) -> list[DataType]
            # Stub: declares the signature only, the body is a placeholder.
            return  # type: ignore

        def getCurrentProgram():  # type: () -> Program
            # Stub: declares the signature only, the body is a placeholder.
            return  # type: ignore

        def getFunctionAt(entryPoint):  # type: (Address) -> Function
            # Stub: declares the signature only, the body is a placeholder.
            return  # type: ignore

        def getAddressFactory():  # type: () -> AddressFactory
            # Stub: declares the signature only, the body is a placeholder.
            return  # type: ignore

        def getNamespace(parent, namespaceName):  # type: (Namespace, str) -> Namespace
            # Stub: declares the signature only, the body is a placeholder.
            return  # type: ignore

        def askYesNo(title, message):  # type: (str, str) -> bool
            # Stub: declares the signature only, the body is a placeholder.
            return  # type: ignore

        # Ties the type of the returned choice to the type of the offered choices.
        T = TypeVar("T")

        def askChoice(
            title, message, choices, defaultValue
        ):  # type: (str, str, list[T], T) -> T
            # Stub: declares the signature only, the body is a placeholder.
            return  # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
# This script can be run both from Ghidra and as a standalone.
# In the latter case, only the C++ parser can be used.
try:
    from ghidra.program.model.listing import Function
    from ghidra.program.flatapi import FlatProgramAPI

    from lego_util.ghidra_helper import CppFunctionWithGhidraTypes

    # This is needed for Ghidra API calls in submodules
    API = FlatProgramAPI(state.getCurrentProgram())

    MAKE_CHANGES = askYesNo(
        "Make changes?", "Select 'Yes' to apply changes, select 'No' to do a dry run."
    )

    if MAKE_CHANGES:
        PROMPT_BEFORE_CHANGE = askYesNo(
            "Prompt before changes?", "Should each change be confirmed by a prompt?"
        )
    else:
        # for the linter, has no effect anyway
        PROMPT_BEFORE_CHANGE = True

    RUNNING_FROM_GHIDRA = True
except ImportError:
    # The Ghidra modules are unavailable: fall back to a parser-only dry run.
    RUNNING_FROM_GHIDRA = False
    MAKE_CHANGES = False
    # Bind the flag in this branch as well so that it exists in every mode;
    # nothing is ever changed here, hence there is nothing to prompt for.
    PROMPT_BEFORE_CHANGE = False


# Names of all classes and structs found in the LEGO1 sources
CLASSES_AND_STRUCTS = set()  # type: set[str]
# Names of all namespaces found in the LEGO1 sources
NAMESPACES = set()  # type: set[str]

# Number of annotated functions that were processed without an error
SUCCESSES = 0
# Number of failures, keyed by the class name of the exception
FAILURES = {}  # type: dict[str, int]
# Number of times each type was reported as missing in Ghidra
KNOWN_MISSING_TYPES = {}  # type: dict[str, int]
# Namespaces that have already been reported as missing in Ghidra
KNOWN_MISSING_NAMESPACES = set()  # type: set[str]

# Number of Ghidra functions that were actually overwritten
FUNCTIONS_CHANGED = 0
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """
    Entry point: scan the LEGO1 sources and synchronise them to Ghidra.

    The source tree is walked twice: the first pass collects the names of
    classes, structs and namespaces, the second pass processes every
    annotated function. The statistics are logged even if a pass aborts.
    """
    logging.basicConfig(
        format="%(levelname)-8s %(message)s", stream=sys.stdout, level=logging.INFO
    )

    if not RUNNING_FROM_GHIDRA:
        logging.error(
            "Failed to import Ghidra functions, doing a dry run for the source code parser. "
            "Has this script been launched from Ghidra?"
        )

    # navigate to this repository's root and then down to the LEGO1 source
    script_dir = os.path.dirname(__file__)
    root_dir = os.path.join(script_dir, "..", "..", "LEGO1")

    try:
        # Classes and structs must be known before functions can be parsed
        for file_callback in (
            search_for_classes_and_structs,
            search_and_process_functions,
        ):
            iterate_dir(root_dir, file_callback)
    finally:
        # output statistics even when aborting
        most_frequent_first = sorted(
            KNOWN_MISSING_TYPES.items(), key=lambda item: -item[1]
        )
        missing_types_summary = ", ".join(
            "%s (%d)" % (type_name, count) for type_name, count in most_frequent_first
        )

        logging.info(
            "Missing types: (with number of occurences): %s", missing_types_summary
        )
        logging.info("Successes: %d", SUCCESSES)
        logging.info("Failures: %s", FAILURES)
        logging.info("Functions changed: %d", FUNCTIONS_CHANGED)
|
||||||
|
|
||||||
|
|
||||||
|
def log_and_track_failure(
    file_path, error, unexpected=False
):  # type: (str, Exception, bool) -> None
    """
    Count a failure in the statistics and log it, unless it is a repetition.

    Every call increments the counter for the exception's class name in
    `FAILURES`. A missing type or a missing namespace is logged only the
    first time it is seen.

    Args:
        file_path: Path of the source file in which the failure occurred;
            only its base name is logged.
        error: The exception that describes the failure.
        unexpected: Whether to prefix the log line with "Unexpected error in ".
    """
    failure_kind = error.__class__.__name__
    FAILURES[failure_kind] = FAILURES.get(failure_kind, 0) + 1

    if isinstance(error, TypeNotFoundInGhidraError):
        type_name = error.args[0]
        previous_hits = KNOWN_MISSING_TYPES.get(type_name, 0)
        KNOWN_MISSING_TYPES[type_name] = previous_hits + 1
        if previous_hits > 0:
            # Log each missing type only once to reduce log noise
            return

    if isinstance(error, NamespaceNotFoundInGhidraError):
        namespace_str = error.get_namespace_str()
        if namespace_str in KNOWN_MISSING_NAMESPACES:
            # Log each missing namespace only once to reduce log noise
            return
        KNOWN_MISSING_NAMESPACES.add(namespace_str)

    prefix = "Unexpected error in " if unexpected else ""
    logging.error("%s%s: %s", prefix, os.path.basename(file_path), error)
|
||||||
|
|
||||||
|
|
||||||
|
def handle_function(lines, startIndex, address):  # type: (str, int, str) -> None
    """
    Parse one annotated C++ function and synchronise its signature to Ghidra.

    Args:
        lines: The complete content of the C++ source file.
        startIndex: Index into `lines` right after the line that holds the
            `// FUNCTION:` annotation.
        address: The address from the annotation, without the `0x` prefix.

    Raises:
        FunctionNotFoundInGhidraError: If Ghidra has no function at `address`.
        SystemExit: If the user picks "Abort" in the confirmation prompt.

    Errors raised by the C++ parser and by the type lookup propagate to the
    caller. Outside of Ghidra only the parsing step is executed.
    """
    global FUNCTIONS_CHANGED

    # Parse the C++ function
    # Skip comment lines between the annotation and the declaration. Matching
    # at an offset avoids copying the rest of the file for every comment line.
    comment_prefix = re.compile(r"\s*//")
    while comment_prefix.match(lines, startIndex):
        next_newline = lines.find("\n", startIndex + 1)
        if next_newline < 0:
            # The comment is the last line of the file. Without this check the
            # index would become -1 and point at the last character instead.
            startIndex = len(lines)
            break
        startIndex = next_newline
    cpp_function = CppFunctionDeclaration(lines, startIndex, CLASSES_AND_STRUCTS)

    if cpp_function.return_type in CLASSES_AND_STRUCTS:
        # edge case handling - Ghidra does not understand what happens under the hood.
        # These must be set manually
        logging.error(
            "Unimplemented edge case at 0x%s: Return value is a non-referenced struct or class: %s",
            address,
            cpp_function,
        )
        return

    if not RUNNING_FROM_GHIDRA:
        return

    # Find the Ghidra function at that address
    ghidra_address = getAddressFactory().getAddress(address)
    ghidra_function = getFunctionAt(ghidra_address)
    if ghidra_function is None:
        raise FunctionNotFoundInGhidraError(address)

    # Convert the C++ data types to Ghidra data types
    typed_cpp_function = CppFunctionWithGhidraTypes(API, cpp_function)

    if typed_cpp_function.matches_ghidra_function(ghidra_function):
        logging.debug(
            "Skipping function '%s', matches already", cpp_function.full_name()
        )
        return

    if not MAKE_CHANGES:
        return

    # Navigate Ghidra to the current function
    state.setCurrentAddress(ghidra_address)

    if PROMPT_BEFORE_CHANGE:
        choice = askChoice(
            "Change function?",
            "Change to %s" % cpp_function,
            ["Yes", "No", "Abort"],
            "Yes",
        )
        if choice == "No":
            return
        if choice != "Yes":
            logging.critical("User quit, terminating")
            raise SystemExit(1)

    logging.info("Modifying function %s at 0x%s", cpp_function.full_name(), address)

    typed_cpp_function.overwrite_ghidra_function(ghidra_function)

    FUNCTIONS_CHANGED += 1

    if PROMPT_BEFORE_CHANGE:
        # Add a prompt so we can verify the result immediately
        askChoice("", "Click 'OK' to continue", ["OK"], "OK")
|
||||||
|
|
||||||
|
|
||||||
|
def search_for_classes_and_structs(header_file):  # type: (str) -> None
    """
    Collect the class, struct and namespace names declared in one file.

    Files that end neither in `.h` nor in `.cpp` are ignored. A file that
    cannot be read is logged together with the traceback and skipped.

    Args:
        header_file: Path of the file to scan.
    """
    global CLASSES_AND_STRUCTS, NAMESPACES

    if not header_file.endswith((".h", ".cpp")):
        return

    try:
        with open(header_file) as infile:
            source_text = infile.read()
    except Exception:
        logging.error(
            "Error handling header file: %s\n%s", header_file, traceback.format_exc()
        )
        return

    class_names = class_regex.findall(source_text)
    struct_names = struct_regex.findall(source_text)
    namespace_names = namespace_regex.findall(source_text)

    CLASSES_AND_STRUCTS = CLASSES_AND_STRUCTS.union(class_names).union(struct_names)
    NAMESPACES = NAMESPACES.union(namespace_names)
|
||||||
|
|
||||||
|
|
||||||
|
def search_and_process_functions(path):  # type: (str) -> None
    """
    Process every `// FUNCTION: LEGO1 0x...` annotation in one `.cpp` file.

    Each annotated function is handed to `handle_function`. Successes are
    counted in `SUCCESSES`, failures are recorded via `log_and_track_failure`
    and do not stop the processing of the remaining functions.

    Args:
        path: Path of the file to process; anything but `.cpp` is ignored.
    """
    global SUCCESSES
    if not path.endswith(".cpp"):
        return

    with open(path, "r") as source_file:
        lines = source_file.read()

    # search for '// FUNCTION: LEGO1 0x[...]'
    for match in function_regex.finditer(lines):
        address = match.group(1)
        newline_index = lines.find("\n", match.end())
        if newline_index < 0:
            # The annotation is on the last line of the file. Without this
            # check the start index would wrap around to 0 and the beginning
            # of the file would be parsed as the annotated function.
            log_and_track_failure(
                path,
                Lego1Exception(
                    "No function declaration after annotation 0x%s" % address
                ),
            )
            continue

        try:
            handle_function(lines, newline_index + 1, address)
            SUCCESSES += 1
        except Lego1Exception as e:
            log_and_track_failure(path, e)

        except Exception as e:
            log_and_track_failure(path, e, unexpected=True)
            logging.error(traceback.format_exc())
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
0
tools/ghidra_scripts/lego_util/__init__.py
Normal file
0
tools/ghidra_scripts/lego_util/__init__.py
Normal file
140
tools/ghidra_scripts/lego_util/cpp_parser.py
Normal file
140
tools/ghidra_scripts/lego_util/cpp_parser.py
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from lego_util.exceptions import (
|
||||||
|
UnsupportedCppSyntaxError,
|
||||||
|
CppUnknownClassOrNamespaceError,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Matches a `// FUNCTION: LEGO1 0x...` annotation; group 1 captures the
# eight word characters that follow `0x`.
function_regex = re.compile(r"\s*// FUNCTION: LEGO1 0x(\w{8})")

# Matches `class <name>` at the start of a line (leading whitespace allowed);
# group 1 captures the name.
class_regex = re.compile(r"\n\s*class\s(\w+)")

# Matches `struct <name>` at the start of a line (leading whitespace allowed);
# group 1 captures the name.
struct_regex = re.compile(r"\n\s*struct\s(\w+)")

# Matches `namespace <name>` at the start of a line (leading whitespace
# allowed); group 1 captures the name.
namespace_regex = re.compile(r"\n\s*namespace\s(\w+)")
|
||||||
|
|
||||||
|
|
||||||
|
class CppFunctionDeclaration(object):
    """
    A rudimentary parser for C++ function signatures in LEGO1.
    Assumes that the C++ code has been formatted to some degree.

    Attributes:
        name: The function name without any namespace.
        namespace_hierachy: The namespaces / classes in front of the name.
        return_type: The return type; `void` for destructors and a pointer
            to the class for constructors.
        class_name: The innermost namespace if it is a known class or struct,
            otherwise `None`.
        flags: Everything between the return type and the function name.
        arguments: A list of `(type, name)` tuples.
    """

    def __init__(self, fn, start_index, classes_and_structs):
        # type: (str, int, set[str]) -> None
        """
        Parse the function declaration that starts at `fn[start_index]`.

        Args:
            fn: The source code that contains the declaration.
            start_index: The index in `fn` at which the declaration starts.
            classes_and_structs: The names of all known classes and structs.

        Raises:
            UnsupportedCppSyntaxError: If the declaration cannot be parsed.
            CppUnknownClassOrNamespaceError: If the innermost namespace of
                the function is not in `classes_and_structs`.
        """
        first_part_str, second_part = self._split_off_declaration_and_arguments(
            fn[start_index:]
        )

        try:
            # Splitting on any whitespace tolerates indentation, tabs and
            # repeated blanks; none of them may end up as the return type.
            first_part = first_part_str.split()
            if not first_part:
                raise UnsupportedCppSyntaxError("No function name found")

            full_function_name = first_part.pop()
            colon_split = full_function_name.split("::")
            self.name = colon_split.pop()
            self.namespace_hierachy = colon_split

            if first_part:
                while True:
                    # desired failure if we only get keywords and no return type
                    self.return_type = first_part.pop(0)
                    if self.return_type not in ["const", "inline"]:
                        break
            else:
                # most likely a constructor or destructor
                if not self.namespace_hierachy:
                    raise UnsupportedCppSyntaxError(
                        "Function without return type or namespace"
                    )
                if self.name.startswith("~"):
                    self.return_type = "void"
                else:
                    self.return_type = self.name + "*"

            # evaluate if we belong to a class, assume __thiscall
            self.class_name = None
            if self.namespace_hierachy:
                bottom_level_namespace = self.namespace_hierachy[-1]
                if bottom_level_namespace in classes_and_structs:
                    self.class_name = bottom_level_namespace
                else:
                    raise CppUnknownClassOrNamespaceError(bottom_level_namespace)

            # don't add a `this` argument, let Ghidra handle that
            self.flags = first_part
            if second_part.strip():
                self.arguments = [
                    self._parse_argument(i, x)
                    for i, x in enumerate(second_part.split(","))
                ]
            else:
                self.arguments = []

        except UnsupportedCppSyntaxError as e:
            # Add the offending declaration to the error message
            raise UnsupportedCppSyntaxError(
                "%s. In: '%s(%s)'" % (e.args[0], first_part_str, second_part)
            )

    def __str__(self):
        """Return the declaration; methods get `__thiscall` and a `this` argument."""
        # Every flag is followed by a blank so that the last one is not
        # glued to the function name.
        flags = "".join(flag + " " for flag in self.flags)
        full_name = self.full_name()
        args = ["%s %s" % pair for pair in self.arguments]
        if self.class_name:
            # add the "this" argument to the output
            args = [("%s* this" % self.class_name)] + args
            return "%s __thiscall %s%s(%s)" % (
                self.return_type,
                flags,
                full_name,
                ", ".join(args),
            )

        return "%s %s%s(%s)" % (self.return_type, flags, full_name, ", ".join(args))

    def full_name(self):
        """Return the function name including all namespaces, joined by `::`."""
        return "::".join(self.namespace_hierachy + [self.name])

    def _parse_argument(self, index, argument_str):
        # type: (int, str) -> tuple[str, str]
        """
        Parse a single argument of the argument list.

        Args:
            index: The zero-based position of the argument.
            argument_str: The text of the argument.

        Returns: (type, name). An unnamed argument is called `param<index + 1>`.

        Raises:
            UnsupportedCppSyntaxError: If the argument is empty or consists
                of more than two parts after `const` has been removed.
        """
        # Cleanup, handle `const`
        filtered = [x for x in argument_str.split() if x.lower() != "const"]

        if len(filtered) == 0:
            raise UnsupportedCppSyntaxError(
                "Expected more arguments: '%s'" % argument_str.strip()
            )
        if len(filtered) == 1:
            # unnamed argument
            return (filtered[0], "param%d" % (index + 1))
        if len(filtered) == 2:
            return (filtered[0], filtered[1])

        raise UnsupportedCppSyntaxError(
            "Unsupported argument syntax: '%s'" % argument_str.strip()
        )

    def _split_off_declaration_and_arguments(self, fn):
        # type: (str) -> tuple[str, str]
        """
        Split the source into the part before the argument list and the
        argument list itself (without the enclosing parentheses).

        Args:
            fn: Source code that starts with the function declaration.

        Returns: (declaration, arguments), both on a single line.

        Raises:
            UnsupportedCppSyntaxError: If there is no opening parenthesis.
            IndexError: If the parentheses are unbalanced.
        """
        first_paren = fn.find("(")
        if first_paren < 0:
            # Report the first line only, `fn` may be the rest of a whole file
            stripped = fn.strip()
            first_line = stripped.splitlines()[0] if stripped else ""
            raise UnsupportedCppSyntaxError(
                "No opening parenthesis found in function '%s'" % first_line
            )

        paren_stack = 1
        close_paren = first_paren
        while paren_stack > 0:
            # In case of unmatched parentheses we run into an IndexError,
            # which is expected behaviour
            close_paren += 1
            if fn[close_paren] == "(":
                paren_stack += 1
            elif fn[close_paren] == ")":
                paren_stack -= 1

        return (
            self._normalize(fn[:first_paren]),
            self._normalize(fn[first_paren + 1 : close_paren]),
        )

    @staticmethod
    def _normalize(code):  # type: (str) -> str
        """Put `code` on a single line and merge `unsigned x` into `ux`."""
        # A line break separates tokens, so it becomes a blank instead of
        # being removed.
        single_line = code.replace("\n", " ")
        # handle `unsigned` in arguments and result
        return single_line.replace("unsigned ", "u").strip()
|
||||||
38
tools/ghidra_scripts/lego_util/exceptions.py
Normal file
38
tools/ghidra_scripts/lego_util/exceptions.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
class Lego1Exception(Exception):
    """Common base class of the errors defined for the LEGO1 Ghidra scripts."""


class TypeNotFoundInGhidraError(Lego1Exception):
    """A type is unknown to Ghidra; the first argument is the type name."""

    def __str__(self):
        type_name = self.args[0]
        return "Type not found in Ghidra: %s" % type_name


class NamespaceNotFoundInGhidraError(Lego1Exception):
    """A class or namespace is unknown to Ghidra."""

    def __init__(self, namespaceHierachy):  # type: (list[str]) -> None
        """Store the namespace hierarchy (outermost first) as the only argument."""
        Lego1Exception.__init__(self, namespaceHierachy)

    def get_namespace_str(self):  # type: () -> str
        """Return the namespace hierarchy joined by `::`."""
        hierarchy = self.args[0]
        return "::".join(hierarchy)

    def __str__(self):
        namespace_str = self.get_namespace_str()
        return "Class or namespace not found in Ghidra: %s" % namespace_str


class FunctionNotFoundInGhidraError(Lego1Exception):
    """Ghidra has no function at an address; the first argument is the address."""

    def __str__(self):
        address = self.args[0]
        return "Function not found in Ghidra at %s" % address


class MultipleTypesFoundInGhidraError(Lego1Exception):
    """A type name is ambiguous; the arguments are the name and the matches."""

    def __str__(self):
        # `args` must hold exactly the two values the format string expects
        template = "Found multiple types matching '%s' in Ghidra: %s"
        return template % self.args


class UnsupportedCppSyntaxError(Lego1Exception):
    """The C++ parser met syntax it cannot handle; the first argument is the detail."""

    def __str__(self):
        detail = self.args[0]
        return "C++ syntax currently not supported in the parser: %s" % detail


class CppUnknownClassOrNamespaceError(Lego1Exception):
    """A function's scope is neither a known class nor a known namespace."""

    def __str__(self):
        scope_name = self.args[0]
        return "'%s' is neither a known class nor namespace" % scope_name
|
||||||
14
tools/ghidra_scripts/lego_util/file_helper.py
Normal file
14
tools/ghidra_scripts/lego_util/file_helper.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
if sys.version_info.major > 2:
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
|
|
||||||
|
def iterate_dir(path, file_callback):  # type: (str, Callable[[str], None]) -> None
    """
    Call `file_callback` with the path of every file below `path`.

    Directories are descended into recursively, in the order in which
    `os.listdir` returns them.

    Args:
        path: The directory to start from.
        file_callback: Called once per entry that is not a directory.
    """
    for entry_name in os.listdir(path):  # pathlib not supported
        entry_path = os.path.join(path, entry_name)
        if not os.path.isdir(entry_path):
            file_callback(entry_path)
            continue
        iterate_dir(entry_path, file_callback)
|
||||||
173
tools/ghidra_scripts/lego_util/ghidra_helper.py
Normal file
173
tools/ghidra_scripts/lego_util/ghidra_helper.py
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
|
from lego_util.exceptions import (
|
||||||
|
NamespaceNotFoundInGhidraError,
|
||||||
|
TypeNotFoundInGhidraError,
|
||||||
|
MultipleTypesFoundInGhidraError,
|
||||||
|
)
|
||||||
|
from lego_util.cpp_parser import CppFunctionDeclaration
|
||||||
|
|
||||||
|
# Disable spurious warnings in vscode / pylance
|
||||||
|
# pyright: reportMissingModuleSource=false
|
||||||
|
|
||||||
|
from ghidra.program.model.data import PointerDataType
|
||||||
|
from ghidra.program.model.data import DataTypeConflictHandler
|
||||||
|
from ghidra.program.model.listing import ParameterImpl
|
||||||
|
from ghidra.program.model.listing import Function
|
||||||
|
from ghidra.program.model.symbol import SourceType
|
||||||
|
|
||||||
|
# Type annotations are only available in Python 3.5 or later
|
||||||
|
if sys.version_info.major > 2:
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from ghidra.program.flatapi import FlatProgramAPI
|
||||||
|
from ghidra.program.model.data import DataType
|
||||||
|
from ghidra.program.model.symbol import Namespace
|
||||||
|
from ghidra.program.model.listing import Parameter
|
||||||
|
|
||||||
|
|
||||||
|
def get_ghidra_type(api, type_name):  # type: (FlatProgramAPI, str) -> DataType
    """
    Searches for the type named `type_name` in Ghidra.

    References are looked up as pointers. If a pointer type does not exist
    yet but the type it points to does, the pointer type is created.

    Raises:
        TypeNotFoundInGhidraError: If Ghidra knows no matching type.
        MultipleTypesFoundInGhidraError: If the name matches more than one type.
    """
    # references to pointers
    as_pointer = type_name.replace("&", " *")
    # handle reference spacing (void* -> void *)
    ghidra_name = re.sub(r"(?<!\s)\*", " *", as_pointer)

    candidates = api.getDataTypes(ghidra_name)
    candidate_count = len(candidates)

    if candidate_count == 1:
        return candidates[0]

    if candidate_count > 1:
        raise MultipleTypesFoundInGhidraError(ghidra_name, candidates)

    if not ghidra_name.endswith("*"):
        raise TypeNotFoundInGhidraError(ghidra_name)

    # Create a new pointer type if the dereferenced type exists;
    # the slice removes the trailing " *"
    pointee = get_ghidra_type(api, ghidra_name[0:-2])
    return add_pointer_type(api, pointee)
|
||||||
|
|
||||||
|
|
||||||
|
def add_pointer_type(api, pointee):  # type: (FlatProgramAPI, DataType) -> DataType
    """
    Create a pointer type to `pointee` and add it to the current program.

    The new type is given the category path of `pointee`.

    Returns:
        The newly constructed pointer type.
    """
    pointer_type = PointerDataType(pointee)
    pointer_type.setCategoryPath(pointee.categoryPath)

    type_manager = api.getCurrentProgram().getDataTypeManager()
    # NOTE(review): the return value of addDataType() is not used - confirm
    # that callers may work with the object constructed above.
    type_manager.addDataType(pointer_type, DataTypeConflictHandler.KEEP_HANDLER)

    logging.info("Created new pointer type %s", pointer_type)
    return pointer_type
|
||||||
|
|
||||||
|
|
||||||
|
def get_ghidra_namespace(
    api, namespace_hierachy
):  # type: (FlatProgramAPI, list[str]) -> Namespace
    """
    Resolve a namespace hierarchy, starting at the global namespace.

    Args:
        api: The API used to look up the namespaces.
        namespace_hierachy: The namespace names, outermost first.

    Returns:
        The innermost namespace, or the global one for an empty hierarchy.

    Raises:
        NamespaceNotFoundInGhidraError: If any level cannot be found.
    """
    current = api.getCurrentProgram().getGlobalNamespace()
    for level_name in namespace_hierachy:
        child = api.getNamespace(current, level_name)
        if child is None:
            raise NamespaceNotFoundInGhidraError(namespace_hierachy)
        current = child
    return current
|
||||||
|
|
||||||
|
|
||||||
|
class CppFunctionWithGhidraTypes(object):
    """Collects the matching Ghidra entities for a C++ function declaration."""

    def __init__(
        self, fpapi, cpp_fn_decl
    ):  # type: (FlatProgramAPI, CppFunctionDeclaration) -> None
        """
        Look up the Ghidra types and the namespace of a parsed declaration.

        Args:
            fpapi: The API used for all look-ups.
            cpp_fn_decl: The parsed C++ function declaration.

        Errors raised by `get_ghidra_type` and `get_ghidra_namespace`
        propagate to the caller.
        """
        self.name = cpp_fn_decl.name
        self.class_name = cpp_fn_decl.class_name
        self.return_type = get_ghidra_type(fpapi, cpp_fn_decl.return_type)
        self.arguments = [
            ParameterImpl(
                name, get_ghidra_type(fpapi, type_name), fpapi.getCurrentProgram()
            )
            for (type_name, name) in cpp_fn_decl.arguments
        ]
        self.namespace = get_ghidra_namespace(fpapi, cpp_fn_decl.namespace_hierachy)

    def matches_ghidra_function(self, ghidra_function):  # type: (Function) -> bool
        """Checks whether this function declaration already matches the description in Ghidra"""
        name_match = self.name == ghidra_function.getName(False)
        namespace_match = self.namespace == ghidra_function.getParentNamespace()
        return_type_match = self.return_type == ghidra_function.getReturnType()
        # match arguments: decide if thiscall or not
        thiscall_matches = (self.class_name is not None) == (
            ghidra_function.getCallingConventionName() == "__thiscall"
        )

        if thiscall_matches:
            if self.class_name is not None:
                args_match = self._matches_thiscall_parameters(ghidra_function)
            else:
                args_match = self._matches_non_thiscall_parameters(ghidra_function)
        else:
            args_match = False

        logging.debug(
            "Matches: namespace=%s name=%s return_type=%s thiscall=%s args=%s",
            namespace_match,
            name_match,
            return_type_match,
            thiscall_matches,
            args_match,
        )

        return (
            name_match
            and namespace_match
            and return_type_match
            and thiscall_matches
            and args_match
        )

    def _matches_non_thiscall_parameters(
        self, ghidra_function
    ):  # type: (Function) -> bool
        """Compare our arguments with all parameters of the Ghidra function."""
        return self._parameter_lists_match(ghidra_function.getParameters())

    def _matches_thiscall_parameters(self, ghidra_function):  # type: (Function) -> bool
        """Compare our arguments with the Ghidra parameters after `this`."""
        # Copy into a Python list: the container returned by Ghidra need not
        # support in-place removal.
        ghidra_params = list(ghidra_function.getParameters())  # type: list[Parameter]

        if not ghidra_params:
            # Not even a `this` parameter, so this cannot be a match
            return False

        # remove the `this` argument which we don't generate ourselves
        return self._parameter_lists_match(ghidra_params[1:])

    def _parameter_lists_match(self, ghidra_params):  # type: (list[Parameter]) -> bool
        """Return whether names and data types of both parameter lists agree."""
        if len(self.arguments) != len(ghidra_params):
            return False

        for this_arg, ghidra_arg in zip(self.arguments, ghidra_params):
            if (
                this_arg.getName() != ghidra_arg.getName()
                or this_arg.getDataType() != ghidra_arg.getDataType()
            ):
                return False

        return True

    def overwrite_ghidra_function(self, ghidra_function):  # type: (Function) -> None
        """Replace the function declaration in Ghidra by the one derived from C++."""
        ghidra_function.setName(self.name, SourceType.USER_DEFINED)
        ghidra_function.setParentNamespace(self.namespace)
        ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED)
        # not sure what calling convention to choose when it's not a __thiscall,
        # so we play it safe and keep whatever Ghidra has
        if self.class_name:
            ghidra_function.setCallingConvention("__thiscall")

        ghidra_function.replaceParameters(
            Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS,
            True,
            SourceType.USER_DEFINED,
            self.arguments,
        )
|
||||||
Loading…
Reference in New Issue
Block a user