mirror of
https://github.com/isledecomp/isle.git
synced 2026-01-24 08:41:16 +00:00
141 lines
4.9 KiB
Python
141 lines
4.9 KiB
Python
import re
|
|
|
|
from lego_util.exceptions import (
|
|
UnsupportedCppSyntaxError,
|
|
CppUnknownClassOrNamespaceError,
|
|
)
|
|
|
|
function_regex = re.compile(r"\s*// FUNCTION: LEGO1 0x(\w{8})")
|
|
|
|
class_regex = re.compile(r"\n\s*class\s(\w+)")
|
|
|
|
struct_regex = re.compile(r"\n\s*struct\s(\w+)")
|
|
|
|
namespace_regex = re.compile(r"\n\s*namespace\s(\w+)")
|
|
|
|
|
|
class CppFunctionDeclaration:
|
|
"""
|
|
A rudimentary parser for C++ function signatures in LEGO1.
|
|
Assumes that the C++ code has been formatted to some degree.
|
|
"""
|
|
|
|
def __init__(
|
|
self, fn, start_index, classes_and_structs
|
|
): # type: (CppFunctionDeclaration, str, int, set[str]) -> None
|
|
first_part_str, second_part = self._split_off_declaration_and_arguments(
|
|
fn[start_index:]
|
|
)
|
|
|
|
try:
|
|
first_part = first_part_str.split(" ")
|
|
full_function_name = first_part.pop()
|
|
colon_split = full_function_name.split("::")
|
|
self.name = colon_split.pop()
|
|
self.namespace_hierachy = colon_split
|
|
|
|
if first_part:
|
|
while True:
|
|
# desired failure if we only get keywords and no return type
|
|
self.return_type = first_part.pop(0)
|
|
if self.return_type not in ["const", "inline"]:
|
|
break
|
|
else:
|
|
# most likely a constructor or destructor
|
|
assert self.namespace_hierachy is not None, (
|
|
"Unhandled function without return type or namespace: " + fn
|
|
)
|
|
if self.name.startswith("~"):
|
|
self.return_type = "void"
|
|
else:
|
|
self.return_type = self.name + "*"
|
|
|
|
# evaluate if we belong to a class, assume __thiscall
|
|
self.class_name = None
|
|
if self.namespace_hierachy:
|
|
bottom_level_namespace = self.namespace_hierachy[-1]
|
|
if bottom_level_namespace in classes_and_structs:
|
|
self.class_name = bottom_level_namespace
|
|
else:
|
|
raise CppUnknownClassOrNamespaceError(bottom_level_namespace)
|
|
|
|
# don't add a `this` argument, let Ghidra handle that
|
|
self.flags = first_part
|
|
if second_part.strip():
|
|
self.arguments = [
|
|
self._parse_argument(i, x)
|
|
for i, x in enumerate(second_part.split(","))
|
|
]
|
|
else:
|
|
self.arguments = []
|
|
|
|
except UnsupportedCppSyntaxError as e:
|
|
raise UnsupportedCppSyntaxError(
|
|
"%s. In: '%s(%s)'" % (e.args[0], first_part_str, second_part)
|
|
)
|
|
|
|
def __str__(self):
|
|
flags = " ".join(self.flags)
|
|
full_name = self.full_name()
|
|
args = ["%s %s" % pair for pair in self.arguments]
|
|
if self.class_name:
|
|
# add the "this" argument to the output
|
|
args = [("%s* this" % self.class_name)] + args
|
|
return "%s __thiscall %s%s(%s)" % (
|
|
self.return_type,
|
|
flags,
|
|
full_name,
|
|
", ".join(args),
|
|
)
|
|
|
|
return "%s %s%s(%s)" % (self.return_type, flags, full_name, ", ".join(args))
|
|
|
|
def full_name(self):
|
|
return "::".join(self.namespace_hierachy + [self.name])
|
|
|
|
def _parse_argument(
|
|
self, index, argument_str
|
|
): # type: (int, str) -> tuple[str, str]
|
|
"""Returns: (type, name)"""
|
|
# Cleanup, handle `const`
|
|
split = (x.strip() for x in argument_str.split(" "))
|
|
filtered = [x for x in split if len(x) > 0 and x.lower() != "const"]
|
|
|
|
if len(filtered) == 0:
|
|
raise UnsupportedCppSyntaxError(
|
|
"Expected more arguments: '%s'" % argument_str.strip()
|
|
)
|
|
if len(filtered) == 1:
|
|
# unnamed argument
|
|
return (filtered[0], "param%d" % (index + 1))
|
|
if len(filtered) == 2:
|
|
return (filtered[0], filtered[1])
|
|
|
|
raise UnsupportedCppSyntaxError(
|
|
"Unsupported argument syntax: '%s'" % argument_str.strip()
|
|
)
|
|
|
|
def _split_off_declaration_and_arguments(
|
|
self, fn
|
|
): # type: (str) -> tuple[str, str]
|
|
# handle `unsigned` in arguments and result
|
|
fn = fn.replace("unsigned ", "u")
|
|
first_paren = fn.find("(")
|
|
assert first_paren >= 0, "No opening parenthesis found in function '%s'" % fn
|
|
|
|
paren_stack = 1
|
|
close_paren = first_paren
|
|
while paren_stack > 0:
|
|
# In case of unmatched parentheses we run into an IndexError,
|
|
# which is expected behaviour
|
|
close_paren += 1
|
|
if fn[close_paren] == "(":
|
|
paren_stack += 1
|
|
elif fn[close_paren] == ")":
|
|
paren_stack -= 1
|
|
|
|
return (
|
|
fn[:first_paren].replace("\n", ""),
|
|
fn[first_paren + 1 : close_paren].replace("\n", ""),
|
|
)
|