Implicit lookup by name for functions

This commit is contained in:
disinvite 2023-12-02 13:07:11 -05:00
parent ba4e28b8c4
commit db971ada60
5 changed files with 129 additions and 22 deletions

View File

@ -19,7 +19,15 @@ class ParserError(Enum):
BOGUS_MARKER = 104
# WARN: New function marker appeared while we were inside a function
MISSED_END_OF_FUNCTION = 117
MISSED_END_OF_FUNCTION = 105
# WARN: If we find a curly brace right after the function declaration
# this is wrong but we still have enough to make a match with reccmp
MISSED_START_OF_FUNCTION = 106
# WARN: A blank line appeared between the end of FUNCTION markers
# and the start of the function. We can ignore it, but the line shouldn't be there
UNEXPECTED_BLANK_LINE = 107
# ERROR: We found a marker unexpectedly
UNEXPECTED_MARKER = 200

View File

@ -21,6 +21,7 @@ class ParserSymbol(ParserNode):
@dataclass
class ParserFunction(ParserSymbol):
name: str
lookup_by_name: bool = False
is_stub: bool = False
is_synthetic: bool = False
is_template: bool = False

View File

@ -42,6 +42,10 @@ def marker_is_synthetic(marker: DecompMarker) -> bool:
return marker.type.upper() in ("SYNTHETIC", "TEMPLATE")
def marker_is_template(marker: DecompMarker) -> bool:
return marker.type.upper() == "TEMPLATE"
def marker_is_function(marker: DecompMarker) -> bool:
return marker.type.upper() in ("FUNCTION", "STUB")
@ -55,8 +59,8 @@ def __init__(self):
self.markers: dict = {}
def insert(self, marker: DecompMarker) -> bool:
"""Return True if this insert would overwrite"""
module = marker.module.upper()
# Return True if this insert would overwrite
if module in self.markers:
return True
@ -159,9 +163,12 @@ def _synthetic_marker(self, marker: DecompMarker):
self._syntax_warning(ParserError.DUPLICATE_MODULE)
self.state = ReaderState.IN_TEMPLATE
def _function_done(self, unexpected: bool = False):
def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False):
end_line = self.line_number
if unexpected:
# If we missed the end of the previous function, assume it ended
# on the previous line and that whatever we are tracking next
# begins on the current line.
end_line -= 1
for marker in self.fun_markers.iter():
@ -170,8 +177,10 @@ def _function_done(self, unexpected: bool = False):
line_number=self.function_start,
module=marker.module,
offset=marker.offset,
lookup_by_name=lookup_by_name,
is_stub=marker_is_stub(marker),
is_template=marker_is_synthetic(marker),
is_synthetic=marker_is_synthetic(marker),
is_template=marker_is_template(marker),
name=self.function_sig,
end_line=end_line,
)
@ -262,7 +271,7 @@ def _handle_marker(self, marker: DecompMarker):
self._synthetic_marker(marker)
elif self.state == ReaderState.IN_FUNC:
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(unexpected=True)
self._function_done(lookup_by_name=True, unexpected=True)
self._synthetic_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
@ -304,6 +313,7 @@ def read_line(self, line: str):
self._handle_marker(marker)
return
line_strip = line.strip()
if self.state == ReaderState.IN_TEMPLATE:
# TEMPLATE functions are a special case. The signature is
# given on the next line (in a // comment)
@ -313,16 +323,32 @@ def read_line(self, line: str):
else:
self.function_sig = name
self._function_starts_here()
self._function_done()
self._function_done(lookup_by_name=True)
elif self.state == ReaderState.WANT_SIG:
# Skip blank lines or comments that come after the offset
# marker. There is not a formal procedure for this, so just
# assume the next "code line" is the function signature
if not is_blank_or_comment(line):
# Ignore blanks on the way to function start or function name
if len(line_strip) == 0:
self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE)
elif line_strip.startswith("//"):
# If we found a comment, assume implicit lookup-by-name
# function and end here. We know this is not a decomp marker
# because it would have been handled already.
self.function_sig = get_synthetic_name(line)
self._function_starts_here()
self._function_done(lookup_by_name=True)
elif line_strip == "{":
# We missed the function signature but we can recover from this
self.function_sig = "(unknown)"
self._function_starts_here()
self._syntax_warning(ParserError.MISSED_START_OF_FUNCTION)
self.state = ReaderState.IN_FUNC
else:
# Inline functions may end with a comment. Strip that out
# to help parsing.
self.function_sig = remove_trailing_comment(line.strip())
self.function_sig = remove_trailing_comment(line_strip)
# Now check to see if the opening curly bracket is on the
# same line. clang-format should prevent this (BraceWrapping)
@ -340,13 +366,13 @@ def read_line(self, line: str):
self.state = ReaderState.WANT_CURLY
elif self.state == ReaderState.WANT_CURLY:
if line.strip() == "{":
if line_strip == "{":
self.curly_indent_stops = line.index("{")
self._function_starts_here()
self.state = ReaderState.IN_FUNC
elif self.state == ReaderState.IN_FUNC:
if line.strip().startswith("}") and line[self.curly_indent_stops] == "}":
if line_strip.startswith("}") and line[self.curly_indent_stops] == "}":
self._function_done()
elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL):

View File

@ -11,15 +11,23 @@ def fixture_parser():
return DecompParser()
@pytest.mark.skip(reason="todo")
def test_missing_sig(parser):
"""Bad syntax: function signature is missing"""
parser.read_lines(["// FUNCTION: TEST 0x1234", "{"])
assert parser.state == ReaderState.IN_FUNC
assert len(parser.alerts) == 1
parser.read_line("}")
"""In the hopefully rare scenario that the function signature and marker
are swapped, we still have enough to match witch reccmp"""
parser.read_lines(
[
"void my_function()",
"// FUNCTION: TEST 0x1234",
"{",
"}",
]
)
assert parser.state == ReaderState.SEARCH
assert len(parser.functions) == 1
assert parser.functions[0] != "{"
assert parser.functions[0].line_number == 3
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.MISSED_START_OF_FUNCTION
def test_not_exact_syntax(parser):
@ -210,7 +218,7 @@ def test_synthetic(parser):
]
)
assert len(parser.functions) == 1
assert parser.functions[0].is_template is True
assert parser.functions[0].lookup_by_name is True
assert parser.functions[0].name == "TestClass::TestMethod"
@ -285,3 +293,67 @@ def test_indented_no_curly_hint(parser):
]
)
assert len(parser.alerts) == 0
def test_implicit_lookup_by_name(parser):
"""FUNCTION (or STUB) offsets must directly precede the function signature.
If we detect a comment instead, we assume that this is a lookup-by-name
function and end here."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// TestClass::TestMethod()",
]
)
assert parser.state == ReaderState.SEARCH
assert len(parser.functions) == 1
assert parser.functions[0].lookup_by_name is True
assert parser.functions[0].name == "TestClass::TestMethod()"
def test_function_with_spaces(parser):
"""There should not be any spaces between the end of FUNCTION markers
and the start or name of the function. If it's a blank line, we can safely
ignore but should alert to this."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
" ",
"inline void test_function() { };",
]
)
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE
def test_function_with_spaces_implicit(parser):
"""Same as above, but for implicit lookup-by-name"""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
" ",
"// Implicit::Method",
]
)
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE
@pytest.mark.xfail(reason="will assume implicit lookup-by-name function")
def test_function_is_commented(parser):
"""In an ideal world, we would recognize that there is no code here.
Some editors (or users) might comment the function on each line like this
but hopefully it is rare."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// int my_function()",
"// {",
"// return 5;",
"// }",
]
)
assert len(parser.functions) == 0

View File

@ -334,7 +334,7 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac
else:
continue
if fun.is_template:
if fun.lookup_by_name:
recinfo = syminfo.get_recompiled_address_from_name(fun.name)
if not recinfo:
continue