Implicit lookup by name for functions

This commit is contained in:
disinvite 2023-12-02 13:07:11 -05:00
parent ba4e28b8c4
commit db971ada60
5 changed files with 129 additions and 22 deletions

View File

@ -19,7 +19,15 @@ class ParserError(Enum):
BOGUS_MARKER = 104 BOGUS_MARKER = 104
# WARN: New function marker appeared while we were inside a function # WARN: New function marker appeared while we were inside a function
MISSED_END_OF_FUNCTION = 117 MISSED_END_OF_FUNCTION = 105
# WARN: If we find a curly brace right after the function declaration
# this is wrong but we still have enough to make a match with reccmp
MISSED_START_OF_FUNCTION = 106
# WARN: A blank line appeared between the end of FUNCTION markers
# and the start of the function. We can ignore it, but the line shouldn't be there
UNEXPECTED_BLANK_LINE = 107
# ERROR: We found a marker unexpectedly # ERROR: We found a marker unexpectedly
UNEXPECTED_MARKER = 200 UNEXPECTED_MARKER = 200

View File

@ -21,6 +21,7 @@ class ParserSymbol(ParserNode):
@dataclass @dataclass
class ParserFunction(ParserSymbol): class ParserFunction(ParserSymbol):
name: str name: str
lookup_by_name: bool = False
is_stub: bool = False is_stub: bool = False
is_synthetic: bool = False is_synthetic: bool = False
is_template: bool = False is_template: bool = False

View File

@ -42,6 +42,10 @@ def marker_is_synthetic(marker: DecompMarker) -> bool:
return marker.type.upper() in ("SYNTHETIC", "TEMPLATE") return marker.type.upper() in ("SYNTHETIC", "TEMPLATE")
def marker_is_template(marker: DecompMarker) -> bool:
return marker.type.upper() == "TEMPLATE"
def marker_is_function(marker: DecompMarker) -> bool: def marker_is_function(marker: DecompMarker) -> bool:
return marker.type.upper() in ("FUNCTION", "STUB") return marker.type.upper() in ("FUNCTION", "STUB")
@ -55,8 +59,8 @@ def __init__(self):
self.markers: dict = {} self.markers: dict = {}
def insert(self, marker: DecompMarker) -> bool: def insert(self, marker: DecompMarker) -> bool:
"""Return True if this insert would overwrite"""
module = marker.module.upper() module = marker.module.upper()
# Return True if this insert would overwrite
if module in self.markers: if module in self.markers:
return True return True
@ -159,9 +163,12 @@ def _synthetic_marker(self, marker: DecompMarker):
self._syntax_warning(ParserError.DUPLICATE_MODULE) self._syntax_warning(ParserError.DUPLICATE_MODULE)
self.state = ReaderState.IN_TEMPLATE self.state = ReaderState.IN_TEMPLATE
def _function_done(self, unexpected: bool = False): def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False):
end_line = self.line_number end_line = self.line_number
if unexpected: if unexpected:
# If we missed the end of the previous function, assume it ended
# on the previous line and that whatever we are tracking next
# begins on the current line.
end_line -= 1 end_line -= 1
for marker in self.fun_markers.iter(): for marker in self.fun_markers.iter():
@ -170,8 +177,10 @@ def _function_done(self, unexpected: bool = False):
line_number=self.function_start, line_number=self.function_start,
module=marker.module, module=marker.module,
offset=marker.offset, offset=marker.offset,
lookup_by_name=lookup_by_name,
is_stub=marker_is_stub(marker), is_stub=marker_is_stub(marker),
is_template=marker_is_synthetic(marker), is_synthetic=marker_is_synthetic(marker),
is_template=marker_is_template(marker),
name=self.function_sig, name=self.function_sig,
end_line=end_line, end_line=end_line,
) )
@ -262,7 +271,7 @@ def _handle_marker(self, marker: DecompMarker):
self._synthetic_marker(marker) self._synthetic_marker(marker)
elif self.state == ReaderState.IN_FUNC: elif self.state == ReaderState.IN_FUNC:
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(unexpected=True) self._function_done(lookup_by_name=True, unexpected=True)
self._synthetic_marker(marker) self._synthetic_marker(marker)
else: else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER) self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
@ -304,6 +313,7 @@ def read_line(self, line: str):
self._handle_marker(marker) self._handle_marker(marker)
return return
line_strip = line.strip()
if self.state == ReaderState.IN_TEMPLATE: if self.state == ReaderState.IN_TEMPLATE:
# TEMPLATE functions are a special case. The signature is # TEMPLATE functions are a special case. The signature is
# given on the next line (in a // comment) # given on the next line (in a // comment)
@ -313,16 +323,32 @@ def read_line(self, line: str):
else: else:
self.function_sig = name self.function_sig = name
self._function_starts_here() self._function_starts_here()
self._function_done() self._function_done(lookup_by_name=True)
elif self.state == ReaderState.WANT_SIG: elif self.state == ReaderState.WANT_SIG:
# Skip blank lines or comments that come after the offset # Ignore blanks on the way to function start or function name
# marker. There is not a formal procedure for this, so just if len(line_strip) == 0:
# assume the next "code line" is the function signature self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE)
if not is_blank_or_comment(line):
elif line_strip.startswith("//"):
# If we found a comment, assume implicit lookup-by-name
# function and end here. We know this is not a decomp marker
# because it would have been handled already.
self.function_sig = get_synthetic_name(line)
self._function_starts_here()
self._function_done(lookup_by_name=True)
elif line_strip == "{":
# We missed the function signature but we can recover from this
self.function_sig = "(unknown)"
self._function_starts_here()
self._syntax_warning(ParserError.MISSED_START_OF_FUNCTION)
self.state = ReaderState.IN_FUNC
else:
# Inline functions may end with a comment. Strip that out # Inline functions may end with a comment. Strip that out
# to help parsing. # to help parsing.
self.function_sig = remove_trailing_comment(line.strip()) self.function_sig = remove_trailing_comment(line_strip)
# Now check to see if the opening curly bracket is on the # Now check to see if the opening curly bracket is on the
# same line. clang-format should prevent this (BraceWrapping) # same line. clang-format should prevent this (BraceWrapping)
@ -340,13 +366,13 @@ def read_line(self, line: str):
self.state = ReaderState.WANT_CURLY self.state = ReaderState.WANT_CURLY
elif self.state == ReaderState.WANT_CURLY: elif self.state == ReaderState.WANT_CURLY:
if line.strip() == "{": if line_strip == "{":
self.curly_indent_stops = line.index("{") self.curly_indent_stops = line.index("{")
self._function_starts_here() self._function_starts_here()
self.state = ReaderState.IN_FUNC self.state = ReaderState.IN_FUNC
elif self.state == ReaderState.IN_FUNC: elif self.state == ReaderState.IN_FUNC:
if line.strip().startswith("}") and line[self.curly_indent_stops] == "}": if line_strip.startswith("}") and line[self.curly_indent_stops] == "}":
self._function_done() self._function_done()
elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL): elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL):

View File

@ -11,15 +11,23 @@ def fixture_parser():
return DecompParser() return DecompParser()
@pytest.mark.skip(reason="todo")
def test_missing_sig(parser): def test_missing_sig(parser):
"""Bad syntax: function signature is missing""" """In the hopefully rare scenario that the function signature and marker
parser.read_lines(["// FUNCTION: TEST 0x1234", "{"]) are swapped, we still have enough to match witch reccmp"""
assert parser.state == ReaderState.IN_FUNC parser.read_lines(
assert len(parser.alerts) == 1 [
parser.read_line("}") "void my_function()",
"// FUNCTION: TEST 0x1234",
"{",
"}",
]
)
assert parser.state == ReaderState.SEARCH
assert len(parser.functions) == 1 assert len(parser.functions) == 1
assert parser.functions[0] != "{" assert parser.functions[0].line_number == 3
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.MISSED_START_OF_FUNCTION
def test_not_exact_syntax(parser): def test_not_exact_syntax(parser):
@ -210,7 +218,7 @@ def test_synthetic(parser):
] ]
) )
assert len(parser.functions) == 1 assert len(parser.functions) == 1
assert parser.functions[0].is_template is True assert parser.functions[0].lookup_by_name is True
assert parser.functions[0].name == "TestClass::TestMethod" assert parser.functions[0].name == "TestClass::TestMethod"
@ -285,3 +293,67 @@ def test_indented_no_curly_hint(parser):
] ]
) )
assert len(parser.alerts) == 0 assert len(parser.alerts) == 0
def test_implicit_lookup_by_name(parser):
"""FUNCTION (or STUB) offsets must directly precede the function signature.
If we detect a comment instead, we assume that this is a lookup-by-name
function and end here."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// TestClass::TestMethod()",
]
)
assert parser.state == ReaderState.SEARCH
assert len(parser.functions) == 1
assert parser.functions[0].lookup_by_name is True
assert parser.functions[0].name == "TestClass::TestMethod()"
def test_function_with_spaces(parser):
"""There should not be any spaces between the end of FUNCTION markers
and the start or name of the function. If it's a blank line, we can safely
ignore but should alert to this."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
" ",
"inline void test_function() { };",
]
)
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE
def test_function_with_spaces_implicit(parser):
"""Same as above, but for implicit lookup-by-name"""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
" ",
"// Implicit::Method",
]
)
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE
@pytest.mark.xfail(reason="will assume implicit lookup-by-name function")
def test_function_is_commented(parser):
"""In an ideal world, we would recognize that there is no code here.
Some editors (or users) might comment the function on each line like this
but hopefully it is rare."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// int my_function()",
"// {",
"// return 5;",
"// }",
]
)
assert len(parser.functions) == 0

View File

@ -334,7 +334,7 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac
else: else:
continue continue
if fun.is_template: if fun.lookup_by_name:
recinfo = syminfo.get_recompiled_address_from_name(fun.name) recinfo = syminfo.get_recompiled_address_from_name(fun.name)
if not recinfo: if not recinfo:
continue continue