diff --git a/tools/isledecomp/isledecomp/parser/error.py b/tools/isledecomp/isledecomp/parser/error.py index dce99448..c18e3e29 100644 --- a/tools/isledecomp/isledecomp/parser/error.py +++ b/tools/isledecomp/isledecomp/parser/error.py @@ -19,7 +19,15 @@ class ParserError(Enum): BOGUS_MARKER = 104 # WARN: New function marker appeared while we were inside a function - MISSED_END_OF_FUNCTION = 117 + MISSED_END_OF_FUNCTION = 105 + + # WARN: If we find a curly brace right after the function declaration + # this is wrong but we still have enough to make a match with reccmp + MISSED_START_OF_FUNCTION = 106 + + # WARN: A blank line appeared between the end of FUNCTION markers + # and the start of the function. We can ignore it, but the line shouldn't be there + UNEXPECTED_BLANK_LINE = 107 # ERROR: We found a marker unexpectedly UNEXPECTED_MARKER = 200 diff --git a/tools/isledecomp/isledecomp/parser/node.py b/tools/isledecomp/isledecomp/parser/node.py index 96cc3362..0ee87000 100644 --- a/tools/isledecomp/isledecomp/parser/node.py +++ b/tools/isledecomp/isledecomp/parser/node.py @@ -21,6 +21,7 @@ class ParserSymbol(ParserNode): @dataclass class ParserFunction(ParserSymbol): name: str + lookup_by_name: bool = False is_stub: bool = False is_synthetic: bool = False is_template: bool = False diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 24dc0b23..37e8d386 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -42,6 +42,10 @@ def marker_is_synthetic(marker: DecompMarker) -> bool: return marker.type.upper() in ("SYNTHETIC", "TEMPLATE") +def marker_is_template(marker: DecompMarker) -> bool: + return marker.type.upper() == "TEMPLATE" + + def marker_is_function(marker: DecompMarker) -> bool: return marker.type.upper() in ("FUNCTION", "STUB") @@ -55,8 +59,8 @@ def __init__(self): self.markers: dict = {} def insert(self, marker: DecompMarker) -> bool: + """Return True if this insert would overwrite""" module = marker.module.upper() - # Return True if this insert would overwrite if module in self.markers: return True @@ -159,9 +163,12 @@ def _synthetic_marker(self, marker: DecompMarker): self._syntax_warning(ParserError.DUPLICATE_MODULE) self.state = ReaderState.IN_TEMPLATE - def _function_done(self, unexpected: bool = False): + def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False): end_line = self.line_number if unexpected: + # If we missed the end of the previous function, assume it ended + # on the previous line and that whatever we are tracking next + # begins on the current line. end_line -= 1 for marker in self.fun_markers.iter(): @@ -170,8 +177,10 @@ def _function_done(self, unexpected: bool = False): line_number=self.function_start, module=marker.module, offset=marker.offset, + lookup_by_name=lookup_by_name, is_stub=marker_is_stub(marker), - is_template=marker_is_synthetic(marker), + is_synthetic=marker_is_synthetic(marker), + is_template=marker_is_template(marker), name=self.function_sig, end_line=end_line, ) @@ -262,7 +271,7 @@ def _handle_marker(self, marker: DecompMarker): self._synthetic_marker(marker) elif self.state == ReaderState.IN_FUNC: self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) - self._function_done(unexpected=True) + self._function_done(lookup_by_name=True, unexpected=True) self._synthetic_marker(marker) else: self._syntax_error(ParserError.INCOMPATIBLE_MARKER) @@ -304,6 +313,7 @@ def read_line(self, line: str): self._handle_marker(marker) return + line_strip = line.strip() if self.state == ReaderState.IN_TEMPLATE: # TEMPLATE functions are a special case. The signature is # given on the next line (in a // comment) @@ -313,16 +323,32 @@ def read_line(self, line: str): else: self.function_sig = name self._function_starts_here() - self._function_done() + self._function_done(lookup_by_name=True) elif self.state == ReaderState.WANT_SIG: - # Skip blank lines or comments that come after the offset - # marker. There is not a formal procedure for this, so just - # assume the next "code line" is the function signature - if not is_blank_or_comment(line): + # Ignore blanks on the way to function start or function name + if len(line_strip) == 0: + self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE) + + elif line_strip.startswith("//"): + # If we found a comment, assume implicit lookup-by-name + # function and end here. We know this is not a decomp marker + # because it would have been handled already. + self.function_sig = get_synthetic_name(line) + self._function_starts_here() + self._function_done(lookup_by_name=True) + + elif line_strip == "{": + # We missed the function signature but we can recover from this + self.function_sig = "(unknown)" + self._function_starts_here() + self._syntax_warning(ParserError.MISSED_START_OF_FUNCTION) + self.state = ReaderState.IN_FUNC + + else: # Inline functions may end with a comment. Strip that out # to help parsing. - self.function_sig = remove_trailing_comment(line.strip()) + self.function_sig = remove_trailing_comment(line_strip) # Now check to see if the opening curly bracket is on the # same line. clang-format should prevent this (BraceWrapping) @@ -340,13 +366,13 @@ def read_line(self, line: str): self.state = ReaderState.WANT_CURLY elif self.state == ReaderState.WANT_CURLY: - if line.strip() == "{": + if line_strip == "{": self.curly_indent_stops = line.index("{") self._function_starts_here() self.state = ReaderState.IN_FUNC elif self.state == ReaderState.IN_FUNC: - if line.strip().startswith("}") and line[self.curly_indent_stops] == "}": + if line_strip.startswith("}") and line[self.curly_indent_stops] == "}": self._function_done() elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL): diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 222a9ac1..7954aee1 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -11,15 +11,23 @@ def fixture_parser(): return DecompParser() -@pytest.mark.skip(reason="todo") def test_missing_sig(parser): - """Bad syntax: function signature is missing""" - parser.read_lines(["// FUNCTION: TEST 0x1234", "{"]) - assert parser.state == ReaderState.IN_FUNC - assert len(parser.alerts) == 1 - parser.read_line("}") + """In the hopefully rare scenario that the function signature and marker + are swapped, we still have enough to match witch reccmp""" + parser.read_lines( + [ + "void my_function()", + "// FUNCTION: TEST 0x1234", + "{", + "}", + ] + ) + assert parser.state == ReaderState.SEARCH assert len(parser.functions) == 1 - assert parser.functions[0] != "{" + assert parser.functions[0].line_number == 3 + + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.MISSED_START_OF_FUNCTION def test_not_exact_syntax(parser): @@ -210,7 +218,7 @@ def test_synthetic(parser): ] ) assert len(parser.functions) == 1 - assert parser.functions[0].is_template is True + assert parser.functions[0].lookup_by_name is True assert parser.functions[0].name == "TestClass::TestMethod" @@ -285,3 +293,67 @@ def test_indented_no_curly_hint(parser): ] ) assert len(parser.alerts) == 0 + + +def test_implicit_lookup_by_name(parser): + """FUNCTION (or STUB) offsets must directly precede the function signature. + If we detect a comment instead, we assume that this is a lookup-by-name + function and end here.""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "// TestClass::TestMethod()", + ] + ) + assert parser.state == ReaderState.SEARCH + assert len(parser.functions) == 1 + assert parser.functions[0].lookup_by_name is True + assert parser.functions[0].name == "TestClass::TestMethod()" + + +def test_function_with_spaces(parser): + """There should not be any spaces between the end of FUNCTION markers + and the start or name of the function. If it's a blank line, we can safely + ignore but should alert to this.""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + " ", + "inline void test_function() { };", + ] + ) + assert len(parser.functions) == 1 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE + + +def test_function_with_spaces_implicit(parser): + """Same as above, but for implicit lookup-by-name""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + " ", + "// Implicit::Method", + ] + ) + assert len(parser.functions) == 1 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE + + +@pytest.mark.xfail(reason="will assume implicit lookup-by-name function") +def test_function_is_commented(parser): + """In an ideal world, we would recognize that there is no code here. + Some editors (or users) might comment the function on each line like this + but hopefully it is rare.""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "// int my_function()", + "// {", + "// return 5;", + "// }", + ] + ) + + assert len(parser.functions) == 0 diff --git a/tools/reccmp/reccmp.py b/tools/reccmp/reccmp.py index 1d4cd5b1..f7659703 100755 --- a/tools/reccmp/reccmp.py +++ b/tools/reccmp/reccmp.py @@ -334,7 +334,7 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac else: continue - if fun.is_template: + if fun.lookup_by_name: recinfo = syminfo.get_recompiled_address_from_name(fun.name) if not recinfo: continue