From 75802101accb5404e4b21315596c6b81a9931f15 Mon Sep 17 00:00:00 2001 From: disinvite Date: Fri, 1 Dec 2023 15:10:32 -0500 Subject: [PATCH 01/17] Merge from parser2 branch --- tools/checkorder/checkorder.py | 34 +- .../isledecomp/isledecomp/parser/__init__.py | 2 +- tools/isledecomp/isledecomp/parser/error.py | 33 ++ tools/isledecomp/isledecomp/parser/node.py | 41 ++ tools/isledecomp/isledecomp/parser/parser.py | 383 +++++++++++++----- tools/isledecomp/isledecomp/parser/util.py | 70 +--- .../isledecomp/tests/samples/basic_class.cpp | 5 +- tools/isledecomp/tests/samples/basic_file.cpp | 6 +- .../tests/samples/global_variables.cpp | 14 + tools/isledecomp/tests/samples/inline.cpp | 4 +- .../tests/samples/missing_offset.cpp | 2 +- .../tests/samples/multiple_offsets.cpp | 12 +- .../tests/samples/oneline_function.cpp | 4 +- .../isledecomp/tests/samples/out_of_order.cpp | 6 +- .../tests/samples/poorly_formatted.cpp | 6 +- tools/isledecomp/tests/test_parser.py | 235 ++++++----- tools/isledecomp/tests/test_parser_samples.py | 141 +++++++ .../tests/test_parser_statechange.py | 150 +++++++ tools/isledecomp/tests/test_parser_util.py | 107 +++-- tools/reccmp/reccmp.py | 20 +- 20 files changed, 923 insertions(+), 352 deletions(-) create mode 100644 tools/isledecomp/isledecomp/parser/error.py create mode 100644 tools/isledecomp/isledecomp/parser/node.py create mode 100644 tools/isledecomp/tests/samples/global_variables.cpp create mode 100644 tools/isledecomp/tests/test_parser_samples.py create mode 100644 tools/isledecomp/tests/test_parser_statechange.py diff --git a/tools/checkorder/checkorder.py b/tools/checkorder/checkorder.py index 1ac8391f..d2f0b23e 100644 --- a/tools/checkorder/checkorder.py +++ b/tools/checkorder/checkorder.py @@ -2,8 +2,7 @@ import sys import argparse from isledecomp.dir import walk_source_dir, is_file_cpp -from isledecomp.parser import find_code_blocks -from isledecomp.parser.util import is_exact_offset_comment +from isledecomp.parser import DecompParser def sig_truncate(sig: str) -> str: @@ -16,23 +15,22 @@ def check_file(filename: str, verbose: bool = False) -> bool: """Open and read the given file, then check whether the code blocks are in order. If verbose, print each block.""" + parser = DecompParser() with open(filename, "r", encoding="utf-8") as f: - code_blocks = find_code_blocks(f) + parser.read_lines(f) - bad_comments = [ - (block.start_line, block.offset_comment) - for block in code_blocks - if not is_exact_offset_comment(block.offset_comment) - ] - just_offsets = [block.offset for block in code_blocks] + just_offsets = [block.offset for block in parser.functions] sorted_offsets = sorted(just_offsets) file_out_of_order = just_offsets != sorted_offsets + # TODO: When we add parser error severity, actual errors that obstruct + # parsing should probably be shown here regardless of verbose mode + # If we detect inexact comments, don't print anything unless we are # in verbose mode. If the file is out of order, we always print the # file name. - should_report = (len(bad_comments) > 0 and verbose) or file_out_of_order + should_report = (len(parser.alerts) > 0 and verbose) or file_out_of_order if not should_report and not file_out_of_order: return False @@ -44,22 +42,22 @@ def check_file(filename: str, verbose: bool = False) -> bool: order_lookup = {k: i for i, k in enumerate(sorted_offsets)} prev_offset = 0 - for block in code_blocks: + for fun in parser.functions: msg = " ".join( [ - " " if block.offset > prev_offset else "!", + " " if fun.offset > prev_offset else "!", f"{block.offset:08x}", - f"{block.end_line - block.start_line:4} lines", - f"{order_lookup[block.offset]:3}", + f"{fun.end_line - fun.line_number:4} lines", + f"{order_lookup[fun.offset]:3}", " ", - sig_truncate(block.signature), + sig_truncate(fun.signature), ] ) print(msg) - prev_offset = block.offset + prev_offset = fun.offset - for line_no, line in bad_comments: - print(f"* line {line_no:3} bad offset comment ({line})") + for alert in parser.alerts: + print(f"* line {alert.line_number:4} {alert.code} ({alert.line})") print() diff --git a/tools/isledecomp/isledecomp/parser/__init__.py b/tools/isledecomp/isledecomp/parser/__init__.py index 0d504619..c9394d4a 100644 --- a/tools/isledecomp/isledecomp/parser/__init__.py +++ b/tools/isledecomp/isledecomp/parser/__init__.py @@ -1 +1 @@ -from .parser import find_code_blocks +from .parser import DecompParser diff --git a/tools/isledecomp/isledecomp/parser/error.py b/tools/isledecomp/isledecomp/parser/error.py new file mode 100644 index 00000000..8bda90da --- /dev/null +++ b/tools/isledecomp/isledecomp/parser/error.py @@ -0,0 +1,33 @@ +from enum import Enum + + +class ParserError(Enum): + # WARN: Stub function exceeds some line number threshold + UNLIKELY_STUB = 100 + + # WARN: Decomp marker is close enough to be recognized, but does not follow syntax exactly + BAD_DECOMP_MARKER = 101 + + # WARN: Multiple markers in sequence do not have distinct modules + DUPLICATE_MODULE = 102 + + # WARN: Detected a dupcliate module/offset pair in the current file + DUPLICATE_OFFSET = 103 + + # WARN: We read a line that matches the decomp marker pattern, but we are not set up + # to handle it + BOGUS_MARKER = 104 + + # WARN: Under a synthetic marker we expected a comment but found a code line instead + SYNTHETIC_NOT_COMMENT = 110 + + # WARN: New function marker appeared while we were inside a function + MISSED_END_OF_FUNCTION = 117 + + # ERROR: We found a marker unexpectedly + UNEXPECTED_MARKER = 200 + + # ERROR: We found a marker where we expected to find one, but it is incompatible + # with the preceding markers. + # For example, a GLOBAL cannot follow FUNCTION/STUB + INCOMPATIBLE_MARKER = 201 diff --git a/tools/isledecomp/isledecomp/parser/node.py b/tools/isledecomp/isledecomp/parser/node.py new file mode 100644 index 00000000..f9fbe3b5 --- /dev/null +++ b/tools/isledecomp/isledecomp/parser/node.py @@ -0,0 +1,41 @@ +from dataclasses import dataclass +from enum import Enum + + +@dataclass +class ParserNode: + line_number: int + + +@dataclass +class ParserAlert(ParserNode): + code: int + line: str + + +@dataclass +class ParserSymbol(ParserNode): + module: str + offset: int + + +@dataclass +class ParserFunction(ParserSymbol): + name: str + is_stub: bool = False + is_synthetic: bool = False + is_template: bool = False + end_line: int = -1 + + +@dataclass +class ParserVariable(ParserSymbol): + name: str + size: int = -1 + is_static: bool = False + + +@dataclass +class ParserVtable(ParserSymbol): + class_name: str + num_entries: int = -1 diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 1039ac41..aebe1c86 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -1,145 +1,346 @@ # C++ file parser -from typing import List, TextIO +from typing import List, TextIO, Iterable from enum import Enum from .util import ( - CodeBlock, - OffsetMatch, + DecompMarker, is_blank_or_comment, - match_offset_comment, + match_marker, + is_marker_exact, get_template_function_name, remove_trailing_comment, - distinct_by_module, ) +from .node import ( + ParserAlert, + ParserNode, + ParserFunction, + ParserVariable, + ParserVtable, +) +from .error import ParserError class ReaderState(Enum): - WANT_OFFSET = 0 + SEARCH = 0 WANT_SIG = 1 IN_FUNC = 2 IN_TEMPLATE = 3 WANT_CURLY = 4 - FUNCTION_DONE = 5 + IN_GLOBAL = 5 + IN_FUNC_GLOBAL = 6 + IN_VTABLE = 7 -def find_code_blocks(stream: TextIO) -> List[CodeBlock]: - """Read the IO stream (file) line-by-line and give the following report: - Foreach code block (function) in the file, what are its starting and - ending line numbers, and what is the given offset in the original - binary. We expect the result to be ordered by line number because we - are reading the file from start to finish.""" +def marker_is_stub(marker: DecompMarker) -> bool: + return marker.type.upper() == "STUB" - blocks: List[CodeBlock] = [] - offset_matches: List[OffsetMatch] = [] +def marker_is_variable(marker: DecompMarker) -> bool: + return marker.type.upper() == "GLOBAL" - function_sig = None - start_line = None - end_line = None - state = ReaderState.WANT_OFFSET - # 1-based to match cvdump and your text editor - # I know it says 0, but we will increment before each readline() - line_no = 0 - can_seek = True +def marker_is_synthetic(marker: DecompMarker) -> bool: + return marker.type.upper() in ("SYNTHETIC", "TEMPLATE") - while True: - # Do this before reading again so that an EOF will not - # cause us to miss the last function of the file. - if state == ReaderState.FUNCTION_DONE: - # Our list of offset marks could have duplicates on - # module name, so we'll eliminate those now. - for offset_match in distinct_by_module(offset_matches): - block = CodeBlock( - offset=offset_match.address, - signature=function_sig, - start_line=start_line, + +def marker_is_function(marker: DecompMarker) -> bool: + return marker.type.upper() in ("FUNCTION", "STUB") + + +def marker_is_vtable(marker: DecompMarker) -> bool: + return marker.type.upper() == "VTABLE" + + +class MarkerDict: + def __init__(self): + self.markers: dict = {} + + def insert(self, marker: DecompMarker) -> bool: + module = marker.module.upper() + # Return True if this insert would overwrite + if module in self.markers: + return True + + self.markers[module] = (marker.type, marker.offset) + return False + + def iter(self): + for module in self.markers: + (marker_type, offset) = self.markers[module] + yield DecompMarker(marker_type, module, offset) + + def empty(self): + self.markers = {} + + +class DecompParser: + def __init__(self): + self.fun_markers = MarkerDict() + self.var_markers = MarkerDict() + self.tbl_markers = MarkerDict() + self.reset() + + def reset(self): + # Output values + self.functions = [] + self.vtables = [] + self.variables = [] + self.alerts = [] + + # Internal state machine stuff + self.line_number: int = 0 + self.state: ReaderState = ReaderState.SEARCH + + self.last_line: str = "" + self.fun_markers.empty() + self.var_markers.empty() + self.tbl_markers.empty() + self.function_start: int = 0 + self.function_sig: str = "" + + def _recover(self): + """We hit a syntax error and need to reset temp structures""" + self.state = ReaderState.SEARCH + self.fun_markers.empty() + self.var_markers.empty() + self.tbl_markers.empty() + + def _syntax_warning(self, code): + self.alerts.append( + ParserAlert( + line_number=self.line_number, + code=code, + line=self.last_line.strip(), + ) + ) + + def _syntax_error(self, code): + self._syntax_warning(code) + self._recover() + + def _function_starts_here(self): + self.function_start = self.line_number + + def _function_marker(self, marker: DecompMarker): + if self.fun_markers.insert(marker): + self._syntax_warning(ParserError.DUPLICATE_MODULE) + self.state = ReaderState.WANT_SIG + + def _synthetic_marker(self, marker: DecompMarker): + if self.fun_markers.insert(marker): + self._syntax_warning(ParserError.DUPLICATE_MODULE) + self.state = ReaderState.IN_TEMPLATE + + def _function_done(self, unexpected: bool = False): + end_line = self.line_number + if unexpected: + end_line -= -1 + + for marker in self.fun_markers.iter(): + self.functions.append( + ParserFunction( + line_number=self.function_start, + module=marker.module, + offset=marker.offset, + is_stub=marker_is_stub(marker), + is_template=marker_is_synthetic(marker), + name=self.function_sig, end_line=end_line, - offset_comment=offset_match.comment, - module=offset_match.module, - is_template=offset_match.is_template, - is_stub=offset_match.is_stub, ) - blocks.append(block) - offset_matches = [] - state = ReaderState.WANT_OFFSET + ) - if can_seek: - line_no += 1 - line = stream.readline() - if line == "": - break + self.fun_markers.empty() + self.state = ReaderState.SEARCH - new_match = match_offset_comment(line) - if new_match is not None: - # We will allow multiple offsets if we have just begun - # the code block, but not after we hit the curly brace. - if state in ( - ReaderState.WANT_OFFSET, - ReaderState.IN_TEMPLATE, + def _vtable_marker(self, marker: DecompMarker): + if self.tbl_markers.insert(marker): + self._syntax_warning(ParserError.DUPLICATE_MODULE) + self.state = ReaderState.IN_VTABLE + + def _vtable_done(self): + for marker in self.tbl_markers.iter(): + self.vtables.append( + ParserVtable( + line_number=self.line_number, + module=marker.module, + offset=marker.offset, + class_name=self.last_line.strip(), + ) + ) + + self.tbl_markers.empty() + self.state = ReaderState.SEARCH + + def _variable_marker(self, marker: DecompMarker): + if self.var_markers.insert(marker): + self._syntax_warning(ParserError.DUPLICATE_MODULE) + + if self.state in (ReaderState.IN_FUNC, ReaderState.IN_FUNC_GLOBAL): + self.state = ReaderState.IN_FUNC_GLOBAL + else: + self.state = ReaderState.IN_GLOBAL + + def _variable_done(self): + for marker in self.var_markers.iter(): + self.variables.append( + ParserVariable( + line_number=self.line_number, + module=marker.module, + offset=marker.offset, + name=self.last_line.strip(), + ) + ) + + self.var_markers.empty() + if self.state == ReaderState.IN_FUNC_GLOBAL: + self.state = ReaderState.IN_FUNC + else: + self.state = ReaderState.SEARCH + + def _handle_marker(self, marker: DecompMarker): + # Cannot handle any markers between function sig and opening curly brace + if self.state == ReaderState.WANT_CURLY: + self._syntax_error(ParserError.UNEXPECTED_MARKER) + return + + # TODO: How uncertain are we of detecting the end of a function + # in a clang-formatted file? For now we assume we have missed the + # end if we detect a non-GLOBAL marker while state is IN_FUNC. + # Maybe these cases should be syntax errors instead + + if marker_is_function(marker): + if self.state in ( + ReaderState.SEARCH, ReaderState.WANT_SIG, ): - # If we detected an offset marker unexpectedly, - # we are handling it here so we can continue seeking. - can_seek = True - - offset_matches.append(new_match) - - if new_match.is_template: - state = ReaderState.IN_TEMPLATE - else: - state = ReaderState.WANT_SIG - else: + # We will allow multiple offsets if we have just begun + # the code block, but not after we hit the curly brace. + self._function_marker(marker) + elif self.state == ReaderState.IN_FUNC: # We hit another offset unexpectedly. # We can recover easily by just ending the function here. - end_line = line_no - 1 - state = ReaderState.FUNCTION_DONE + self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) + self._function_done() - # Pause reading here so we handle the offset marker - # on the next loop iteration - can_seek = False + # Start the next function right after so we can + # read the next line. + self._function_marker(marker) + else: + self._syntax_error(ParserError.INCOMPATIBLE_MARKER) - elif state == ReaderState.IN_TEMPLATE: + elif marker_is_synthetic(marker): + if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE): + self._synthetic_marker(marker) + elif self.state == ReaderState.IN_FUNC: + self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) + self._function_done() + self._synthetic_marker(marker) + else: + self._syntax_error(ParserError.INCOMPATIBLE_MARKER) + + elif marker_is_variable(marker): + if self.state in ( + ReaderState.SEARCH, + ReaderState.IN_GLOBAL, + ReaderState.IN_FUNC, + ReaderState.IN_FUNC_GLOBAL, + ): + self._variable_marker(marker) + else: + self._syntax_error(ParserError.INCOMPATIBLE_MARKER) + + elif marker_is_vtable(marker): + if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE): + self._vtable_marker(marker) + elif self.state == ReaderState.IN_FUNC: + self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) + self._function_done() + self._vtable_marker(marker) + else: + self._syntax_error(ParserError.INCOMPATIBLE_MARKER) + + else: + self._syntax_warning(ParserError.BOGUS_MARKER) + + def read_line(self, line: str): + self.last_line = line # TODO: Useful or hack for error reporting? + self.line_number += 1 + + marker = match_marker(line) + if marker is not None: + # TODO: what's the best place for this? + # Does it belong with reading or marker handling? + if not is_marker_exact(self.last_line): + self._syntax_warning(ParserError.BAD_DECOMP_MARKER) + self._handle_marker(marker) + return + + if self.state == ReaderState.IN_TEMPLATE: # TEMPLATE functions are a special case. The signature is # given on the next line (in a // comment) - function_sig = get_template_function_name(line) - start_line = line_no - end_line = line_no - state = ReaderState.FUNCTION_DONE + self.function_sig = get_template_function_name(line) + self._function_starts_here() + self._function_done() - elif state == ReaderState.WANT_SIG: + elif self.state == ReaderState.WANT_SIG: # Skip blank lines or comments that come after the offset # marker. There is not a formal procedure for this, so just # assume the next "code line" is the function signature if not is_blank_or_comment(line): # Inline functions may end with a comment. Strip that out # to help parsing. - function_sig = remove_trailing_comment(line.strip()) + self.function_sig = remove_trailing_comment(line.strip()) # Now check to see if the opening curly bracket is on the # same line. clang-format should prevent this (BraceWrapping) # but it is easy to detect. # If the entire function is on one line, handle that too. - if function_sig.endswith("{"): - start_line = line_no - state = ReaderState.IN_FUNC - elif function_sig.endswith("}") or function_sig.endswith("};"): - start_line = line_no - end_line = line_no - state = ReaderState.FUNCTION_DONE + if self.function_sig.endswith("{"): + self._function_starts_here() + self.state = ReaderState.IN_FUNC + elif self.function_sig.endswith("}") or self.function_sig.endswith( + "};" + ): + self._function_starts_here() + self._function_done() else: - state = ReaderState.WANT_CURLY + self.state = ReaderState.WANT_CURLY - elif state == ReaderState.WANT_CURLY: + elif self.state == ReaderState.WANT_CURLY: if line.strip() == "{": - start_line = line_no - state = ReaderState.IN_FUNC + self._function_starts_here() + self.state = ReaderState.IN_FUNC - elif state == ReaderState.IN_FUNC: + elif self.state == ReaderState.IN_FUNC: # Naive but reasonable assumption that functions will end with # a curly brace on its own line with no prepended spaces. if line.startswith("}"): - end_line = line_no - state = ReaderState.FUNCTION_DONE + self._function_done() - return blocks + elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL): + if not is_blank_or_comment(line): + self._variable_done() + + elif self.state == ReaderState.IN_VTABLE: + if not is_blank_or_comment(line): + self._vtable_done() + + def read_lines(self, lines: Iterable): + for line in lines: + self.read_line(line) + + +def find_code_blocks(stream: TextIO) -> List[ParserNode]: + """Read the IO stream (file) line-by-line and give the following report: + Foreach code block (function) in the file, what are its starting and + ending line numbers, and what is the given offset in the original + binary. We expect the result to be ordered by line number because we + are reading the file from start to finish.""" + + # TODO: this will be replaced shortly. shim for now to avoid + # making more changes elsewhere + p = DecompParser() + for line in stream: + p.read_line(line) + + return p.functions diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index 59fca75b..02d3c976 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -4,41 +4,15 @@ from typing import List from collections import namedtuple +DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"]) -CodeBlock = namedtuple( - "CodeBlock", - [ - "offset", - "signature", - "start_line", - "end_line", - "offset_comment", - "module", - "is_template", - "is_stub", - ], -) -OffsetMatch = namedtuple( - "OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"] -) - -# This has not been formally established, but considering that "STUB" -# is a temporary state for a function, we assume it will appear last, -# after any other modifiers (i.e. TEMPLATE) - -# To match a reasonable variance of formatting for the offset comment -offsetCommentRegex = re.compile( - r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?", # nopep8 +markerRegex = re.compile( + r"\s*//\s*(\w+):\s*(\w+)\s+((?:0x)?[a-f0-9]+)", flags=re.I, ) -# To match the exact syntax (text upper case, hex lower case, with spaces) -# that is used in most places -offsetCommentExactRegex = re.compile( - r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$" -) # nopep8 - +markerExactRegex = re.compile(r"// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$") # The goal here is to just read whatever is on the next line, so some # flexibility in the formatting seems OK @@ -78,39 +52,15 @@ def is_blank_or_comment(line: str) -> bool: ) -def is_exact_offset_comment(line: str) -> bool: - """If the offset comment does not match our (unofficial) syntax - we may want to alert the user to fix it for style points.""" - return offsetCommentExactRegex.match(line) is not None - - -def match_offset_comment(line: str) -> OffsetMatch | None: - match = offsetCommentRegex.match(line) +def match_marker(line: str) -> DecompMarker | None: + match = markerRegex.match(line) if match is None: return None - return OffsetMatch( - module=match.group(1), - address=int(match.group(2), 16), - is_template=match.group(3) is not None, - is_stub=match.group(4) is not None, - comment=line.strip(), + return DecompMarker( + type=match.group(1), module=match.group(2), offset=int(match.group(3), 16) ) -def distinct_by_module(offsets: List) -> List: - """Given a list of offset markers, return a list with distinct - module names. If module names (case-insensitive) are repeated, - choose the offset that appears first.""" - - if len(offsets) < 2: - return offsets - - # Dict maintains insertion order in python >=3.7 - offsets_dict = {} - for offset in offsets: - module_upper = offset.module.upper() - if module_upper not in offsets_dict: - offsets_dict[module_upper] = offset - - return list(offsets_dict.values()) +def is_marker_exact(line: str) -> bool: + return markerExactRegex.match(line) is not None diff --git a/tools/isledecomp/tests/samples/basic_class.cpp b/tools/isledecomp/tests/samples/basic_class.cpp index 23ce3c39..4316ad4a 100644 --- a/tools/isledecomp/tests/samples/basic_class.cpp +++ b/tools/isledecomp/tests/samples/basic_class.cpp @@ -3,6 +3,7 @@ // A very simple class +// VTABLE: TEST 0x1001002 class TestClass { public: TestClass(); @@ -10,14 +11,14 @@ class TestClass { virtual MxResult Tickle() override; // vtable+08 - // OFFSET: TEST 0x12345678 + // FUNCTION: TEST 0x12345678 inline const char* ClassName() const // vtable+0c { // 0xabcd1234 return "TestClass"; } - // OFFSET: TEST 0xdeadbeef + // FUNCTION: TEST 0xdeadbeef inline MxBool IsA(const char* name) const override // vtable+10 { return !strcmp(name, TestClass::ClassName()); diff --git a/tools/isledecomp/tests/samples/basic_file.cpp b/tools/isledecomp/tests/samples/basic_file.cpp index 6a4017b5..99930de8 100644 --- a/tools/isledecomp/tests/samples/basic_file.cpp +++ b/tools/isledecomp/tests/samples/basic_file.cpp @@ -3,19 +3,19 @@ // A very simple well-formed code file -// OFFSET: TEST 0x1234 +// FUNCTION: TEST 0x1234 void function01() { // TODO } -// OFFSET: TEST 0x2345 +// FUNCTION: TEST 0x2345 void function02() { // TODO } -// OFFSET: TEST 0x3456 +// FUNCTION: TEST 0x3456 void function03() { // TODO diff --git a/tools/isledecomp/tests/samples/global_variables.cpp b/tools/isledecomp/tests/samples/global_variables.cpp new file mode 100644 index 00000000..3be0316a --- /dev/null +++ b/tools/isledecomp/tests/samples/global_variables.cpp @@ -0,0 +1,14 @@ +// Sample for python unit tests +// Not part of the decomp + +// Global variables inside and outside of functions + +// GLOBAL: TEST 0x1000 +const char *g_message = "test"; + +// FUNCTION: TEST 0x1234 +void function01() +{ + // GLOBAL: TEST 0x5555 + static int g_hello = 123; +} diff --git a/tools/isledecomp/tests/samples/inline.cpp b/tools/isledecomp/tests/samples/inline.cpp index 0bfedf6d..8a36c89a 100644 --- a/tools/isledecomp/tests/samples/inline.cpp +++ b/tools/isledecomp/tests/samples/inline.cpp @@ -1,8 +1,8 @@ // Sample for python unit tests // Not part of the decomp -// OFFSET: TEST 0x10000001 +// FUNCTION: TEST 0x10000001 inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there -// OFFSET: TEST 0x10000002 +// FUNCTION: TEST 0x10000002 inline const char* OneLine() const { return "MxDSObject"; }; diff --git a/tools/isledecomp/tests/samples/missing_offset.cpp b/tools/isledecomp/tests/samples/missing_offset.cpp index 332fed2c..3f6b3811 100644 --- a/tools/isledecomp/tests/samples/missing_offset.cpp +++ b/tools/isledecomp/tests/samples/missing_offset.cpp @@ -9,7 +9,7 @@ int no_offset_comment() return -1; } -// OFFSET: TEST 0xdeadbeef +// FUNCTION: TEST 0xdeadbeef void regular_ole_function() { printf("hi there"); diff --git a/tools/isledecomp/tests/samples/multiple_offsets.cpp b/tools/isledecomp/tests/samples/multiple_offsets.cpp index eecdd95b..dc3c5bfa 100644 --- a/tools/isledecomp/tests/samples/multiple_offsets.cpp +++ b/tools/isledecomp/tests/samples/multiple_offsets.cpp @@ -3,22 +3,22 @@ // Handling multiple offset markers -// OFFSET: TEST 0x1234 -// OFFSET: HELLO 0x5555 +// FUNCTION: TEST 0x1234 +// FUNCTION: HELLO 0x5555 void different_modules() { // TODO } -// OFFSET: TEST 0x2345 -// OFFSET: TEST 0x1234 +// FUNCTION: TEST 0x2345 +// FUNCTION: TEST 0x1234 void same_module() { // TODO } -// OFFSET: TEST 0x2002 -// OFFSET: test 0x1001 +// FUNCTION: TEST 0x2002 +// FUNCTION: test 0x1001 void same_case_insensitive() { // TODO diff --git a/tools/isledecomp/tests/samples/oneline_function.cpp b/tools/isledecomp/tests/samples/oneline_function.cpp index 8d7fdc5a..feb82314 100644 --- a/tools/isledecomp/tests/samples/oneline_function.cpp +++ b/tools/isledecomp/tests/samples/oneline_function.cpp @@ -1,10 +1,10 @@ // Sample for python unit tests // Not part of the decomp -// OFFSET: TEST 0x1234 +// FUNCTION: TEST 0x1234 void short_function() { static char* msg = "oneliner"; } -// OFFSET: TEST 0x5555 +// FUNCTION: TEST 0x5555 void function_after_one_liner() { // This function comes after the previous that is on a single line. diff --git a/tools/isledecomp/tests/samples/out_of_order.cpp b/tools/isledecomp/tests/samples/out_of_order.cpp index 749c4f2b..951c99e7 100644 --- a/tools/isledecomp/tests/samples/out_of_order.cpp +++ b/tools/isledecomp/tests/samples/out_of_order.cpp @@ -1,19 +1,19 @@ // Sample for python unit tests // Not part of the decomp -// OFFSET: TEST 0x1001 +// FUNCTION: TEST 0x1001 void function_order01() { // TODO } -// OFFSET: TEST 0x1003 +// FUNCTION: TEST 0x1003 void function_order03() { // TODO } -// OFFSET: TEST 0x1002 +// FUNCTION: TEST 0x1002 void function_order02() { // TODO diff --git a/tools/isledecomp/tests/samples/poorly_formatted.cpp b/tools/isledecomp/tests/samples/poorly_formatted.cpp index 32dd774c..69f365ec 100644 --- a/tools/isledecomp/tests/samples/poorly_formatted.cpp +++ b/tools/isledecomp/tests/samples/poorly_formatted.cpp @@ -4,18 +4,18 @@ // While it's reasonable to expect a well-formed file (and clang-format // will make sure we get one), this will put the parser through its paces. -// OFFSET: TEST 0x1234 +// FUNCTION: TEST 0x1234 void curly_with_spaces() { static char* msg = "hello"; } -// OFFSET: TEST 0x5555 +// FUNCTION: TEST 0x5555 void weird_closing_curly() { int x = 123; } -// OFFSET: HELLO 0x5656 +// FUNCTION: HELLO 0x5656 void bad_indenting() { if (0) { diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 48bb0e44..7731918f 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -1,127 +1,170 @@ -import os -from typing import List, TextIO -from isledecomp.parser import find_code_blocks -from isledecomp.parser.util import CodeBlock - -SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples") +import pytest +from isledecomp.parser.parser import ( + ReaderState, + DecompParser, +) +from isledecomp.parser.util import DecompMarker +from isledecomp.parser.error import ParserError -def sample_file(filename: str) -> TextIO: - """Wrapper for opening the samples from the directory that does not - depend on the cwd where we run the test""" - full_path = os.path.join(SAMPLE_DIR, filename) - return open(full_path, "r", encoding="utf-8") +@pytest.fixture +def parser(): + return DecompParser() -def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool: - """Helper to make this more idiomatic""" - just_offsets = [block.offset for block in blocks] - return just_offsets == sorted(just_offsets) +@pytest.mark.skip(reason="todo") +def test_missing_sig(parser): + """Bad syntax: function signature is missing""" + parser.read_lines(["// FUNCTION: TEST 0x1234", "{"]) + assert parser.state == ReaderState.IN_FUNC + assert len(parser.alerts) == 1 + parser.read_line("}") + assert len(parser.functions) == 1 + assert parser.functions[0] != "{" -# Tests are below # +def test_not_exact_syntax(parser): + """Alert to inexact syntax right here in the parser instead of kicking it downstream. + Doing this means we don't have to save the actual text.""" + parser.read_line("// function: test 1234") + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.BAD_DECOMP_MARKER -def test_sanity(): - """Read a very basic file""" - with sample_file("basic_file.cpp") as f: - blocks = find_code_blocks(f) +def test_invalid_marker(parser): + """We matched a decomp marker, but it's not one we care about""" + parser.read_line("// BANANA: TEST 0x1234") + assert parser.state == ReaderState.SEARCH - assert len(blocks) == 3 - assert code_blocks_are_sorted(blocks) is True - # n.b. The parser returns line numbers as 1-based - # Function starts when we see the opening curly brace - assert blocks[0].start_line == 8 - assert blocks[0].end_line == 10 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.BOGUS_MARKER -def test_oneline(): - """(Assuming clang-format permits this) This sample has a function - on a single line. This will test the end-of-function detection""" - with sample_file("oneline_function.cpp") as f: - blocks = find_code_blocks(f) - - assert len(blocks) == 2 - assert blocks[0].start_line == 5 - assert blocks[0].end_line == 5 +def test_unexpected_marker(parser): + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "// GLOBAL: TEST 0x5000", + ] + ) + assert parser.state == ReaderState.SEARCH + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER -def test_missing_offset(): - """What if the function doesn't have an offset comment?""" - with sample_file("missing_offset.cpp") as f: - blocks = find_code_blocks(f) - - # TODO: For now, the function without the offset will just be ignored. - # Would be the same outcome if the comment was present but mangled and - # we failed to match it. We should detect these cases in the future. - assert len(blocks) == 1 +def test_variable(parser): + parser.read_lines( + [ + "// GLOBAL: HELLO 0x1234", + "int g_value = 5;", + ] + ) + assert len(parser.variables) == 1 -def test_jumbled_case(): - """The parser just reports what it sees. It is the responsibility of - the downstream tools to do something about a jumbled file. - Just verify that we are reading it correctly.""" - with sample_file("out_of_order.cpp") as f: - blocks = find_code_blocks(f) - - assert len(blocks) == 3 - assert code_blocks_are_sorted(blocks) is False +def test_synthetic_plus_marker(parser): + """Should fail with error and not log the synthetic""" + parser.read_lines( + [ + "// SYNTHETIC: HEY 0x555", + "// FUNCTION: HOWDY 0x1234", + ] + ) + assert len(parser.functions) == 0 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER -def test_bad_file(): - with sample_file("poorly_formatted.cpp") as f: - blocks = find_code_blocks(f) +def test_different_markers_different_module(parser): + """Does it make any sense for a function to be a stub in one module, + but not in another? I don't know. But it's no problem for us.""" + parser.read_lines( + [ + "// FUNCTION: HOWDY 0x1234", + "// STUB: SUP 0x5555", + "void interesting_function() {", + "}", + ] + ) - assert len(blocks) == 3 + assert len(parser.alerts) == 0 + assert len(parser.functions) == 2 -def test_indented(): - """Offsets for functions inside of a class will probably be indented.""" - with sample_file("basic_class.cpp") as f: - blocks = find_code_blocks(f) +def test_different_markers_same_module(parser): + """Now, if something is a regular function but then a stub, + what do we say about that?""" + parser.read_lines( + [ + "// FUNCTION: HOWDY 0x1234", + "// STUB: HOWDY 0x5555", + "void interesting_function() {", + "}", + ] + ) - # TODO: We don't properly detect the end of these functions - # because the closing brace is indented. However... knowing where each - # function ends is less important (for now) than capturing - # all the functions that are there. + # Use first marker declaration, don't replace + assert len(parser.functions) == 1 + assert parser.functions[0].is_stub is False - assert len(blocks) == 2 - assert blocks[0].offset == int("0x12345678", 16) - assert blocks[0].start_line == 15 - # assert blocks[0].end_line == 18 - - assert blocks[1].offset == int("0xdeadbeef", 16) - assert blocks[1].start_line == 22 - # assert blocks[1].end_line == 24 + # Should alert to this + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE -def test_inline(): - with sample_file("inline.cpp") as f: - blocks = find_code_blocks(f) +def test_unexpected_synthetic(parser): + """FUNCTION then SYNTHETIC should fail to report either one""" + parser.read_lines( + [ + "// FUNCTION: HOWDY 0x1234", + "// SYNTHETIC: HOWDY 0x5555", + "void interesting_function() {", + "}", + ] + ) - assert len(blocks) == 2 - for block in blocks: - assert block.start_line is not None - assert block.start_line == block.end_line + assert parser.state == ReaderState.SEARCH + assert len(parser.functions) == 0 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER -def test_multiple_offsets(): - """If multiple offset marks appear before for a code block, take them - all but ensure module name (case-insensitive) is distinct. - Use first module occurrence in case of duplicates.""" - with sample_file("multiple_offsets.cpp") as f: - blocks = find_code_blocks(f) +@pytest.mark.skip(reason="not implemented yet") +def test_duplicate_offset(parser): + """Repeating the same module/offset in the same file is probably a typo""" + parser.read_lines( + [ + "// GLOBAL: HELLO 0x1234", + "int x = 1;", + "// GLOBAL: HELLO 0x1234", + "int y = 2;", + ] + ) - assert len(blocks) == 4 - assert blocks[0].module == "TEST" - assert blocks[0].start_line == 9 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.DUPLICATE_OFFSET - assert blocks[1].module == "HELLO" - assert blocks[1].start_line == 9 - # Duplicate modules are ignored - assert blocks[2].start_line == 16 - assert blocks[2].offset == 0x2345 +def test_multiple_variables(parser): + """Theoretically the same global variable can appear in multiple modules""" + parser.read_lines( + [ + "// GLOBAL: HELLO 0x1234", + "// GLOBAL: WUZZUP 0x555", + "const char *g_greeting;", + ] + ) + assert len(parser.alerts) == 0 + assert len(parser.variables) == 2 - assert blocks[3].module == "TEST" - assert blocks[3].offset == 0x2002 + +def test_multiple_vtables(parser): + parser.read_lines( + [ + "// VTABLE: HELLO 0x1234", + "// VTABLE: TEST 0x5432", + "class MxString : public MxCore {", + ] + ) + assert len(parser.alerts) == 0 + assert len(parser.vtables) == 2 diff --git a/tools/isledecomp/tests/test_parser_samples.py b/tools/isledecomp/tests/test_parser_samples.py new file mode 100644 index 00000000..a045e3cc --- /dev/null +++ b/tools/isledecomp/tests/test_parser_samples.py @@ -0,0 +1,141 @@ +import os +import pytest +from typing import List, TextIO +from isledecomp.parser import DecompParser +from isledecomp.parser.node import ParserSymbol + +SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples") + + +def sample_file(filename: str) -> TextIO: + """Wrapper for opening the samples from the directory that does not + depend on the cwd where we run the test""" + full_path = os.path.join(SAMPLE_DIR, filename) + return open(full_path, "r", encoding="utf-8") + + +def code_blocks_are_sorted(blocks: List[ParserSymbol]) -> bool: + """Helper to make this more idiomatic""" + just_offsets = [block.offset for block in blocks] + return just_offsets == sorted(just_offsets) + + +@pytest.fixture +def parser(): + return DecompParser() + + +# Tests are below # + + +def test_sanity(parser): + """Read a very basic file""" + with sample_file("basic_file.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 3 + assert code_blocks_are_sorted(parser.functions) is True + # n.b. The parser returns line numbers as 1-based + # Function starts when we see the opening curly brace + assert parser.functions[0].line_number == 8 + assert parser.functions[0].end_line == 10 + + +def test_oneline(parser): + """(Assuming clang-format permits this) This sample has a function + on a single line. This will test the end-of-function detection""" + with sample_file("oneline_function.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 2 + assert parser.functions[0].line_number == 5 + assert parser.functions[0].end_line == 5 + + +def test_missing_offset(parser): + """What if the function doesn't have an offset comment?""" + with sample_file("missing_offset.cpp") as f: + parser.read_lines(f) + + # TODO: For now, the function without the offset will just be ignored. + # Would be the same outcome if the comment was present but mangled and + # we failed to match it. We should detect these cases in the future. + assert len(parser.functions) == 1 + + +def test_jumbled_case(parser): + """The parser just reports what it sees. It is the responsibility of + the downstream tools to do something about a jumbled file. + Just verify that we are reading it correctly.""" + with sample_file("out_of_order.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 3 + assert code_blocks_are_sorted(parser.functions) is False + + +def test_bad_file(parser): + with sample_file("poorly_formatted.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 3 + + +def test_indented(parser): + """Offsets for functions inside of a class will probably be indented.""" + with sample_file("basic_class.cpp") as f: + parser.read_lines(f) + + # TODO: We don't properly detect the end of these functions + # because the closing brace is indented. However... knowing where each + # function ends is less important (for now) than capturing + # all the functions that are there. + + assert len(parser.functions) == 2 + assert parser.functions[0].offset == int("0x12345678", 16) + assert parser.functions[0].line_number == 16 + # assert parser.functions[0].end_line == 19 + + assert parser.functions[1].offset == int("0xdeadbeef", 16) + assert parser.functions[1].line_number == 23 + # assert parser.functions[1].end_line == 25 + + +def test_inline(parser): + with sample_file("inline.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 2 + for fun in parser.functions: + assert fun.line_number is not None + assert fun.line_number == fun.end_line + + +def test_multiple_offsets(parser): + """If multiple offset marks appear before for a code block, take them + all but ensure module name (case-insensitive) is distinct. + Use first module occurrence in case of duplicates.""" + with sample_file("multiple_offsets.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 4 + assert parser.functions[0].module == "TEST" + assert parser.functions[0].line_number == 9 + + assert parser.functions[1].module == "HELLO" + assert parser.functions[1].line_number == 9 + + # Duplicate modules are ignored + assert parser.functions[2].line_number == 16 + assert parser.functions[2].offset == 0x2345 + + assert parser.functions[3].module == "TEST" + assert parser.functions[3].offset == 0x2002 + + +def test_variables(parser): + with sample_file("global_variables.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 1 + assert len(parser.variables) == 2 diff --git a/tools/isledecomp/tests/test_parser_statechange.py b/tools/isledecomp/tests/test_parser_statechange.py new file mode 100644 index 00000000..62c19175 --- /dev/null +++ b/tools/isledecomp/tests/test_parser_statechange.py @@ -0,0 +1,150 @@ +import pytest +from isledecomp.parser.parser import ( + ReaderState as _rs, + DecompParser, +) +from isledecomp.parser.util import DecompMarker +from isledecomp.parser.error import ParserError as _pe + +# fmt: off +state_change_marker_cases = [ + (_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None), + (_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None), + (_rs.SEARCH, "STUB", _rs.WANT_SIG, None), + (_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None), + (_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None), + (_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None), + + (_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None), + (_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.WANT_SIG, "STUB", _rs.WANT_SIG, None), + (_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + + (_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION), + (_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), + (_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION), + (_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION), + (_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION), + (_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION), + + (_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None), + (_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None), + (_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + + (_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "STUB", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + + (_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None), + (_rs.IN_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + + (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), + (_rs.IN_FUNC_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + + (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_VTABLE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None), +] +# fmt: on + + +@pytest.mark.parametrize( + "state, marker_type, new_state, expected_error", state_change_marker_cases +) +def test_state_change_by_marker( + state: _rs, marker_type: str, new_state: _rs, expected_error: None | _pe +): + p = DecompParser() + p.state = state + p._handle_marker(DecompMarker(marker_type, "TEST", 0x1234)) + assert p.state == new_state + + if expected_error is not None: + assert len(p.alerts) > 0 + assert p.alerts[0].code == expected_error + + +# Reading any of these lines should have no effect in ReaderState.SEARCH +search_lines_no_effect = [ + "", + "\t", + " ", + "int x = 0;", + "// Comment", + "/*", + "*/", + "/* Block comment */", + "{", + "}", +] + + +@pytest.mark.parametrize("line", search_lines_no_effect) +def test_state_search_line(line: str): + p = DecompParser() + p.read_line(line) + assert p.state == _rs.SEARCH + assert len(p.alerts) == 0 + + +global_lines = [ + ("// A comment", _rs.IN_GLOBAL), + ("", _rs.IN_GLOBAL), + ("\t", _rs.IN_GLOBAL), + (" ", _rs.IN_GLOBAL), + # TODO: no check for "likely" variable declaration so these all count + ("void function()", _rs.SEARCH), + ("int x = 123;", _rs.SEARCH), + ("just some text", _rs.SEARCH), +] + + +@pytest.mark.parametrize("line, new_state", global_lines) +def test_state_global_line(line: str, new_state: _rs): + p = DecompParser() + p.read_line("// GLOBAL: TEST 0x1234") + assert p.state == _rs.IN_GLOBAL + p.read_line(line) + assert p.state == new_state + + +# mostly same as above +in_func_global_lines = [ + ("// A comment", _rs.IN_FUNC_GLOBAL), + ("", _rs.IN_FUNC_GLOBAL), + ("\t", _rs.IN_FUNC_GLOBAL), + (" ", _rs.IN_FUNC_GLOBAL), + # TODO: no check for "likely" variable declaration so these all count + ("void function()", _rs.IN_FUNC), + ("int x = 123;", _rs.IN_FUNC), + ("just some text", _rs.IN_FUNC), +] + + +@pytest.mark.parametrize("line, new_state", in_func_global_lines) +def test_state_in_func_global_line(line: str, new_state: _rs): + p = DecompParser() + p.state = _rs.IN_FUNC + p.read_line("// GLOBAL: TEST 0x1234") + assert p.state == _rs.IN_FUNC_GLOBAL + p.read_line(line) + assert p.state == new_state diff --git a/tools/isledecomp/tests/test_parser_util.py b/tools/isledecomp/tests/test_parser_util.py index 91fd285b..bbbf2d3a 100644 --- a/tools/isledecomp/tests/test_parser_util.py +++ b/tools/isledecomp/tests/test_parser_util.py @@ -1,11 +1,12 @@ from collections import namedtuple from typing import List import pytest +from isledecomp.parser.parser import MarkerDict from isledecomp.parser.util import ( + DecompMarker, is_blank_or_comment, - match_offset_comment, - is_exact_offset_comment, - distinct_by_module, + match_marker, + is_marker_exact, ) @@ -28,76 +29,72 @@ def test_is_blank_or_comment(line: str, expected: bool): assert is_blank_or_comment(line) is expected -offset_comment_samples = [ +marker_samples = [ # (can_parse: bool, exact_match: bool, line: str) - # Should match both expected modules with optional STUB marker - (True, True, "// OFFSET: LEGO1 0xdeadbeef"), - (True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"), - (True, True, "// OFFSET: ISLE 0x12345678"), - (True, True, "// OFFSET: ISLE 0x12345678 STUB"), + (True, True, "// FUNCTION: LEGO1 0xdeadbeef"), + (True, True, "// FUNCTION: ISLE 0x12345678"), # No trailing spaces allowed - (True, False, "// OFFSET: LEGO1 0xdeadbeef "), - (True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "), + (True, False, "// FUNCTION: LEGO1 0xdeadbeef "), # Must have exactly one space between elements - (True, False, "//OFFSET: ISLE 0xdeadbeef"), - (True, False, "// OFFSET:ISLE 0xdeadbeef"), - (True, False, "// OFFSET: ISLE 0xdeadbeef"), - (True, False, "// OFFSET: ISLE 0xdeadbeef"), - (True, False, "// OFFSET: ISLE 0xdeadbeef"), - (True, False, "// OFFSET: ISLE 0xdeadbeef STUB"), + (True, False, "//FUNCTION: ISLE 0xdeadbeef"), + (True, False, "// FUNCTION:ISLE 0xdeadbeef"), + (True, False, "// FUNCTION: ISLE 0xdeadbeef"), + (True, False, "// FUNCTION: ISLE 0xdeadbeef"), + (True, False, "// FUNCTION: ISLE 0xdeadbeef"), # Must have 0x prefix for hex number - (True, False, "// OFFSET: ISLE deadbeef"), + (True, False, "// FUNCTION: ISLE deadbeef"), # Offset, module name, and STUB must be uppercase - (True, False, "// offset: ISLE 0xdeadbeef"), - (True, False, "// offset: isle 0xdeadbeef"), - (True, False, "// OFFSET: LEGO1 0xdeadbeef stub"), + (True, False, "// function: ISLE 0xdeadbeef"), + (True, False, "// function: isle 0xdeadbeef"), # Hex string must be lowercase - (True, False, "// OFFSET: ISLE 0xDEADBEEF"), + (True, False, "// FUNCTION: ISLE 0xDEADBEEF"), # TODO: How flexible should we be with matching the module name? - (True, True, "// OFFSET: OMNI 0x12345678"), - (True, True, "// OFFSET: LEG01 0x12345678"), - (True, False, "// OFFSET: hello 0x12345678"), + (True, True, "// FUNCTION: OMNI 0x12345678"), + (True, True, "// FUNCTION: LEG01 0x12345678"), + (True, False, "// FUNCTION: hello 0x12345678"), # Not close enough to match - (False, False, "// OFFSET: ISLE0x12345678"), - (False, False, "// OFFSET: 0x12345678"), + (False, False, "// FUNCTION: ISLE0x12345678"), + (False, False, "// FUNCTION: 0x12345678"), (False, False, "// LEGO1: 0x12345678"), # Hex string shorter than 8 characters - (True, True, "// OFFSET: LEGO1 0x1234"), + (True, True, "// FUNCTION: LEGO1 0x1234"), # TODO: These match but shouldn't. - # (False, False, '// OFFSET: LEGO1 0'), - # (False, False, '// OFFSET: LEGO1 0x'), + # (False, False, '// FUNCTION: LEGO1 0'), + # (False, False, '// FUNCTION: LEGO1 0x'), ] -@pytest.mark.parametrize("match, _, line", offset_comment_samples) -def test_offset_match(line: str, match: bool, _): - did_match = match_offset_comment(line) is not None +@pytest.mark.parametrize("match, _, line", marker_samples) +def test_marker_match(line: str, match: bool, _): + did_match = match_marker(line) is not None assert did_match is match -@pytest.mark.parametrize("_, exact, line", offset_comment_samples) -def test_exact_offset_comment(line: str, exact: bool, _): - assert is_exact_offset_comment(line) is exact +@pytest.mark.parametrize("_, exact, line", marker_samples) +def test_marker_exact(line: str, exact: bool, _): + assert is_marker_exact(line) is exact -# Helper for the next test: cut down version of OffsetMatch -MiniOfs = namedtuple("MiniOfs", ["module", "value"]) - -distinct_by_module_samples = [ - # empty set - ([], []), - # same module name - ([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]), - # same module name, case-insensitive - ([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]), - # duplicates, non-consecutive - ( - [MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)], - [MiniOfs("test", 123), MiniOfs("abc", 111)], - ), -] +def test_marker_dict_simple(): + d = MarkerDict() + d.insert(DecompMarker("FUNCTION", "TEST", 0x1234)) + markers = list(d.iter()) + assert len(markers) == 1 -@pytest.mark.parametrize("sample, expected", distinct_by_module_samples) -def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]): - assert distinct_by_module(sample) == expected +def test_marker_dict_ofs_replace(): + d = MarkerDict() + d.insert(DecompMarker("FUNCTION", "TEST", 0x1234)) + d.insert(DecompMarker("FUNCTION", "TEST", 0x555)) + markers = list(d.iter()) + assert len(markers) == 1 + assert markers[0].offset == 0x1234 + + +def test_marker_dict_type_replace(): + d = MarkerDict() + d.insert(DecompMarker("FUNCTION", "TEST", 0x1234)) + d.insert(DecompMarker("STUB", "TEST", 0x1234)) + markers = list(d.iter()) + assert len(markers) == 1 + assert markers[0].type == "FUNCTION" diff --git a/tools/reccmp/reccmp.py b/tools/reccmp/reccmp.py index 02c16029..1d4cd5b1 100755 --- a/tools/reccmp/reccmp.py +++ b/tools/reccmp/reccmp.py @@ -10,7 +10,7 @@ from isledecomp import ( Bin, - find_code_blocks, + DecompParser, get_file_in_script_dir, OffsetPlaceholderGenerator, print_diff, @@ -313,18 +313,20 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac # Generate basename of original file, used in locating OFFSET lines basename = os.path.basename(os.path.splitext(original)[0]) + parser = DecompParser() for srcfilename in walk_source_dir(source): + parser.reset() with open(srcfilename, "r", encoding="utf-8") as srcfile: - blocks = find_code_blocks(srcfile) + parser.read_lines(srcfile) - for block in blocks: - if block.is_stub: + for fun in parser.functions: + if fun.is_stub: continue - if block.module != basename: + if fun.module != basename: continue - addr = block.offset + addr = fun.offset # Verbose flag handling if verbose: if addr == verbose: @@ -332,13 +334,13 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac else: continue - if block.is_template: - recinfo = syminfo.get_recompiled_address_from_name(block.signature) + if fun.is_template: + recinfo = syminfo.get_recompiled_address_from_name(fun.name) if not recinfo: continue else: recinfo = syminfo.get_recompiled_address( - srcfilename, block.start_line + srcfilename, fun.line_number ) if not recinfo: continue From 5a4c9234a9ef1471e68dcdfc1af6a87944d8bbdc Mon Sep 17 00:00:00 2001 From: disinvite Date: Fri, 1 Dec 2023 15:12:33 -0500 Subject: [PATCH 02/17] Allow prepending space for exact marker match --- tools/isledecomp/isledecomp/parser/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index 02d3c976..b62f0586 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -12,7 +12,7 @@ flags=re.I, ) -markerExactRegex = re.compile(r"// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$") +markerExactRegex = re.compile(r"\s*// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$") # The goal here is to just read whatever is on the next line, so some # flexibility in the formatting seems OK From 93ed16e74afb57f01addafdaa49e62114220aa1f Mon Sep 17 00:00:00 2001 From: disinvite Date: Fri, 1 Dec 2023 17:17:09 -0500 Subject: [PATCH 03/17] To eliminate noise, require the 0x prefix on offset for marker match --- tools/isledecomp/isledecomp/parser/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index b62f0586..f93b4475 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -8,7 +8,7 @@ markerRegex = re.compile( - r"\s*//\s*(\w+):\s*(\w+)\s+((?:0x)?[a-f0-9]+)", + r"\s*//\s*(\w+):\s*(\w+)\s+(0x[a-f0-9]+)", flags=re.I, ) From 1ba8a93bec5df424c0c505f52eeeebeaf2fa6434 Mon Sep 17 00:00:00 2001 From: disinvite Date: Fri, 1 Dec 2023 17:39:38 -0500 Subject: [PATCH 04/17] fix test from previous --- tools/isledecomp/tests/test_parser.py | 2 +- tools/isledecomp/tests/test_parser_util.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 7731918f..69764f74 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -26,7 +26,7 @@ def test_missing_sig(parser): def test_not_exact_syntax(parser): """Alert to inexact syntax right here in the parser instead of kicking it downstream. Doing this means we don't have to save the actual text.""" - parser.read_line("// function: test 1234") + parser.read_line("// function: test 0x1234") assert len(parser.alerts) == 1 assert parser.alerts[0].code == ParserError.BAD_DECOMP_MARKER diff --git a/tools/isledecomp/tests/test_parser_util.py b/tools/isledecomp/tests/test_parser_util.py index bbbf2d3a..a8882721 100644 --- a/tools/isledecomp/tests/test_parser_util.py +++ b/tools/isledecomp/tests/test_parser_util.py @@ -41,8 +41,8 @@ def test_is_blank_or_comment(line: str, expected: bool): (True, False, "// FUNCTION: ISLE 0xdeadbeef"), (True, False, "// FUNCTION: ISLE 0xdeadbeef"), (True, False, "// FUNCTION: ISLE 0xdeadbeef"), - # Must have 0x prefix for hex number - (True, False, "// FUNCTION: ISLE deadbeef"), + # Must have 0x prefix for hex number to match at all + (False, False, "// FUNCTION: ISLE deadbeef"), # Offset, module name, and STUB must be uppercase (True, False, "// function: ISLE 0xdeadbeef"), (True, False, "// function: isle 0xdeadbeef"), From a8387cd50d84aaa8c8d88e8fab1074618a96c50d Mon Sep 17 00:00:00 2001 From: disinvite Date: Fri, 1 Dec 2023 17:40:13 -0500 Subject: [PATCH 05/17] Count tab stops for indented functions to reduce MISSED_END_OF_FUNCTION noise --- tools/isledecomp/isledecomp/parser/parser.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index aebe1c86..7a984b88 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -95,6 +95,18 @@ def reset(self): self.fun_markers.empty() self.var_markers.empty() self.tbl_markers.empty() + + # To handle functions that are entirely indented (i.e. those defined + # in class declarations), remember how many whitespace characters + # came before the opening curly brace and match that up at the end. + # This should give us the same or better accuracy for a well-formed file. + # The alternative is counting the curly braces on each line + # but that's probably too cumbersome. + self.curly_indent_stops = 0 + + # For non-synthetic functions, save the line number where the function begins + # (i.e. where we see the curly brace) along with the function signature. + # We will need both when we reach the end of the function. self.function_start: int = 0 self.function_sig: str = "" @@ -150,6 +162,7 @@ def _function_done(self, unexpected: bool = False): ) self.fun_markers.empty() + self.curly_indent_stops = 0 self.state = ReaderState.SEARCH def _vtable_marker(self, marker: DecompMarker): @@ -308,13 +321,12 @@ def read_line(self, line: str): elif self.state == ReaderState.WANT_CURLY: if line.strip() == "{": + self.curly_indent_stops = line.index("{") self._function_starts_here() self.state = ReaderState.IN_FUNC elif self.state == ReaderState.IN_FUNC: - # Naive but reasonable assumption that functions will end with - # a curly brace on its own line with no prepended spaces. - if line.startswith("}"): + if line.strip().startswith("}") and line[self.curly_indent_stops] == "}": self._function_done() elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL): From 8c815418d261ba8c5f67a9a2cae349fe4ac92db8 Mon Sep 17 00:00:00 2001 From: disinvite Date: Fri, 1 Dec 2023 17:48:56 -0500 Subject: [PATCH 06/17] FUNCTION to SYNTHETIC where needed --- ISLE/isleapp.cpp | 2 +- LEGO1/legoentity.cpp | 2 +- LEGO1/mxcore.h | 4 ++-- LEGO1/mxloopingmidipresenter.cpp | 4 ++-- LEGO1/mxstillpresenter.cpp | 6 +++--- LEGO1/realtime/vector.cpp | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ISLE/isleapp.cpp b/ISLE/isleapp.cpp index 1d4d4641..451f45ad 100644 --- a/ISLE/isleapp.cpp +++ b/ISLE/isleapp.cpp @@ -281,7 +281,7 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine return msg.wParam; } -// FUNCTION: ISLE 0x401c40 +// SYNTHETIC: ISLE 0x401c40 // MxDSObject::SetAtomId // FUNCTION: ISLE 0x401ca0 diff --git a/LEGO1/legoentity.cpp b/LEGO1/legoentity.cpp index 7e8a953f..edd1d18d 100644 --- a/LEGO1/legoentity.cpp +++ b/LEGO1/legoentity.cpp @@ -7,7 +7,7 @@ DECOMP_SIZE_ASSERT(LegoEntity, 0x68) -// FUNCTION: LEGO1 0x10001090 +// SYNTHETIC: LEGO1 0x10001090 // LegoEntity::SetWorldSpeed // FUNCTION: LEGO1 0x1000c290 diff --git a/LEGO1/mxcore.h b/LEGO1/mxcore.h index 4de691cc..518949c6 100644 --- a/LEGO1/mxcore.h +++ b/LEGO1/mxcore.h @@ -9,10 +9,10 @@ class MxParam; // TODO: Find proper compilation unit to put these -// FUNCTION: LEGO1 0x100140d0 +// SYNTHETIC: LEGO1 0x100140d0 // MxCore::IsA -// FUNCTION: LEGO1 0x100144c0 +// SYNTHETIC: LEGO1 0x100144c0 // MxCore::ClassName // VTABLE: LEGO1 0x100dc0f8 diff --git a/LEGO1/mxloopingmidipresenter.cpp b/LEGO1/mxloopingmidipresenter.cpp index 3d58e56b..55857701 100644 --- a/LEGO1/mxloopingmidipresenter.cpp +++ b/LEGO1/mxloopingmidipresenter.cpp @@ -7,10 +7,10 @@ DECOMP_SIZE_ASSERT(MxLoopingMIDIPresenter, 0x58); -// FUNCTION: LEGO1 0x100b1830 +// SYNTHETIC: LEGO1 0x100b1830 // MxLoopingMIDIPresenter::ClassName -// FUNCTION: LEGO1 0x100b1840 +// SYNTHETIC: LEGO1 0x100b1840 // MxLoopingMIDIPresenter::IsA // SYNTHETIC: LEGO1 0x100b19c0 diff --git a/LEGO1/mxstillpresenter.cpp b/LEGO1/mxstillpresenter.cpp index e3adaa90..1d07e74a 100644 --- a/LEGO1/mxstillpresenter.cpp +++ b/LEGO1/mxstillpresenter.cpp @@ -11,7 +11,7 @@ DECOMP_SIZE_ASSERT(MxStillPresenter, 0x6c); // GLOBAL: LEGO1 0x10101eb0 const char* g_strBMP_ISMAP = "BMP_ISMAP"; -// FUNCTION: LEGO1 0x10043550 +// SYNTHETIC: LEGO1 0x10043550 // MxStillPresenter::~MxStillPresenter // FUNCTION: LEGO1 0x100435b0 @@ -20,10 +20,10 @@ void MxStillPresenter::Destroy() Destroy(FALSE); } -// FUNCTION: LEGO1 0x100435c0 +// SYNTHETIC: LEGO1 0x100435c0 // MxStillPresenter::ClassName -// FUNCTION: LEGO1 0x100435d0 +// SYNTHETIC: LEGO1 0x100435d0 // MxStillPresenter::IsA // SYNTHETIC: LEGO1 0x100436e0 diff --git a/LEGO1/realtime/vector.cpp b/LEGO1/realtime/vector.cpp index 1d41470c..582d6443 100644 --- a/LEGO1/realtime/vector.cpp +++ b/LEGO1/realtime/vector.cpp @@ -60,7 +60,7 @@ float Vector2Impl::DotImpl(float* p_a, float* p_b) const return p_b[0] * p_a[0] + p_b[1] * p_a[1]; } -// FUNCTION: LEGO1 0x10002060 +// SYNTHETIC: LEGO1 0x10002060 // Vector2Impl::SetData // FUNCTION: LEGO1 0x10002070 From d87d665127fae7dd6e5bd48d9af14a0a829bf9e2 Mon Sep 17 00:00:00 2001 From: disinvite Date: Fri, 1 Dec 2023 17:57:17 -0500 Subject: [PATCH 07/17] Missed marker conversion on SetAtomId --- LEGO1/mxdsobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LEGO1/mxdsobject.h b/LEGO1/mxdsobject.h index d4563d9a..5410867b 100644 --- a/LEGO1/mxdsobject.h +++ b/LEGO1/mxdsobject.h @@ -7,7 +7,7 @@ #include "mxdstypes.h" // TODO: Find proper compilation unit to put this -// FUNCTION: LEGO1 0x10005530 +// SYNTHETIC: LEGO1 0x10005530 // MxDSObject::SetAtomId // VTABLE: LEGO1 0x100dc868 From a4d92984da3687da8f3191828f6c3d5a604549a3 Mon Sep 17 00:00:00 2001 From: disinvite Date: Fri, 1 Dec 2023 18:29:20 -0500 Subject: [PATCH 08/17] pylint cleanup, remove unused code --- tools/checkorder/checkorder.py | 3 +- tools/isledecomp/isledecomp/parser/node.py | 1 - tools/isledecomp/isledecomp/parser/parser.py | 74 +++++++++---------- tools/isledecomp/isledecomp/parser/util.py | 1 - tools/isledecomp/tests/test_parser.py | 5 +- tools/isledecomp/tests/test_parser_samples.py | 6 +- .../tests/test_parser_statechange.py | 12 +-- tools/isledecomp/tests/test_parser_util.py | 2 - 8 files changed, 49 insertions(+), 55 deletions(-) diff --git a/tools/checkorder/checkorder.py b/tools/checkorder/checkorder.py index d2f0b23e..02636c09 100644 --- a/tools/checkorder/checkorder.py +++ b/tools/checkorder/checkorder.py @@ -19,7 +19,6 @@ def check_file(filename: str, verbose: bool = False) -> bool: with open(filename, "r", encoding="utf-8") as f: parser.read_lines(f) - just_offsets = [block.offset for block in parser.functions] sorted_offsets = sorted(just_offsets) file_out_of_order = just_offsets != sorted_offsets @@ -46,7 +45,7 @@ def check_file(filename: str, verbose: bool = False) -> bool: msg = " ".join( [ " " if fun.offset > prev_offset else "!", - f"{block.offset:08x}", + f"{fun.offset:08x}", f"{fun.end_line - fun.line_number:4} lines", f"{order_lookup[fun.offset]:3}", " ", diff --git a/tools/isledecomp/isledecomp/parser/node.py b/tools/isledecomp/isledecomp/parser/node.py index f9fbe3b5..96cc3362 100644 --- a/tools/isledecomp/isledecomp/parser/node.py +++ b/tools/isledecomp/isledecomp/parser/node.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from enum import Enum @dataclass diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 7a984b88..772ae4ef 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -1,6 +1,6 @@ # C++ file parser -from typing import List, TextIO, Iterable +from typing import List, Iterable, Iterator from enum import Enum from .util import ( DecompMarker, @@ -12,7 +12,6 @@ ) from .node import ( ParserAlert, - ParserNode, ParserFunction, ParserVariable, ParserVtable, @@ -64,9 +63,8 @@ def insert(self, marker: DecompMarker) -> bool: self.markers[module] = (marker.type, marker.offset) return False - def iter(self): - for module in self.markers: - (marker_type, offset) = self.markers[module] + def iter(self) -> Iterator[DecompMarker]: + for module, (marker_type, offset) in self.markers.items(): yield DecompMarker(marker_type, module, offset) def empty(self): @@ -74,27 +72,26 @@ def empty(self): class DecompParser: + # pylint: disable=too-many-instance-attributes + # Could combine output lists into a single list to get under the limit, + # but not right now def __init__(self): - self.fun_markers = MarkerDict() - self.var_markers = MarkerDict() - self.tbl_markers = MarkerDict() - self.reset() + # The lists to be populated as we parse + self.functions: List[ParserFunction] = [] + self.vtables: List[ParserVtable] = [] + self.variables: List[ParserVariable] = [] + self.alerts: List[ParserAlert] = [] - def reset(self): - # Output values - self.functions = [] - self.vtables = [] - self.variables = [] - self.alerts = [] - - # Internal state machine stuff self.line_number: int = 0 self.state: ReaderState = ReaderState.SEARCH self.last_line: str = "" - self.fun_markers.empty() - self.var_markers.empty() - self.tbl_markers.empty() + + # To allow for multiple markers where code is shared across different + # modules, save lists of compatible markers that appear in sequence + self.fun_markers = MarkerDict() + self.var_markers = MarkerDict() + self.tbl_markers = MarkerDict() # To handle functions that are entirely indented (i.e. those defined # in class declarations), remember how many whitespace characters @@ -102,7 +99,7 @@ def reset(self): # This should give us the same or better accuracy for a well-formed file. # The alternative is counting the curly braces on each line # but that's probably too cumbersome. - self.curly_indent_stops = 0 + self.curly_indent_stops: int = 0 # For non-synthetic functions, save the line number where the function begins # (i.e. where we see the curly brace) along with the function signature. @@ -110,6 +107,25 @@ def reset(self): self.function_start: int = 0 self.function_sig: str = "" + def reset(self): + self.functions = [] + self.vtables = [] + self.variables = [] + self.alerts = [] + + self.line_number = 0 + self.state = ReaderState.SEARCH + + self.last_line = "" + + self.fun_markers.empty() + self.var_markers.empty() + self.tbl_markers.empty() + + self.curly_indent_stops = 0 + self.function_start = 0 + self.function_sig = "" + def _recover(self): """We hit a syntax error and need to reset temp structures""" self.state = ReaderState.SEARCH @@ -340,19 +356,3 @@ def read_line(self, line: str): def read_lines(self, lines: Iterable): for line in lines: self.read_line(line) - - -def find_code_blocks(stream: TextIO) -> List[ParserNode]: - """Read the IO stream (file) line-by-line and give the following report: - Foreach code block (function) in the file, what are its starting and - ending line numbers, and what is the given offset in the original - binary. We expect the result to be ordered by line number because we - are reading the file from start to finish.""" - - # TODO: this will be replaced shortly. shim for now to avoid - # making more changes elsewhere - p = DecompParser() - for line in stream: - p.read_line(line) - - return p.functions diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index f93b4475..38515cfa 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -1,7 +1,6 @@ # C++ Parser utility functions and data structures from __future__ import annotations # python <3.10 compatibility import re -from typing import List from collections import namedtuple DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"]) diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 69764f74..fa5343dc 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -3,12 +3,11 @@ ReaderState, DecompParser, ) -from isledecomp.parser.util import DecompMarker from isledecomp.parser.error import ParserError -@pytest.fixture -def parser(): +@pytest.fixture(name="parser") +def fixture_parser(): return DecompParser() diff --git a/tools/isledecomp/tests/test_parser_samples.py b/tools/isledecomp/tests/test_parser_samples.py index a045e3cc..e74fda0e 100644 --- a/tools/isledecomp/tests/test_parser_samples.py +++ b/tools/isledecomp/tests/test_parser_samples.py @@ -1,6 +1,6 @@ import os -import pytest from typing import List, TextIO +import pytest from isledecomp.parser import DecompParser from isledecomp.parser.node import ParserSymbol @@ -20,8 +20,8 @@ def code_blocks_are_sorted(blocks: List[ParserSymbol]) -> bool: return just_offsets == sorted(just_offsets) -@pytest.fixture -def parser(): +@pytest.fixture(name="parser") +def fixture_parser(): return DecompParser() diff --git a/tools/isledecomp/tests/test_parser_statechange.py b/tools/isledecomp/tests/test_parser_statechange.py index 62c19175..714de579 100644 --- a/tools/isledecomp/tests/test_parser_statechange.py +++ b/tools/isledecomp/tests/test_parser_statechange.py @@ -3,7 +3,6 @@ ReaderState as _rs, DecompParser, ) -from isledecomp.parser.util import DecompMarker from isledecomp.parser.error import ParserError as _pe # fmt: off @@ -35,28 +34,28 @@ (_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None), (_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None), (_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), - + (_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "STUB", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), - + (_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None), (_rs.IN_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), - + (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), (_rs.IN_FUNC_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), - + (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), @@ -75,7 +74,8 @@ def test_state_change_by_marker( ): p = DecompParser() p.state = state - p._handle_marker(DecompMarker(marker_type, "TEST", 0x1234)) + mock_line = f"// {marker_type}: TEST 0x1234" + p.read_line(mock_line) assert p.state == new_state if expected_error is not None: diff --git a/tools/isledecomp/tests/test_parser_util.py b/tools/isledecomp/tests/test_parser_util.py index a8882721..131a8ab3 100644 --- a/tools/isledecomp/tests/test_parser_util.py +++ b/tools/isledecomp/tests/test_parser_util.py @@ -1,5 +1,3 @@ -from collections import namedtuple -from typing import List import pytest from isledecomp.parser.parser import MarkerDict from isledecomp.parser.util import ( From 806720f5c9925b14bf153e969d67141c45d80377 Mon Sep 17 00:00:00 2001 From: disinvite Date: Fri, 1 Dec 2023 19:42:36 -0500 Subject: [PATCH 09/17] Fix unexpected function end, add more unit tests --- tools/isledecomp/isledecomp/parser/parser.py | 8 +- tools/isledecomp/tests/test_parser.py | 120 ++++++++++++++++++- 2 files changed, 122 insertions(+), 6 deletions(-) diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 772ae4ef..cea4523c 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -162,7 +162,7 @@ def _synthetic_marker(self, marker: DecompMarker): def _function_done(self, unexpected: bool = False): end_line = self.line_number if unexpected: - end_line -= -1 + end_line -= 1 for marker in self.fun_markers.iter(): self.functions.append( @@ -249,7 +249,7 @@ def _handle_marker(self, marker: DecompMarker): # We hit another offset unexpectedly. # We can recover easily by just ending the function here. self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) - self._function_done() + self._function_done(unexpected=True) # Start the next function right after so we can # read the next line. @@ -262,7 +262,7 @@ def _handle_marker(self, marker: DecompMarker): self._synthetic_marker(marker) elif self.state == ReaderState.IN_FUNC: self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) - self._function_done() + self._function_done(unexpected=True) self._synthetic_marker(marker) else: self._syntax_error(ParserError.INCOMPATIBLE_MARKER) @@ -283,7 +283,7 @@ def _handle_marker(self, marker: DecompMarker): self._vtable_marker(marker) elif self.state == ReaderState.IN_FUNC: self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) - self._function_done() + self._function_done(unexpected=True) self._vtable_marker(marker) else: self._syntax_error(ParserError.INCOMPATIBLE_MARKER) diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index fa5343dc..ce8ad307 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -39,7 +39,8 @@ def test_invalid_marker(parser): assert parser.alerts[0].code == ParserError.BOGUS_MARKER -def test_unexpected_marker(parser): +def test_incompatible_marker(parser): + """The marker we just read cannot be handled in the current parser state""" parser.read_lines( [ "// FUNCTION: TEST 0x1234", @@ -52,6 +53,7 @@ def test_unexpected_marker(parser): def test_variable(parser): + """Should identify a global variable""" parser.read_lines( [ "// GLOBAL: HELLO 0x1234", @@ -62,7 +64,8 @@ def test_variable(parser): def test_synthetic_plus_marker(parser): - """Should fail with error and not log the synthetic""" + """Marker tracking preempts synthetic name detection. + Should fail with error and not log the synthetic""" parser.read_lines( [ "// SYNTHETIC: HEY 0x555", @@ -157,6 +160,21 @@ def test_multiple_variables(parser): assert len(parser.variables) == 2 +def test_multiple_variables_same_module(parser): + """Should not overwrite offset""" + parser.read_lines( + [ + "// GLOBAL: HELLO 0x1234", + "// GLOBAL: HELLO 0x555", + "const char *g_greeting;", + ] + ) + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE + assert len(parser.variables) == 1 + assert parser.variables[0].offset == 0x1234 + + def test_multiple_vtables(parser): parser.read_lines( [ @@ -167,3 +185,101 @@ def test_multiple_vtables(parser): ) assert len(parser.alerts) == 0 assert len(parser.vtables) == 2 + + +def test_multiple_vtables_same_module(parser): + """Should not overwrite offset""" + parser.read_lines( + [ + "// VTABLE: HELLO 0x1234", + "// VTABLE: HELLO 0x5432", + "class MxString : public MxCore {", + ] + ) + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE + assert len(parser.vtables) == 1 + assert parser.vtables[0].offset == 0x1234 + + +def test_synthetic(parser): + parser.read_lines( + [ + "// SYNTHETIC: TEST 0x1234", + "// TestClass::TestMethod", + ] + ) + assert len(parser.functions) == 1 + assert parser.functions[0].is_template is True + assert parser.functions[0].name == "TestClass::TestMethod" + + +def test_synthetic_same_module(parser): + parser.read_lines( + [ + "// SYNTHETIC: TEST 0x1234", + "// SYNTHETIC: TEST 0x555", + "// TestClass::TestMethod", + ] + ) + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE + assert len(parser.functions) == 1 + assert parser.functions[0].offset == 0x1234 + + +@pytest.mark.skip(reason="todo") +def test_synthetic_no_comment(parser): + """Synthetic marker followed by a code line (i.e. non-comment)""" + parser.read_lines( + [ + "// SYNTHETIC: TEST 0x1234", + "int x = 123;", + ] + ) + assert len(parser.functions) == 0 + + +def test_single_line_function(parser): + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "int hello() { return 1234; }", + ] + ) + assert len(parser.functions) == 1 + assert parser.functions[0].line_number == 2 + assert parser.functions[0].end_line == 2 + + +def test_indented_function(parser): + """Track the number of whitespace characters when we begin the function + and check that against each closing curly brace we read. + Should not report a syntax warning if the function is indented""" + parser.read_lines( + [ + " // FUNCTION: TEST 0x1234", + " void indented()", + " {", + " // TODO", + " }", + " // FUNCTION: NEXT 0x555", + ] + ) + assert len(parser.alerts) == 0 + + +@pytest.mark.xfail(reason="todo") +def test_indented_no_curly_hint(parser): + """Same as above, but opening curly brace is on the same line. + Without the hint of how many whitespace characters to check, can we + still identify the end of the function?""" + parser.read_lines( + [ + " // FUNCTION: TEST 0x1234", + " void indented() {", + " }", + " // FUNCTION: NEXT 0x555", + ] + ) + assert len(parser.alerts) == 0 From 9947231f6d48612e9ab0bae2b7da07643cac1890 Mon Sep 17 00:00:00 2001 From: disinvite Date: Fri, 1 Dec 2023 19:58:40 -0500 Subject: [PATCH 10/17] Be more strict about synthetic name syntax --- tools/isledecomp/isledecomp/parser/error.py | 6 +++--- tools/isledecomp/isledecomp/parser/parser.py | 12 ++++++++---- tools/isledecomp/isledecomp/parser/util.py | 8 ++++---- tools/isledecomp/tests/test_parser.py | 4 +++- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/tools/isledecomp/isledecomp/parser/error.py b/tools/isledecomp/isledecomp/parser/error.py index 8bda90da..dce99448 100644 --- a/tools/isledecomp/isledecomp/parser/error.py +++ b/tools/isledecomp/isledecomp/parser/error.py @@ -18,9 +18,6 @@ class ParserError(Enum): # to handle it BOGUS_MARKER = 104 - # WARN: Under a synthetic marker we expected a comment but found a code line instead - SYNTHETIC_NOT_COMMENT = 110 - # WARN: New function marker appeared while we were inside a function MISSED_END_OF_FUNCTION = 117 @@ -31,3 +28,6 @@ class ParserError(Enum): # with the preceding markers. # For example, a GLOBAL cannot follow FUNCTION/STUB INCOMPATIBLE_MARKER = 201 + + # ERROR: The line following a synthetic marker was not a comment + BAD_SYNTHETIC = 202 diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index cea4523c..24dc0b23 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -7,7 +7,7 @@ is_blank_or_comment, match_marker, is_marker_exact, - get_template_function_name, + get_synthetic_name, remove_trailing_comment, ) from .node import ( @@ -307,9 +307,13 @@ def read_line(self, line: str): if self.state == ReaderState.IN_TEMPLATE: # TEMPLATE functions are a special case. The signature is # given on the next line (in a // comment) - self.function_sig = get_template_function_name(line) - self._function_starts_here() - self._function_done() + name = get_synthetic_name(line) + if name is None: + self._syntax_error(ParserError.BAD_SYNTHETIC) + else: + self.function_sig = name + self._function_starts_here() + self._function_done() elif self.state == ReaderState.WANT_SIG: # Skip blank lines or comments that come after the offset diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index 38515cfa..e0b25d85 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -23,15 +23,15 @@ trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$") -def get_template_function_name(line: str) -> str: - """Parse function signature for special TEMPLATE functions""" +def get_synthetic_name(line: str) -> str | None: + """Synthetic names appear on a single line comment on the line after the marker. + If that's not what we have, return None""" template_match = templateCommentRegex.match(line) - # If we don't match, you get whatever is on the line as the signature if template_match is not None: return template_match.group(1) - return line + return None def remove_trailing_comment(line: str) -> str: diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index ce8ad307..222a9ac1 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -228,7 +228,6 @@ def test_synthetic_same_module(parser): assert parser.functions[0].offset == 0x1234 -@pytest.mark.skip(reason="todo") def test_synthetic_no_comment(parser): """Synthetic marker followed by a code line (i.e. non-comment)""" parser.read_lines( @@ -238,6 +237,9 @@ def test_synthetic_no_comment(parser): ] ) assert len(parser.functions) == 0 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.BAD_SYNTHETIC + assert parser.state == ReaderState.SEARCH def test_single_line_function(parser): From 97468dcd8231683aec72bce8d0b5927d73b6014a Mon Sep 17 00:00:00 2001 From: disinvite Date: Sat, 2 Dec 2023 12:30:48 -0500 Subject: [PATCH 11/17] Revert "Missed marker conversion on SetAtomId" This reverts commit d87d665127fae7dd6e5bd48d9af14a0a829bf9e2. --- LEGO1/mxdsobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LEGO1/mxdsobject.h b/LEGO1/mxdsobject.h index 5410867b..d4563d9a 100644 --- a/LEGO1/mxdsobject.h +++ b/LEGO1/mxdsobject.h @@ -7,7 +7,7 @@ #include "mxdstypes.h" // TODO: Find proper compilation unit to put this -// SYNTHETIC: LEGO1 0x10005530 +// FUNCTION: LEGO1 0x10005530 // MxDSObject::SetAtomId // VTABLE: LEGO1 0x100dc868 From ba4e28b8c48b157a6f4ec64427f6814f89857f97 Mon Sep 17 00:00:00 2001 From: disinvite Date: Sat, 2 Dec 2023 12:31:09 -0500 Subject: [PATCH 12/17] Revert "FUNCTION to SYNTHETIC where needed" This reverts commit 8c815418d261ba8c5f67a9a2cae349fe4ac92db8. --- ISLE/isleapp.cpp | 2 +- LEGO1/legoentity.cpp | 2 +- LEGO1/mxcore.h | 4 ++-- LEGO1/mxloopingmidipresenter.cpp | 4 ++-- LEGO1/mxstillpresenter.cpp | 6 +++--- LEGO1/realtime/vector.cpp | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ISLE/isleapp.cpp b/ISLE/isleapp.cpp index 451f45ad..1d4d4641 100644 --- a/ISLE/isleapp.cpp +++ b/ISLE/isleapp.cpp @@ -281,7 +281,7 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine return msg.wParam; } -// SYNTHETIC: ISLE 0x401c40 +// FUNCTION: ISLE 0x401c40 // MxDSObject::SetAtomId // FUNCTION: ISLE 0x401ca0 diff --git a/LEGO1/legoentity.cpp b/LEGO1/legoentity.cpp index edd1d18d..7e8a953f 100644 --- a/LEGO1/legoentity.cpp +++ b/LEGO1/legoentity.cpp @@ -7,7 +7,7 @@ DECOMP_SIZE_ASSERT(LegoEntity, 0x68) -// SYNTHETIC: LEGO1 0x10001090 +// FUNCTION: LEGO1 0x10001090 // LegoEntity::SetWorldSpeed // FUNCTION: LEGO1 0x1000c290 diff --git a/LEGO1/mxcore.h b/LEGO1/mxcore.h index 518949c6..4de691cc 100644 --- a/LEGO1/mxcore.h +++ b/LEGO1/mxcore.h @@ -9,10 +9,10 @@ class MxParam; // TODO: Find proper compilation unit to put these -// SYNTHETIC: LEGO1 0x100140d0 +// FUNCTION: LEGO1 0x100140d0 // MxCore::IsA -// SYNTHETIC: LEGO1 0x100144c0 +// FUNCTION: LEGO1 0x100144c0 // MxCore::ClassName // VTABLE: LEGO1 0x100dc0f8 diff --git a/LEGO1/mxloopingmidipresenter.cpp b/LEGO1/mxloopingmidipresenter.cpp index 55857701..3d58e56b 100644 --- a/LEGO1/mxloopingmidipresenter.cpp +++ b/LEGO1/mxloopingmidipresenter.cpp @@ -7,10 +7,10 @@ DECOMP_SIZE_ASSERT(MxLoopingMIDIPresenter, 0x58); -// SYNTHETIC: LEGO1 0x100b1830 +// FUNCTION: LEGO1 0x100b1830 // MxLoopingMIDIPresenter::ClassName -// SYNTHETIC: LEGO1 0x100b1840 +// FUNCTION: LEGO1 0x100b1840 // MxLoopingMIDIPresenter::IsA // SYNTHETIC: LEGO1 0x100b19c0 diff --git a/LEGO1/mxstillpresenter.cpp b/LEGO1/mxstillpresenter.cpp index 1d07e74a..e3adaa90 100644 --- a/LEGO1/mxstillpresenter.cpp +++ b/LEGO1/mxstillpresenter.cpp @@ -11,7 +11,7 @@ DECOMP_SIZE_ASSERT(MxStillPresenter, 0x6c); // GLOBAL: LEGO1 0x10101eb0 const char* g_strBMP_ISMAP = "BMP_ISMAP"; -// SYNTHETIC: LEGO1 0x10043550 +// FUNCTION: LEGO1 0x10043550 // MxStillPresenter::~MxStillPresenter // FUNCTION: LEGO1 0x100435b0 @@ -20,10 +20,10 @@ void MxStillPresenter::Destroy() Destroy(FALSE); } -// SYNTHETIC: LEGO1 0x100435c0 +// FUNCTION: LEGO1 0x100435c0 // MxStillPresenter::ClassName -// SYNTHETIC: LEGO1 0x100435d0 +// FUNCTION: LEGO1 0x100435d0 // MxStillPresenter::IsA // SYNTHETIC: LEGO1 0x100436e0 diff --git a/LEGO1/realtime/vector.cpp b/LEGO1/realtime/vector.cpp index 582d6443..1d41470c 100644 --- a/LEGO1/realtime/vector.cpp +++ b/LEGO1/realtime/vector.cpp @@ -60,7 +60,7 @@ float Vector2Impl::DotImpl(float* p_a, float* p_b) const return p_b[0] * p_a[0] + p_b[1] * p_a[1]; } -// SYNTHETIC: LEGO1 0x10002060 +// FUNCTION: LEGO1 0x10002060 // Vector2Impl::SetData // FUNCTION: LEGO1 0x10002070 From db971ada604038d6f27b5930cce919048e1ba8fd Mon Sep 17 00:00:00 2001 From: disinvite Date: Sat, 2 Dec 2023 13:07:11 -0500 Subject: [PATCH 13/17] Implicit lookup by name for functions --- tools/isledecomp/isledecomp/parser/error.py | 10 ++- tools/isledecomp/isledecomp/parser/node.py | 1 + tools/isledecomp/isledecomp/parser/parser.py | 50 ++++++++--- tools/isledecomp/tests/test_parser.py | 88 ++++++++++++++++++-- tools/reccmp/reccmp.py | 2 +- 5 files changed, 129 insertions(+), 22 deletions(-) diff --git a/tools/isledecomp/isledecomp/parser/error.py b/tools/isledecomp/isledecomp/parser/error.py index dce99448..c18e3e29 100644 --- a/tools/isledecomp/isledecomp/parser/error.py +++ b/tools/isledecomp/isledecomp/parser/error.py @@ -19,7 +19,15 @@ class ParserError(Enum): BOGUS_MARKER = 104 # WARN: New function marker appeared while we were inside a function - MISSED_END_OF_FUNCTION = 117 + MISSED_END_OF_FUNCTION = 105 + + # WARN: If we find a curly brace right after the function declaration + # this is wrong but we still have enough to make a match with reccmp + MISSED_START_OF_FUNCTION = 106 + + # WARN: A blank line appeared between the end of FUNCTION markers + # and the start of the function. We can ignore it, but the line shouldn't be there + UNEXPECTED_BLANK_LINE = 107 # ERROR: We found a marker unexpectedly UNEXPECTED_MARKER = 200 diff --git a/tools/isledecomp/isledecomp/parser/node.py b/tools/isledecomp/isledecomp/parser/node.py index 96cc3362..0ee87000 100644 --- a/tools/isledecomp/isledecomp/parser/node.py +++ b/tools/isledecomp/isledecomp/parser/node.py @@ -21,6 +21,7 @@ class ParserSymbol(ParserNode): @dataclass class ParserFunction(ParserSymbol): name: str + lookup_by_name: bool = False is_stub: bool = False is_synthetic: bool = False is_template: bool = False diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 24dc0b23..37e8d386 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -42,6 +42,10 @@ def marker_is_synthetic(marker: DecompMarker) -> bool: return marker.type.upper() in ("SYNTHETIC", "TEMPLATE") +def marker_is_template(marker: DecompMarker) -> bool: + return marker.type.upper() == "TEMPLATE" + + def marker_is_function(marker: DecompMarker) -> bool: return marker.type.upper() in ("FUNCTION", "STUB") @@ -55,8 +59,8 @@ def __init__(self): self.markers: dict = {} def insert(self, marker: DecompMarker) -> bool: + """Return True if this insert would overwrite""" module = marker.module.upper() - # Return True if this insert would overwrite if module in self.markers: return True @@ -159,9 +163,12 @@ def _synthetic_marker(self, marker: DecompMarker): self._syntax_warning(ParserError.DUPLICATE_MODULE) self.state = ReaderState.IN_TEMPLATE - def _function_done(self, unexpected: bool = False): + def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False): end_line = self.line_number if unexpected: + # If we missed the end of the previous function, assume it ended + # on the previous line and that whatever we are tracking next + # begins on the current line. end_line -= 1 for marker in self.fun_markers.iter(): @@ -170,8 +177,10 @@ def _function_done(self, unexpected: bool = False): line_number=self.function_start, module=marker.module, offset=marker.offset, + lookup_by_name=lookup_by_name, is_stub=marker_is_stub(marker), - is_template=marker_is_synthetic(marker), + is_synthetic=marker_is_synthetic(marker), + is_template=marker_is_template(marker), name=self.function_sig, end_line=end_line, ) @@ -262,7 +271,7 @@ def _handle_marker(self, marker: DecompMarker): self._synthetic_marker(marker) elif self.state == ReaderState.IN_FUNC: self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) - self._function_done(unexpected=True) + self._function_done(lookup_by_name=True, unexpected=True) self._synthetic_marker(marker) else: self._syntax_error(ParserError.INCOMPATIBLE_MARKER) @@ -304,6 +313,7 @@ def read_line(self, line: str): self._handle_marker(marker) return + line_strip = line.strip() if self.state == ReaderState.IN_TEMPLATE: # TEMPLATE functions are a special case. The signature is # given on the next line (in a // comment) @@ -313,16 +323,32 @@ def read_line(self, line: str): else: self.function_sig = name self._function_starts_here() - self._function_done() + self._function_done(lookup_by_name=True) elif self.state == ReaderState.WANT_SIG: - # Skip blank lines or comments that come after the offset - # marker. There is not a formal procedure for this, so just - # assume the next "code line" is the function signature - if not is_blank_or_comment(line): + # Ignore blanks on the way to function start or function name + if len(line_strip) == 0: + self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE) + + elif line_strip.startswith("//"): + # If we found a comment, assume implicit lookup-by-name + # function and end here. We know this is not a decomp marker + # because it would have been handled already. + self.function_sig = get_synthetic_name(line) + self._function_starts_here() + self._function_done(lookup_by_name=True) + + elif line_strip == "{": + # We missed the function signature but we can recover from this + self.function_sig = "(unknown)" + self._function_starts_here() + self._syntax_warning(ParserError.MISSED_START_OF_FUNCTION) + self.state = ReaderState.IN_FUNC + + else: # Inline functions may end with a comment. Strip that out # to help parsing. - self.function_sig = remove_trailing_comment(line.strip()) + self.function_sig = remove_trailing_comment(line_strip) # Now check to see if the opening curly bracket is on the # same line. clang-format should prevent this (BraceWrapping) @@ -340,13 +366,13 @@ def read_line(self, line: str): self.state = ReaderState.WANT_CURLY elif self.state == ReaderState.WANT_CURLY: - if line.strip() == "{": + if line_strip == "{": self.curly_indent_stops = line.index("{") self._function_starts_here() self.state = ReaderState.IN_FUNC elif self.state == ReaderState.IN_FUNC: - if line.strip().startswith("}") and line[self.curly_indent_stops] == "}": + if line_strip.startswith("}") and line[self.curly_indent_stops] == "}": self._function_done() elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL): diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 222a9ac1..7954aee1 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -11,15 +11,23 @@ def fixture_parser(): return DecompParser() -@pytest.mark.skip(reason="todo") def test_missing_sig(parser): - """Bad syntax: function signature is missing""" - parser.read_lines(["// FUNCTION: TEST 0x1234", "{"]) - assert parser.state == ReaderState.IN_FUNC - assert len(parser.alerts) == 1 - parser.read_line("}") + """In the hopefully rare scenario that the function signature and marker + are swapped, we still have enough to match witch reccmp""" + parser.read_lines( + [ + "void my_function()", + "// FUNCTION: TEST 0x1234", + "{", + "}", + ] + ) + assert parser.state == ReaderState.SEARCH assert len(parser.functions) == 1 - assert parser.functions[0] != "{" + assert parser.functions[0].line_number == 3 + + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.MISSED_START_OF_FUNCTION def test_not_exact_syntax(parser): @@ -210,7 +218,7 @@ def test_synthetic(parser): ] ) assert len(parser.functions) == 1 - assert parser.functions[0].is_template is True + assert parser.functions[0].lookup_by_name is True assert parser.functions[0].name == "TestClass::TestMethod" @@ -285,3 +293,67 @@ def test_indented_no_curly_hint(parser): ] ) assert len(parser.alerts) == 0 + + +def test_implicit_lookup_by_name(parser): + """FUNCTION (or STUB) offsets must directly precede the function signature. + If we detect a comment instead, we assume that this is a lookup-by-name + function and end here.""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "// TestClass::TestMethod()", + ] + ) + assert parser.state == ReaderState.SEARCH + assert len(parser.functions) == 1 + assert parser.functions[0].lookup_by_name is True + assert parser.functions[0].name == "TestClass::TestMethod()" + + +def test_function_with_spaces(parser): + """There should not be any spaces between the end of FUNCTION markers + and the start or name of the function. If it's a blank line, we can safely + ignore but should alert to this.""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + " ", + "inline void test_function() { };", + ] + ) + assert len(parser.functions) == 1 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE + + +def test_function_with_spaces_implicit(parser): + """Same as above, but for implicit lookup-by-name""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + " ", + "// Implicit::Method", + ] + ) + assert len(parser.functions) == 1 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE + + +@pytest.mark.xfail(reason="will assume implicit lookup-by-name function") +def test_function_is_commented(parser): + """In an ideal world, we would recognize that there is no code here. + Some editors (or users) might comment the function on each line like this + but hopefully it is rare.""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "// int my_function()", + "// {", + "// return 5;", + "// }", + ] + ) + + assert len(parser.functions) == 0 diff --git a/tools/reccmp/reccmp.py b/tools/reccmp/reccmp.py index 1d4cd5b1..f7659703 100755 --- a/tools/reccmp/reccmp.py +++ b/tools/reccmp/reccmp.py @@ -334,7 +334,7 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac else: continue - if fun.is_template: + if fun.lookup_by_name: recinfo = syminfo.get_recompiled_address_from_name(fun.name) if not recinfo: continue From 9f1302e8d807b6efd871dfee9dcb753488314889 Mon Sep 17 00:00:00 2001 From: disinvite Date: Sat, 2 Dec 2023 18:31:46 -0500 Subject: [PATCH 14/17] Fix VTABLE SYNTHETIC and other decomp markers --- LEGO1/legoinputmanager.h | 6 +++--- LEGO1/legopathcontrollerlist.h | 6 +++--- LEGO1/mxdirect3d.h | 3 ++- LEGO1/mxdsactionlist.h | 4 ++-- LEGO1/mxpalette.cpp | 3 ++- LEGO1/mxpresenterlist.h | 6 +++--- LEGO1/mxregionlist.h | 14 ++++++++------ LEGO1/mxstreamchunklist.h | 4 ++-- LEGO1/mxstreamlist.cpp | 2 +- LEGO1/mxvariabletable.h | 4 ++-- 10 files changed, 28 insertions(+), 24 deletions(-) diff --git a/LEGO1/legoinputmanager.h b/LEGO1/legoinputmanager.h index a6e34a44..9a4fe6e7 100644 --- a/LEGO1/legoinputmanager.h +++ b/LEGO1/legoinputmanager.h @@ -12,13 +12,13 @@ class LegoControlManager; -// VTABLE: LEGO1 0x100d87b8 SYNTHETIC +// VTABLE: LEGO1 0x100d87b8 // class MxCollection -// VTABLE: LEGO1 0x100d87d0 SYNTHETIC +// VTABLE: LEGO1 0x100d87d0 // class MxList -// VTABLE: LEGO1 0x100d87e8 SYNTHETIC +// VTABLE: LEGO1 0x100d87e8 // class MxQueue // VTABLE: LEGO1 0x100d8800 diff --git a/LEGO1/legopathcontrollerlist.h b/LEGO1/legopathcontrollerlist.h index dbda40e9..5a7d4c60 100644 --- a/LEGO1/legopathcontrollerlist.h +++ b/LEGO1/legopathcontrollerlist.h @@ -5,7 +5,7 @@ #include "mxlist.h" #include "mxtypes.h" -// VTABLE: LEGO1 0x100d6320 SYNTHETIC +// VTABLE: LEGO1 0x100d6320 // class MxPtrList // VTABLE: LEGO1 0x100d6338 @@ -17,10 +17,10 @@ class LegoPathControllerList : public MxPtrList { static void Destroy(LegoPathController*); }; -// VTABLE: LEGO1 0x100d6380 SYNTHETIC +// VTABLE: LEGO1 0x100d6380 // class MxCollection -// VTABLE: LEGO1 0x100d6398 SYNTHETIC +// VTABLE: LEGO1 0x100d6398 // class MxList #endif // LEGOPATHCONTROLLERLIST_H diff --git a/LEGO1/mxdirect3d.h b/LEGO1/mxdirect3d.h index 0d0b48fd..f038ee7f 100644 --- a/LEGO1/mxdirect3d.h +++ b/LEGO1/mxdirect3d.h @@ -17,7 +17,8 @@ class MxDeviceModeFinder { MxDirectDraw::DeviceModesInfo* m_deviceInfo; // +0xe0 }; -// VTABLE: LEGO1 0x100db814 (or 0x100d9cc8?) +// VTABLE: LEGO1 0x100db814 +// or is it 0x100d9cc8? // SIZE 0x198 class MxDeviceEnumerate { public: diff --git a/LEGO1/mxdsactionlist.h b/LEGO1/mxdsactionlist.h index 7050dc53..19929d20 100644 --- a/LEGO1/mxdsactionlist.h +++ b/LEGO1/mxdsactionlist.h @@ -6,10 +6,10 @@ class MxDSAction; -// VTABLE: LEGO1 0x100dcea8 SYNTHETIC +// VTABLE: LEGO1 0x100dcea8 // class MxCollection -// VTABLE: LEGO1 0x100dcec0 SYNTHETIC +// VTABLE: LEGO1 0x100dcec0 // class MxList // VTABLE: LEGO1 0x100dced8 diff --git a/LEGO1/mxpalette.cpp b/LEGO1/mxpalette.cpp index b5b30257..aefe57c2 100644 --- a/LEGO1/mxpalette.cpp +++ b/LEGO1/mxpalette.cpp @@ -3,7 +3,8 @@ #include "mxomni.h" #include "mxvideomanager.h" -// GLOBAL: LEGO1 0x10102188 0x400 +// GLOBAL: LEGO1 0x10102188 +// SIZE: 0x400 PALETTEENTRY g_defaultPaletteEntries[256] = { {0u, 0u, 0u, 0u}, {128u, 0u, 0u, 0u}, {0u, 128u, 0u, 0u}, {128u, 128u, 0u, 0u}, {0u, 0u, 128u, 0u}, {128u, 0u, 128u, 0u}, {0u, 128u, 128u, 0u}, {128u, 128u, 128u, 0u}, diff --git a/LEGO1/mxpresenterlist.h b/LEGO1/mxpresenterlist.h index 6b84ffab..d96374b9 100644 --- a/LEGO1/mxpresenterlist.h +++ b/LEGO1/mxpresenterlist.h @@ -5,7 +5,7 @@ class MxPresenter; -// VTABLE: LEGO1 0x100d62f0 SYNTHETIC +// VTABLE: LEGO1 0x100d62f0 // class MxPtrList // VTABLE: LEGO1 0x100d6308 @@ -17,10 +17,10 @@ class MxPresenterList : public MxPtrList { typedef MxListCursorChildChild MxPresenterListCursor; -// VTABLE: LEGO1 0x100d6350 SYNTHETIC +// VTABLE: LEGO1 0x100d6350 // class MxCollection -// VTABLE: LEGO1 0x100d6368 SYNTHETIC +// VTABLE: LEGO1 0x100d6368 // class MxList #endif // MXPRESENTERLIST_H diff --git a/LEGO1/mxregionlist.h b/LEGO1/mxregionlist.h index 48bf9050..3aa0487b 100644 --- a/LEGO1/mxregionlist.h +++ b/LEGO1/mxregionlist.h @@ -6,13 +6,13 @@ struct MxRegionTopBottom; struct MxRegionLeftRight; -// VTABLE: LEGO1 0x100dcb10 SYNTHETIC +// VTABLE: LEGO1 0x100dcb10 // class MxCollection -// VTABLE: LEGO1 0x100dcb28 SYNTHETIC +// VTABLE: LEGO1 0x100dcb28 // class MxList -// VTABLE: LEGO1 0x100dcb40 SYNTHETIC +// VTABLE: LEGO1 0x100dcb40 // class MxPtrList // VTABLE: LEGO1 0x100dcb58 @@ -24,18 +24,20 @@ class MxRegionList : public MxPtrList { }; // VTABLE: LEGO1 0x100dcb88 +// class MxListCursorChildChild typedef MxListCursorChildChild MxRegionListCursor; // VTABLE: LEGO1 0x100dcc10 +// class MxListCursorChildChild typedef MxListCursorChildChild MxRegionLeftRightListCursor; -// VTABLE: LEGO1 0x100dcc40 SYNTHETIC +// VTABLE: LEGO1 0x100dcc40 // class MxCollection -// VTABLE: LEGO1 0x100dcc58 SYNTHETIC +// VTABLE: LEGO1 0x100dcc58 // class MxList -// VTABLE: LEGO1 0x100dcc70 SYNTHETIC +// VTABLE: LEGO1 0x100dcc70 // class MxPtrList // VTABLE: LEGO1 0x100dcc88 diff --git a/LEGO1/mxstreamchunklist.h b/LEGO1/mxstreamchunklist.h index d8d9f170..4924b6ff 100644 --- a/LEGO1/mxstreamchunklist.h +++ b/LEGO1/mxstreamchunklist.h @@ -6,10 +6,10 @@ class MxStreamChunk; -// VTABLE: LEGO1 0x100dc5d0 SYNTHETIC +// VTABLE: LEGO1 0x100dc5d0 // class MxCollection -// VTABLE: LEGO1 0x100dc5e8 SYNTHETIC +// VTABLE: LEGO1 0x100dc5e8 // class MxList // VTABLE: LEGO1 0x100dc600 diff --git a/LEGO1/mxstreamlist.cpp b/LEGO1/mxstreamlist.cpp index 0d7d7051..1fa4dd48 100644 --- a/LEGO1/mxstreamlist.cpp +++ b/LEGO1/mxstreamlist.cpp @@ -8,7 +8,7 @@ DECOMP_SIZE_ASSERT(MxStreamListMxDSSubscriber, 0xc); // FUNCTION: LEGO1 0x100bfa80 MxDSAction* MxStreamListMxDSAction::Find(MxDSAction* p_action, MxBool p_delete) { - // DECOMP: ALPHA 0x1008b99d ? + // DECOMP ALPHA 0x1008b99d ? MxDSAction* found = NULL; diff --git a/LEGO1/mxvariabletable.h b/LEGO1/mxvariabletable.h index 7da6e0fc..1a91da26 100644 --- a/LEGO1/mxvariabletable.h +++ b/LEGO1/mxvariabletable.h @@ -20,10 +20,10 @@ class MxVariableTable : public MxHashTable { virtual MxU32 Hash(MxVariable*) override; // vtable+0x18 }; -// VTABLE: LEGO1 0x100dc1b0 SYNTHETIC +// VTABLE: LEGO1 0x100dc1b0 // class MxCollection -// VTABLE: LEGO1 0x100dc1e8 SYNTHETIC +// VTABLE: LEGO1 0x100dc1e8 // class MxHashTable #endif // MXVARIABLETABLE_H From cbeb4168e0603bd31abf3ab920359adcf2cf43cf Mon Sep 17 00:00:00 2001 From: disinvite Date: Sat, 2 Dec 2023 18:32:39 -0500 Subject: [PATCH 15/17] Get vtable class name --- tools/isledecomp/isledecomp/parser/parser.py | 14 +++++--- tools/isledecomp/isledecomp/parser/util.py | 30 +++++++++++++++++ tools/isledecomp/tests/test_parser.py | 1 + tools/isledecomp/tests/test_parser_util.py | 34 ++++++++++++++++++++ 4 files changed, 75 insertions(+), 4 deletions(-) diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 37e8d386..89b79928 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -7,6 +7,7 @@ is_blank_or_comment, match_marker, is_marker_exact, + get_class_name, get_synthetic_name, remove_trailing_comment, ) @@ -195,14 +196,18 @@ def _vtable_marker(self, marker: DecompMarker): self._syntax_warning(ParserError.DUPLICATE_MODULE) self.state = ReaderState.IN_VTABLE - def _vtable_done(self): + def _vtable_done(self, class_name: str = None): + if class_name is None: + # Best we can do + class_name = self.last_line.strip() + for marker in self.tbl_markers.iter(): self.vtables.append( ParserVtable( line_number=self.line_number, module=marker.module, offset=marker.offset, - class_name=self.last_line.strip(), + class_name=class_name, ) ) @@ -380,8 +385,9 @@ def read_line(self, line: str): self._variable_done() elif self.state == ReaderState.IN_VTABLE: - if not is_blank_or_comment(line): - self._vtable_done() + vtable_class = get_class_name(line) + if vtable_class is not None: + self._vtable_done(class_name=vtable_class) def read_lines(self, lines: Iterable): for line in lines: diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index e0b25d85..a3e9360f 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -63,3 +63,33 @@ def match_marker(line: str) -> DecompMarker | None: def is_marker_exact(line: str) -> bool: return markerExactRegex.match(line) is not None + + +template_class_decl_regex = re.compile( + r"\s*(?:\/\/)?\s*class (\w+)<([\w]+)\s*(\*+)?\s*>" +) + + +class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*class (\w+)") + + +def get_class_name(line: str) -> str | None: + """For VTABLE markers, extract the class name from the code line or comment + where it appears.""" + + match = template_class_decl_regex.match(line) + if match is not None: + # For template classes, we should reformat the class name so it matches + # the output from cvdump: one space between the template type and any asterisks + # if it is a pointer type. + (class_name, template_type, asterisks) = match.groups() + if asterisks is not None: + return f"{class_name}<{template_type} {asterisks}>" + + return f"{class_name}<{template_type}>" + + match = class_decl_regex.match(line) + if match is not None: + return match.group(1) + + return None diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 7954aee1..30092a97 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -193,6 +193,7 @@ def test_multiple_vtables(parser): ) assert len(parser.alerts) == 0 assert len(parser.vtables) == 2 + assert parser.vtables[0].class_name == "MxString" def test_multiple_vtables_same_module(parser): diff --git a/tools/isledecomp/tests/test_parser_util.py b/tools/isledecomp/tests/test_parser_util.py index 131a8ab3..5cbe5c8b 100644 --- a/tools/isledecomp/tests/test_parser_util.py +++ b/tools/isledecomp/tests/test_parser_util.py @@ -5,6 +5,7 @@ is_blank_or_comment, match_marker, is_marker_exact, + get_class_name, ) @@ -96,3 +97,36 @@ def test_marker_dict_type_replace(): markers = list(d.iter()) assert len(markers) == 1 assert markers[0].type == "FUNCTION" + + +class_name_match_cases = [ + ("class MxString {", "MxString"), + ("// class MxString", "MxString"), + ("class MxString : public MxCore {", "MxString"), + ("class MxPtrList", "MxPtrList"), + # If it is possible to match the symbol MxList::`vftable' + # we should get the correct class name if possible. If the template type is a pointer, + # the asterisk and class name are separated by one space. + ("// class MxList", "MxList"), + ("// class MxList", "MxList"), + ("// class MxList", "MxList"), + # I don't know if this would ever come up, but sure, why not? + ("// class MxList", "MxList"), +] + + +@pytest.mark.parametrize("line, class_name", class_name_match_cases) +def test_get_class_name(line: str, class_name: str): + assert get_class_name(line) == class_name + + +class_name_no_match_cases = [ + "MxString { ", + "clas MxString", + "// MxPtrList::`scalar deleting destructor'", +] + + +@pytest.mark.parametrize("line", class_name_no_match_cases) +def test_get_class_name_none(line: str): + assert get_class_name(line) is None From cbc6105b5728379dc55d4de744f19ff7fe534583 Mon Sep 17 00:00:00 2001 From: disinvite Date: Sat, 2 Dec 2023 18:57:23 -0500 Subject: [PATCH 16/17] Vtable marker should identify struct --- tools/isledecomp/isledecomp/parser/util.py | 4 ++-- tools/isledecomp/tests/test_parser_util.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index a3e9360f..99ab1c56 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -66,11 +66,11 @@ def is_marker_exact(line: str) -> bool: template_class_decl_regex = re.compile( - r"\s*(?:\/\/)?\s*class (\w+)<([\w]+)\s*(\*+)?\s*>" + r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)<([\w]+)\s*(\*+)?\s*>" ) -class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*class (\w+)") +class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)") def get_class_name(line: str) -> str | None: diff --git a/tools/isledecomp/tests/test_parser_util.py b/tools/isledecomp/tests/test_parser_util.py index 5cbe5c8b..643abf3e 100644 --- a/tools/isledecomp/tests/test_parser_util.py +++ b/tools/isledecomp/tests/test_parser_util.py @@ -100,6 +100,7 @@ def test_marker_dict_type_replace(): class_name_match_cases = [ + ("struct MxString {", "MxString"), ("class MxString {", "MxString"), ("// class MxString", "MxString"), ("class MxString : public MxCore {", "MxString"), From dec876adcc9bb9fff78e45c4fc3b627d71b8322d Mon Sep 17 00:00:00 2001 From: disinvite Date: Sat, 2 Dec 2023 18:58:23 -0500 Subject: [PATCH 17/17] No colon for SIZE comment --- LEGO1/mxpalette.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LEGO1/mxpalette.cpp b/LEGO1/mxpalette.cpp index aefe57c2..bc1df5ff 100644 --- a/LEGO1/mxpalette.cpp +++ b/LEGO1/mxpalette.cpp @@ -4,7 +4,7 @@ #include "mxvideomanager.h" // GLOBAL: LEGO1 0x10102188 -// SIZE: 0x400 +// SIZE 0x400 PALETTEENTRY g_defaultPaletteEntries[256] = { {0u, 0u, 0u, 0u}, {128u, 0u, 0u, 0u}, {0u, 128u, 0u, 0u}, {128u, 128u, 0u, 0u}, {0u, 0u, 128u, 0u}, {128u, 0u, 128u, 0u}, {0u, 128u, 128u, 0u}, {128u, 128u, 128u, 0u},