diff --git a/LEGO1/legoinputmanager.h b/LEGO1/legoinputmanager.h index a6e34a44..9a4fe6e7 100644 --- a/LEGO1/legoinputmanager.h +++ b/LEGO1/legoinputmanager.h @@ -12,13 +12,13 @@ class LegoControlManager; -// VTABLE: LEGO1 0x100d87b8 SYNTHETIC +// VTABLE: LEGO1 0x100d87b8 // class MxCollection -// VTABLE: LEGO1 0x100d87d0 SYNTHETIC +// VTABLE: LEGO1 0x100d87d0 // class MxList -// VTABLE: LEGO1 0x100d87e8 SYNTHETIC +// VTABLE: LEGO1 0x100d87e8 // class MxQueue // VTABLE: LEGO1 0x100d8800 diff --git a/LEGO1/legopathcontrollerlist.h b/LEGO1/legopathcontrollerlist.h index dbda40e9..5a7d4c60 100644 --- a/LEGO1/legopathcontrollerlist.h +++ b/LEGO1/legopathcontrollerlist.h @@ -5,7 +5,7 @@ #include "mxlist.h" #include "mxtypes.h" -// VTABLE: LEGO1 0x100d6320 SYNTHETIC +// VTABLE: LEGO1 0x100d6320 // class MxPtrList // VTABLE: LEGO1 0x100d6338 @@ -17,10 +17,10 @@ class LegoPathControllerList : public MxPtrList { static void Destroy(LegoPathController*); }; -// VTABLE: LEGO1 0x100d6380 SYNTHETIC +// VTABLE: LEGO1 0x100d6380 // class MxCollection -// VTABLE: LEGO1 0x100d6398 SYNTHETIC +// VTABLE: LEGO1 0x100d6398 // class MxList #endif // LEGOPATHCONTROLLERLIST_H diff --git a/LEGO1/mxdirect3d.h b/LEGO1/mxdirect3d.h index 0d0b48fd..f038ee7f 100644 --- a/LEGO1/mxdirect3d.h +++ b/LEGO1/mxdirect3d.h @@ -17,7 +17,8 @@ class MxDeviceModeFinder { MxDirectDraw::DeviceModesInfo* m_deviceInfo; // +0xe0 }; -// VTABLE: LEGO1 0x100db814 (or 0x100d9cc8?) +// VTABLE: LEGO1 0x100db814 +// or is it 0x100d9cc8? // SIZE 0x198 class MxDeviceEnumerate { public: diff --git a/LEGO1/mxdsactionlist.h b/LEGO1/mxdsactionlist.h index 7050dc53..19929d20 100644 --- a/LEGO1/mxdsactionlist.h +++ b/LEGO1/mxdsactionlist.h @@ -6,10 +6,10 @@ class MxDSAction; -// VTABLE: LEGO1 0x100dcea8 SYNTHETIC +// VTABLE: LEGO1 0x100dcea8 // class MxCollection -// VTABLE: LEGO1 0x100dcec0 SYNTHETIC +// VTABLE: LEGO1 0x100dcec0 // class MxList // VTABLE: LEGO1 0x100dced8 diff --git a/LEGO1/mxpalette.cpp b/LEGO1/mxpalette.cpp index b5b30257..bc1df5ff 100644 --- a/LEGO1/mxpalette.cpp +++ b/LEGO1/mxpalette.cpp @@ -3,7 +3,8 @@ #include "mxomni.h" #include "mxvideomanager.h" -// GLOBAL: LEGO1 0x10102188 0x400 +// GLOBAL: LEGO1 0x10102188 +// SIZE 0x400 PALETTEENTRY g_defaultPaletteEntries[256] = { {0u, 0u, 0u, 0u}, {128u, 0u, 0u, 0u}, {0u, 128u, 0u, 0u}, {128u, 128u, 0u, 0u}, {0u, 0u, 128u, 0u}, {128u, 0u, 128u, 0u}, {0u, 128u, 128u, 0u}, {128u, 128u, 128u, 0u}, diff --git a/LEGO1/mxpresenterlist.h b/LEGO1/mxpresenterlist.h index 6b84ffab..d96374b9 100644 --- a/LEGO1/mxpresenterlist.h +++ b/LEGO1/mxpresenterlist.h @@ -5,7 +5,7 @@ class MxPresenter; -// VTABLE: LEGO1 0x100d62f0 SYNTHETIC +// VTABLE: LEGO1 0x100d62f0 // class MxPtrList // VTABLE: LEGO1 0x100d6308 @@ -17,10 +17,10 @@ class MxPresenterList : public MxPtrList { typedef MxListCursorChildChild MxPresenterListCursor; -// VTABLE: LEGO1 0x100d6350 SYNTHETIC +// VTABLE: LEGO1 0x100d6350 // class MxCollection -// VTABLE: LEGO1 0x100d6368 SYNTHETIC +// VTABLE: LEGO1 0x100d6368 // class MxList #endif // MXPRESENTERLIST_H diff --git a/LEGO1/mxregionlist.h b/LEGO1/mxregionlist.h index 48bf9050..3aa0487b 100644 --- a/LEGO1/mxregionlist.h +++ b/LEGO1/mxregionlist.h @@ -6,13 +6,13 @@ struct MxRegionTopBottom; struct MxRegionLeftRight; -// VTABLE: LEGO1 0x100dcb10 SYNTHETIC +// VTABLE: LEGO1 0x100dcb10 // class MxCollection -// VTABLE: LEGO1 0x100dcb28 SYNTHETIC +// VTABLE: LEGO1 0x100dcb28 // class MxList -// VTABLE: LEGO1 0x100dcb40 SYNTHETIC +// VTABLE: LEGO1 0x100dcb40 // class MxPtrList // VTABLE: 
LEGO1 0x100dcb58 @@ -24,18 +24,20 @@ class MxRegionList : public MxPtrList { }; // VTABLE: LEGO1 0x100dcb88 +// class MxListCursorChildChild typedef MxListCursorChildChild MxRegionListCursor; // VTABLE: LEGO1 0x100dcc10 +// class MxListCursorChildChild typedef MxListCursorChildChild MxRegionLeftRightListCursor; -// VTABLE: LEGO1 0x100dcc40 SYNTHETIC +// VTABLE: LEGO1 0x100dcc40 // class MxCollection -// VTABLE: LEGO1 0x100dcc58 SYNTHETIC +// VTABLE: LEGO1 0x100dcc58 // class MxList -// VTABLE: LEGO1 0x100dcc70 SYNTHETIC +// VTABLE: LEGO1 0x100dcc70 // class MxPtrList // VTABLE: LEGO1 0x100dcc88 diff --git a/LEGO1/mxstreamchunklist.h b/LEGO1/mxstreamchunklist.h index d8d9f170..4924b6ff 100644 --- a/LEGO1/mxstreamchunklist.h +++ b/LEGO1/mxstreamchunklist.h @@ -6,10 +6,10 @@ class MxStreamChunk; -// VTABLE: LEGO1 0x100dc5d0 SYNTHETIC +// VTABLE: LEGO1 0x100dc5d0 // class MxCollection -// VTABLE: LEGO1 0x100dc5e8 SYNTHETIC +// VTABLE: LEGO1 0x100dc5e8 // class MxList // VTABLE: LEGO1 0x100dc600 diff --git a/LEGO1/mxstreamlist.cpp b/LEGO1/mxstreamlist.cpp index 0d7d7051..1fa4dd48 100644 --- a/LEGO1/mxstreamlist.cpp +++ b/LEGO1/mxstreamlist.cpp @@ -8,7 +8,7 @@ DECOMP_SIZE_ASSERT(MxStreamListMxDSSubscriber, 0xc); // FUNCTION: LEGO1 0x100bfa80 MxDSAction* MxStreamListMxDSAction::Find(MxDSAction* p_action, MxBool p_delete) { - // DECOMP: ALPHA 0x1008b99d ? + // DECOMP ALPHA 0x1008b99d ? MxDSAction* found = NULL; diff --git a/LEGO1/mxvariabletable.h b/LEGO1/mxvariabletable.h index 7da6e0fc..1a91da26 100644 --- a/LEGO1/mxvariabletable.h +++ b/LEGO1/mxvariabletable.h @@ -20,10 +20,10 @@ class MxVariableTable : public MxHashTable { virtual MxU32 Hash(MxVariable*) override; // vtable+0x18 }; -// VTABLE: LEGO1 0x100dc1b0 SYNTHETIC +// VTABLE: LEGO1 0x100dc1b0 // class MxCollection -// VTABLE: LEGO1 0x100dc1e8 SYNTHETIC +// VTABLE: LEGO1 0x100dc1e8 // class MxHashTable #endif // MXVARIABLETABLE_H diff --git a/tools/checkorder/checkorder.py b/tools/checkorder/checkorder.py index 1ac8391f..02636c09 100644 --- a/tools/checkorder/checkorder.py +++ b/tools/checkorder/checkorder.py @@ -2,8 +2,7 @@ import sys import argparse from isledecomp.dir import walk_source_dir, is_file_cpp -from isledecomp.parser import find_code_blocks -from isledecomp.parser.util import is_exact_offset_comment +from isledecomp.parser import DecompParser def sig_truncate(sig: str) -> str: @@ -16,23 +15,21 @@ def check_file(filename: str, verbose: bool = False) -> bool: """Open and read the given file, then check whether the code blocks are in order. If verbose, print each block.""" + parser = DecompParser() with open(filename, "r", encoding="utf-8") as f: - code_blocks = find_code_blocks(f) + parser.read_lines(f) - bad_comments = [ - (block.start_line, block.offset_comment) - for block in code_blocks - if not is_exact_offset_comment(block.offset_comment) - ] - - just_offsets = [block.offset for block in code_blocks] + just_offsets = [block.offset for block in parser.functions] sorted_offsets = sorted(just_offsets) file_out_of_order = just_offsets != sorted_offsets + # TODO: When we add parser error severity, actual errors that obstruct + # parsing should probably be shown here regardless of verbose mode + # If we detect inexact comments, don't print anything unless we are # in verbose mode. If the file is out of order, we always print the # file name. 
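
A rough sketch (not part of this diff) of how that severity TODO could lean on the numbering the new error.py introduces further down, where warnings sit in the 100s and hard errors in the 200s; the helper name and the sample input are hypothetical:

    from isledecomp.parser import DecompParser
    from isledecomp.parser.error import ParserError

    def blocking_alerts(parser: DecompParser) -> list:
        # Assumption: ParserError values of 200 and up are the cases that obstruct parsing
        return [a for a in parser.alerts if a.code.value >= 200]

    # Hypothetical input: a GLOBAL marker directly after a FUNCTION marker is INCOMPATIBLE_MARKER (201)
    parser = DecompParser()
    parser.read_lines(["// FUNCTION: TEST 0x1234", "// GLOBAL: TEST 0x5555"])
    assert [a.code for a in blocking_alerts(parser)] == [ParserError.INCOMPATIBLE_MARKER]
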
- should_report = (len(bad_comments) > 0 and verbose) or file_out_of_order + should_report = (len(parser.alerts) > 0 and verbose) or file_out_of_order if not should_report and not file_out_of_order: return False @@ -44,22 +41,22 @@ def check_file(filename: str, verbose: bool = False) -> bool: order_lookup = {k: i for i, k in enumerate(sorted_offsets)} prev_offset = 0 - for block in code_blocks: + for fun in parser.functions: msg = " ".join( [ - " " if block.offset > prev_offset else "!", - f"{block.offset:08x}", - f"{block.end_line - block.start_line:4} lines", - f"{order_lookup[block.offset]:3}", + " " if fun.offset > prev_offset else "!", + f"{fun.offset:08x}", + f"{fun.end_line - fun.line_number:4} lines", + f"{order_lookup[fun.offset]:3}", + " ", - sig_truncate(block.signature), + sig_truncate(fun.name), ] ) print(msg) - prev_offset = block.offset + prev_offset = fun.offset - for line_no, line in bad_comments: - print(f"* line {line_no:3} bad offset comment ({line})") + for alert in parser.alerts: + print(f"* line {alert.line_number:4} {alert.code} ({alert.line})") print() diff --git a/tools/isledecomp/isledecomp/parser/__init__.py b/tools/isledecomp/isledecomp/parser/__init__.py index 0d504619..c9394d4a 100644 --- a/tools/isledecomp/isledecomp/parser/__init__.py +++ b/tools/isledecomp/isledecomp/parser/__init__.py @@ -1 +1 @@ -from .parser import find_code_blocks +from .parser import DecompParser diff --git a/tools/isledecomp/isledecomp/parser/error.py b/tools/isledecomp/isledecomp/parser/error.py new file mode 100644 index 00000000..c18e3e29 --- /dev/null +++ b/tools/isledecomp/isledecomp/parser/error.py @@ -0,0 +1,41 @@ +from enum import Enum + + +class ParserError(Enum): + # WARN: Stub function exceeds some line number threshold + UNLIKELY_STUB = 100 + + # WARN: Decomp marker is close enough to be recognized, but does not follow syntax exactly + BAD_DECOMP_MARKER = 101 + + # WARN: Multiple markers in sequence do not have distinct modules + DUPLICATE_MODULE = 102 + + # WARN: Detected a duplicate module/offset pair in the current file + DUPLICATE_OFFSET = 103 + + # WARN: We read a line that matches the decomp marker pattern, but we are not set up + # to handle it + BOGUS_MARKER = 104 + + # WARN: New function marker appeared while we were inside a function + MISSED_END_OF_FUNCTION = 105 + + # WARN: If we find a curly brace right after the function declaration + # this is wrong but we still have enough to make a match with reccmp + MISSED_START_OF_FUNCTION = 106 + + # WARN: A blank line appeared between the end of FUNCTION markers + # and the start of the function. We can ignore it, but the line shouldn't be there + UNEXPECTED_BLANK_LINE = 107 + + # ERROR: We found a marker unexpectedly + UNEXPECTED_MARKER = 200 + + # ERROR: We found a marker where we expected to find one, but it is incompatible + # with the preceding markers. 
+ # For example, a GLOBAL cannot follow FUNCTION/STUB + INCOMPATIBLE_MARKER = 201 + + # ERROR: The line following a synthetic marker was not a comment + BAD_SYNTHETIC = 202 diff --git a/tools/isledecomp/isledecomp/parser/node.py b/tools/isledecomp/isledecomp/parser/node.py new file mode 100644 index 00000000..0ee87000 --- /dev/null +++ b/tools/isledecomp/isledecomp/parser/node.py @@ -0,0 +1,41 @@ +from dataclasses import dataclass + + +@dataclass +class ParserNode: + line_number: int + + +@dataclass +class ParserAlert(ParserNode): + code: int + line: str + + +@dataclass +class ParserSymbol(ParserNode): + module: str + offset: int + + +@dataclass +class ParserFunction(ParserSymbol): + name: str + lookup_by_name: bool = False + is_stub: bool = False + is_synthetic: bool = False + is_template: bool = False + end_line: int = -1 + + +@dataclass +class ParserVariable(ParserSymbol): + name: str + size: int = -1 + is_static: bool = False + + +@dataclass +class ParserVtable(ParserSymbol): + class_name: str + num_entries: int = -1 diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 1039ac41..89b79928 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -1,145 +1,394 @@ # C++ file parser -from typing import List, TextIO +from typing import List, Iterable, Iterator from enum import Enum from .util import ( - CodeBlock, - OffsetMatch, + DecompMarker, is_blank_or_comment, - match_offset_comment, - get_template_function_name, + match_marker, + is_marker_exact, + get_class_name, + get_synthetic_name, remove_trailing_comment, - distinct_by_module, ) +from .node import ( + ParserAlert, + ParserFunction, + ParserVariable, + ParserVtable, +) +from .error import ParserError class ReaderState(Enum): - WANT_OFFSET = 0 + SEARCH = 0 WANT_SIG = 1 IN_FUNC = 2 IN_TEMPLATE = 3 WANT_CURLY = 4 - FUNCTION_DONE = 5 + IN_GLOBAL = 5 + IN_FUNC_GLOBAL = 6 + IN_VTABLE = 7 -def find_code_blocks(stream: TextIO) -> List[CodeBlock]: - """Read the IO stream (file) line-by-line and give the following report: - Foreach code block (function) in the file, what are its starting and - ending line numbers, and what is the given offset in the original - binary. We expect the result to be ordered by line number because we - are reading the file from start to finish.""" +def marker_is_stub(marker: DecompMarker) -> bool: + return marker.type.upper() == "STUB" - blocks: List[CodeBlock] = [] - offset_matches: List[OffsetMatch] = [] +def marker_is_variable(marker: DecompMarker) -> bool: + return marker.type.upper() == "GLOBAL" - function_sig = None - start_line = None - end_line = None - state = ReaderState.WANT_OFFSET - # 1-based to match cvdump and your text editor - # I know it says 0, but we will increment before each readline() - line_no = 0 - can_seek = True +def marker_is_synthetic(marker: DecompMarker) -> bool: + return marker.type.upper() in ("SYNTHETIC", "TEMPLATE") - while True: - # Do this before reading again so that an EOF will not - # cause us to miss the last function of the file. - if state == ReaderState.FUNCTION_DONE: - # Our list of offset marks could have duplicates on - # module name, so we'll eliminate those now. 
- for offset_match in distinct_by_module(offset_matches): - block = CodeBlock( - offset=offset_match.address, - signature=function_sig, - start_line=start_line, + +def marker_is_template(marker: DecompMarker) -> bool: + return marker.type.upper() == "TEMPLATE" + + +def marker_is_function(marker: DecompMarker) -> bool: + return marker.type.upper() in ("FUNCTION", "STUB") + + +def marker_is_vtable(marker: DecompMarker) -> bool: + return marker.type.upper() == "VTABLE" + + +class MarkerDict: + def __init__(self): + self.markers: dict = {} + + def insert(self, marker: DecompMarker) -> bool: + """Return True if this insert would overwrite""" + module = marker.module.upper() + if module in self.markers: + return True + + self.markers[module] = (marker.type, marker.offset) + return False + + def iter(self) -> Iterator[DecompMarker]: + for module, (marker_type, offset) in self.markers.items(): + yield DecompMarker(marker_type, module, offset) + + def empty(self): + self.markers = {} + + +class DecompParser: + # pylint: disable=too-many-instance-attributes + # Could combine output lists into a single list to get under the limit, + # but not right now + def __init__(self): + # The lists to be populated as we parse + self.functions: List[ParserFunction] = [] + self.vtables: List[ParserVtable] = [] + self.variables: List[ParserVariable] = [] + self.alerts: List[ParserAlert] = [] + + self.line_number: int = 0 + self.state: ReaderState = ReaderState.SEARCH + + self.last_line: str = "" + + # To allow for multiple markers where code is shared across different + # modules, save lists of compatible markers that appear in sequence + self.fun_markers = MarkerDict() + self.var_markers = MarkerDict() + self.tbl_markers = MarkerDict() + + # To handle functions that are entirely indented (i.e. those defined + # in class declarations), remember how many whitespace characters + # came before the opening curly brace and match that up at the end. + # This should give us the same or better accuracy for a well-formed file. + # The alternative is counting the curly braces on each line + # but that's probably too cumbersome. + self.curly_indent_stops: int = 0 + + # For non-synthetic functions, save the line number where the function begins + # (i.e. where we see the curly brace) along with the function signature. + # We will need both when we reach the end of the function. 
+ self.function_start: int = 0 + self.function_sig: str = "" + + def reset(self): + self.functions = [] + self.vtables = [] + self.variables = [] + self.alerts = [] + + self.line_number = 0 + self.state = ReaderState.SEARCH + + self.last_line = "" + + self.fun_markers.empty() + self.var_markers.empty() + self.tbl_markers.empty() + + self.curly_indent_stops = 0 + self.function_start = 0 + self.function_sig = "" + + def _recover(self): + """We hit a syntax error and need to reset temp structures""" + self.state = ReaderState.SEARCH + self.fun_markers.empty() + self.var_markers.empty() + self.tbl_markers.empty() + + def _syntax_warning(self, code): + self.alerts.append( + ParserAlert( + line_number=self.line_number, + code=code, + line=self.last_line.strip(), + ) + ) + + def _syntax_error(self, code): + self._syntax_warning(code) + self._recover() + + def _function_starts_here(self): + self.function_start = self.line_number + + def _function_marker(self, marker: DecompMarker): + if self.fun_markers.insert(marker): + self._syntax_warning(ParserError.DUPLICATE_MODULE) + self.state = ReaderState.WANT_SIG + + def _synthetic_marker(self, marker: DecompMarker): + if self.fun_markers.insert(marker): + self._syntax_warning(ParserError.DUPLICATE_MODULE) + self.state = ReaderState.IN_TEMPLATE + + def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False): + end_line = self.line_number + if unexpected: + # If we missed the end of the previous function, assume it ended + # on the previous line and that whatever we are tracking next + # begins on the current line. + end_line -= 1 + + for marker in self.fun_markers.iter(): + self.functions.append( + ParserFunction( + line_number=self.function_start, + module=marker.module, + offset=marker.offset, + lookup_by_name=lookup_by_name, + is_stub=marker_is_stub(marker), + is_synthetic=marker_is_synthetic(marker), + is_template=marker_is_template(marker), + name=self.function_sig, end_line=end_line, - offset_comment=offset_match.comment, - module=offset_match.module, - is_template=offset_match.is_template, - is_stub=offset_match.is_stub, ) - blocks.append(block) - offset_matches = [] - state = ReaderState.WANT_OFFSET + ) - if can_seek: - line_no += 1 - line = stream.readline() - if line == "": - break + self.fun_markers.empty() + self.curly_indent_stops = 0 + self.state = ReaderState.SEARCH - new_match = match_offset_comment(line) - if new_match is not None: - # We will allow multiple offsets if we have just begun - # the code block, but not after we hit the curly brace. 
- if state in ( - ReaderState.WANT_OFFSET, - ReaderState.IN_TEMPLATE, + def _vtable_marker(self, marker: DecompMarker): + if self.tbl_markers.insert(marker): + self._syntax_warning(ParserError.DUPLICATE_MODULE) + self.state = ReaderState.IN_VTABLE + + def _vtable_done(self, class_name: str = None): + if class_name is None: + # Best we can do + class_name = self.last_line.strip() + + for marker in self.tbl_markers.iter(): + self.vtables.append( + ParserVtable( + line_number=self.line_number, + module=marker.module, + offset=marker.offset, + class_name=class_name, + ) + ) + + self.tbl_markers.empty() + self.state = ReaderState.SEARCH + + def _variable_marker(self, marker: DecompMarker): + if self.var_markers.insert(marker): + self._syntax_warning(ParserError.DUPLICATE_MODULE) + + if self.state in (ReaderState.IN_FUNC, ReaderState.IN_FUNC_GLOBAL): + self.state = ReaderState.IN_FUNC_GLOBAL + else: + self.state = ReaderState.IN_GLOBAL + + def _variable_done(self): + for marker in self.var_markers.iter(): + self.variables.append( + ParserVariable( + line_number=self.line_number, + module=marker.module, + offset=marker.offset, + name=self.last_line.strip(), + ) + ) + + self.var_markers.empty() + if self.state == ReaderState.IN_FUNC_GLOBAL: + self.state = ReaderState.IN_FUNC + else: + self.state = ReaderState.SEARCH + + def _handle_marker(self, marker: DecompMarker): + # Cannot handle any markers between function sig and opening curly brace + if self.state == ReaderState.WANT_CURLY: + self._syntax_error(ParserError.UNEXPECTED_MARKER) + return + + # TODO: How uncertain are we of detecting the end of a function + # in a clang-formatted file? For now we assume we have missed the + # end if we detect a non-GLOBAL marker while state is IN_FUNC. + # Maybe these cases should be syntax errors instead + + if marker_is_function(marker): + if self.state in ( + ReaderState.SEARCH, ReaderState.WANT_SIG, ): - # If we detected an offset marker unexpectedly, - # we are handling it here so we can continue seeking. - can_seek = True - - offset_matches.append(new_match) - - if new_match.is_template: - state = ReaderState.IN_TEMPLATE - else: - state = ReaderState.WANT_SIG - else: + # We will allow multiple offsets if we have just begun + # the code block, but not after we hit the curly brace. + self._function_marker(marker) + elif self.state == ReaderState.IN_FUNC: # We hit another offset unexpectedly. # We can recover easily by just ending the function here. - end_line = line_no - 1 - state = ReaderState.FUNCTION_DONE + self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) + self._function_done(unexpected=True) - # Pause reading here so we handle the offset marker - # on the next loop iteration - can_seek = False + # Start the next function right after so we can + # read the next line. 
+ self._function_marker(marker) + else: + self._syntax_error(ParserError.INCOMPATIBLE_MARKER) - elif state == ReaderState.IN_TEMPLATE: + elif marker_is_synthetic(marker): + if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE): + self._synthetic_marker(marker) + elif self.state == ReaderState.IN_FUNC: + self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) + self._function_done(lookup_by_name=True, unexpected=True) + self._synthetic_marker(marker) + else: + self._syntax_error(ParserError.INCOMPATIBLE_MARKER) + + elif marker_is_variable(marker): + if self.state in ( + ReaderState.SEARCH, + ReaderState.IN_GLOBAL, + ReaderState.IN_FUNC, + ReaderState.IN_FUNC_GLOBAL, + ): + self._variable_marker(marker) + else: + self._syntax_error(ParserError.INCOMPATIBLE_MARKER) + + elif marker_is_vtable(marker): + if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE): + self._vtable_marker(marker) + elif self.state == ReaderState.IN_FUNC: + self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) + self._function_done(unexpected=True) + self._vtable_marker(marker) + else: + self._syntax_error(ParserError.INCOMPATIBLE_MARKER) + + else: + self._syntax_warning(ParserError.BOGUS_MARKER) + + def read_line(self, line: str): + self.last_line = line # TODO: Useful or hack for error reporting? + self.line_number += 1 + + marker = match_marker(line) + if marker is not None: + # TODO: what's the best place for this? + # Does it belong with reading or marker handling? + if not is_marker_exact(self.last_line): + self._syntax_warning(ParserError.BAD_DECOMP_MARKER) + self._handle_marker(marker) + return + + line_strip = line.strip() + if self.state == ReaderState.IN_TEMPLATE: # TEMPLATE functions are a special case. The signature is # given on the next line (in a // comment) - function_sig = get_template_function_name(line) - start_line = line_no - end_line = line_no - state = ReaderState.FUNCTION_DONE + name = get_synthetic_name(line) + if name is None: + self._syntax_error(ParserError.BAD_SYNTHETIC) + else: + self.function_sig = name + self._function_starts_here() + self._function_done(lookup_by_name=True) - elif state == ReaderState.WANT_SIG: - # Skip blank lines or comments that come after the offset - # marker. There is not a formal procedure for this, so just - # assume the next "code line" is the function signature - if not is_blank_or_comment(line): + elif self.state == ReaderState.WANT_SIG: + # Ignore blanks on the way to function start or function name + if len(line_strip) == 0: + self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE) + + elif line_strip.startswith("//"): + # If we found a comment, assume implicit lookup-by-name + # function and end here. We know this is not a decomp marker + # because it would have been handled already. + self.function_sig = get_synthetic_name(line) + self._function_starts_here() + self._function_done(lookup_by_name=True) + + elif line_strip == "{": + # We missed the function signature but we can recover from this + self.function_sig = "(unknown)" + self._function_starts_here() + self._syntax_warning(ParserError.MISSED_START_OF_FUNCTION) + self.state = ReaderState.IN_FUNC + + else: # Inline functions may end with a comment. Strip that out # to help parsing. - function_sig = remove_trailing_comment(line.strip()) + self.function_sig = remove_trailing_comment(line_strip) # Now check to see if the opening curly bracket is on the # same line. clang-format should prevent this (BraceWrapping) # but it is easy to detect. 
# If the entire function is on one line, handle that too. - if function_sig.endswith("{"): - start_line = line_no - state = ReaderState.IN_FUNC - elif function_sig.endswith("}") or function_sig.endswith("};"): - start_line = line_no - end_line = line_no - state = ReaderState.FUNCTION_DONE + if self.function_sig.endswith("{"): + self._function_starts_here() + self.state = ReaderState.IN_FUNC + elif self.function_sig.endswith("}") or self.function_sig.endswith( + "};" + ): + self._function_starts_here() + self._function_done() else: - state = ReaderState.WANT_CURLY + self.state = ReaderState.WANT_CURLY - elif state == ReaderState.WANT_CURLY: - if line.strip() == "{": - start_line = line_no - state = ReaderState.IN_FUNC + elif self.state == ReaderState.WANT_CURLY: + if line_strip == "{": + self.curly_indent_stops = line.index("{") + self._function_starts_here() + self.state = ReaderState.IN_FUNC - elif state == ReaderState.IN_FUNC: - # Naive but reasonable assumption that functions will end with - # a curly brace on its own line with no prepended spaces. - if line.startswith("}"): - end_line = line_no - state = ReaderState.FUNCTION_DONE + elif self.state == ReaderState.IN_FUNC: + if line_strip.startswith("}") and line[self.curly_indent_stops] == "}": + self._function_done() - return blocks + elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL): + if not is_blank_or_comment(line): + self._variable_done() + + elif self.state == ReaderState.IN_VTABLE: + vtable_class = get_class_name(line) + if vtable_class is not None: + self._vtable_done(class_name=vtable_class) + + def read_lines(self, lines: Iterable): + for line in lines: + self.read_line(line) diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index 59fca75b..99ab1c56 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -1,44 +1,17 @@ # C++ Parser utility functions and data structures from __future__ import annotations # python <3.10 compatibility import re -from typing import List from collections import namedtuple +DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"]) -CodeBlock = namedtuple( - "CodeBlock", - [ - "offset", - "signature", - "start_line", - "end_line", - "offset_comment", - "module", - "is_template", - "is_stub", - ], -) -OffsetMatch = namedtuple( - "OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"] -) - -# This has not been formally established, but considering that "STUB" -# is a temporary state for a function, we assume it will appear last, -# after any other modifiers (i.e. 
TEMPLATE) - -# To match a reasonable variance of formatting for the offset comment -offsetCommentRegex = re.compile( - r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?", # nopep8 +markerRegex = re.compile( + r"\s*//\s*(\w+):\s*(\w+)\s+(0x[a-f0-9]+)", flags=re.I, ) -# To match the exact syntax (text upper case, hex lower case, with spaces) -# that is used in most places -offsetCommentExactRegex = re.compile( - r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$" -) # nopep8 - +markerExactRegex = re.compile(r"\s*// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$") # The goal here is to just read whatever is on the next line, so some # flexibility in the formatting seems OK @@ -50,15 +23,15 @@ trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$") -def get_template_function_name(line: str) -> str: - """Parse function signature for special TEMPLATE functions""" +def get_synthetic_name(line: str) -> str | None: + """Synthetic names appear on a single line comment on the line after the marker. + If that's not what we have, return None""" template_match = templateCommentRegex.match(line) - # If we don't match, you get whatever is on the line as the signature if template_match is not None: return template_match.group(1) - return line + return None def remove_trailing_comment(line: str) -> str: @@ -78,39 +51,45 @@ def is_blank_or_comment(line: str) -> bool: ) -def is_exact_offset_comment(line: str) -> bool: - """If the offset comment does not match our (unofficial) syntax - we may want to alert the user to fix it for style points.""" - return offsetCommentExactRegex.match(line) is not None - - -def match_offset_comment(line: str) -> OffsetMatch | None: - match = offsetCommentRegex.match(line) +def match_marker(line: str) -> DecompMarker | None: + match = markerRegex.match(line) if match is None: return None - return OffsetMatch( - module=match.group(1), - address=int(match.group(2), 16), - is_template=match.group(3) is not None, - is_stub=match.group(4) is not None, - comment=line.strip(), + return DecompMarker( + type=match.group(1), module=match.group(2), offset=int(match.group(3), 16) ) -def distinct_by_module(offsets: List) -> List: - """Given a list of offset markers, return a list with distinct - module names. If module names (case-insensitive) are repeated, - choose the offset that appears first.""" +def is_marker_exact(line: str) -> bool: + return markerExactRegex.match(line) is not None - if len(offsets) < 2: - return offsets - # Dict maintains insertion order in python >=3.7 - offsets_dict = {} - for offset in offsets: - module_upper = offset.module.upper() - if module_upper not in offsets_dict: - offsets_dict[module_upper] = offset +template_class_decl_regex = re.compile( + r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)<([\w]+)\s*(\*+)?\s*>" +) - return list(offsets_dict.values()) + +class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)") + + +def get_class_name(line: str) -> str | None: + """For VTABLE markers, extract the class name from the code line or comment + where it appears.""" + + match = template_class_decl_regex.match(line) + if match is not None: + # For template classes, we should reformat the class name so it matches + # the output from cvdump: one space between the template type and any asterisks + # if it is a pointer type. 
+ (class_name, template_type, asterisks) = match.groups() + if asterisks is not None: + return f"{class_name}<{template_type} {asterisks}>" + + return f"{class_name}<{template_type}>" + + match = class_decl_regex.match(line) + if match is not None: + return match.group(1) + + return None diff --git a/tools/isledecomp/tests/samples/basic_class.cpp b/tools/isledecomp/tests/samples/basic_class.cpp index 23ce3c39..4316ad4a 100644 --- a/tools/isledecomp/tests/samples/basic_class.cpp +++ b/tools/isledecomp/tests/samples/basic_class.cpp @@ -3,6 +3,7 @@ // A very simple class +// VTABLE: TEST 0x1001002 class TestClass { public: TestClass(); @@ -10,14 +11,14 @@ class TestClass { virtual MxResult Tickle() override; // vtable+08 - // OFFSET: TEST 0x12345678 + // FUNCTION: TEST 0x12345678 inline const char* ClassName() const // vtable+0c { // 0xabcd1234 return "TestClass"; } - // OFFSET: TEST 0xdeadbeef + // FUNCTION: TEST 0xdeadbeef inline MxBool IsA(const char* name) const override // vtable+10 { return !strcmp(name, TestClass::ClassName()); diff --git a/tools/isledecomp/tests/samples/basic_file.cpp b/tools/isledecomp/tests/samples/basic_file.cpp index 6a4017b5..99930de8 100644 --- a/tools/isledecomp/tests/samples/basic_file.cpp +++ b/tools/isledecomp/tests/samples/basic_file.cpp @@ -3,19 +3,19 @@ // A very simple well-formed code file -// OFFSET: TEST 0x1234 +// FUNCTION: TEST 0x1234 void function01() { // TODO } -// OFFSET: TEST 0x2345 +// FUNCTION: TEST 0x2345 void function02() { // TODO } -// OFFSET: TEST 0x3456 +// FUNCTION: TEST 0x3456 void function03() { // TODO diff --git a/tools/isledecomp/tests/samples/global_variables.cpp b/tools/isledecomp/tests/samples/global_variables.cpp new file mode 100644 index 00000000..3be0316a --- /dev/null +++ b/tools/isledecomp/tests/samples/global_variables.cpp @@ -0,0 +1,14 @@ +// Sample for python unit tests +// Not part of the decomp + +// Global variables inside and outside of functions + +// GLOBAL: TEST 0x1000 +const char *g_message = "test"; + +// FUNCTION: TEST 0x1234 +void function01() +{ + // GLOBAL: TEST 0x5555 + static int g_hello = 123; +} diff --git a/tools/isledecomp/tests/samples/inline.cpp b/tools/isledecomp/tests/samples/inline.cpp index 0bfedf6d..8a36c89a 100644 --- a/tools/isledecomp/tests/samples/inline.cpp +++ b/tools/isledecomp/tests/samples/inline.cpp @@ -1,8 +1,8 @@ // Sample for python unit tests // Not part of the decomp -// OFFSET: TEST 0x10000001 +// FUNCTION: TEST 0x10000001 inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there -// OFFSET: TEST 0x10000002 +// FUNCTION: TEST 0x10000002 inline const char* OneLine() const { return "MxDSObject"; }; diff --git a/tools/isledecomp/tests/samples/missing_offset.cpp b/tools/isledecomp/tests/samples/missing_offset.cpp index 332fed2c..3f6b3811 100644 --- a/tools/isledecomp/tests/samples/missing_offset.cpp +++ b/tools/isledecomp/tests/samples/missing_offset.cpp @@ -9,7 +9,7 @@ int no_offset_comment() return -1; } -// OFFSET: TEST 0xdeadbeef +// FUNCTION: TEST 0xdeadbeef void regular_ole_function() { printf("hi there"); diff --git a/tools/isledecomp/tests/samples/multiple_offsets.cpp b/tools/isledecomp/tests/samples/multiple_offsets.cpp index eecdd95b..dc3c5bfa 100644 --- a/tools/isledecomp/tests/samples/multiple_offsets.cpp +++ b/tools/isledecomp/tests/samples/multiple_offsets.cpp @@ -3,22 +3,22 @@ // Handling multiple offset markers -// OFFSET: TEST 0x1234 -// OFFSET: HELLO 0x5555 +// FUNCTION: TEST 0x1234 +// FUNCTION: HELLO 0x5555 void different_modules() { // 
TODO } -// OFFSET: TEST 0x2345 -// OFFSET: TEST 0x1234 +// FUNCTION: TEST 0x2345 +// FUNCTION: TEST 0x1234 void same_module() { // TODO } -// OFFSET: TEST 0x2002 -// OFFSET: test 0x1001 +// FUNCTION: TEST 0x2002 +// FUNCTION: test 0x1001 void same_case_insensitive() { // TODO diff --git a/tools/isledecomp/tests/samples/oneline_function.cpp b/tools/isledecomp/tests/samples/oneline_function.cpp index 8d7fdc5a..feb82314 100644 --- a/tools/isledecomp/tests/samples/oneline_function.cpp +++ b/tools/isledecomp/tests/samples/oneline_function.cpp @@ -1,10 +1,10 @@ // Sample for python unit tests // Not part of the decomp -// OFFSET: TEST 0x1234 +// FUNCTION: TEST 0x1234 void short_function() { static char* msg = "oneliner"; } -// OFFSET: TEST 0x5555 +// FUNCTION: TEST 0x5555 void function_after_one_liner() { // This function comes after the previous that is on a single line. diff --git a/tools/isledecomp/tests/samples/out_of_order.cpp b/tools/isledecomp/tests/samples/out_of_order.cpp index 749c4f2b..951c99e7 100644 --- a/tools/isledecomp/tests/samples/out_of_order.cpp +++ b/tools/isledecomp/tests/samples/out_of_order.cpp @@ -1,19 +1,19 @@ // Sample for python unit tests // Not part of the decomp -// OFFSET: TEST 0x1001 +// FUNCTION: TEST 0x1001 void function_order01() { // TODO } -// OFFSET: TEST 0x1003 +// FUNCTION: TEST 0x1003 void function_order03() { // TODO } -// OFFSET: TEST 0x1002 +// FUNCTION: TEST 0x1002 void function_order02() { // TODO diff --git a/tools/isledecomp/tests/samples/poorly_formatted.cpp b/tools/isledecomp/tests/samples/poorly_formatted.cpp index 32dd774c..69f365ec 100644 --- a/tools/isledecomp/tests/samples/poorly_formatted.cpp +++ b/tools/isledecomp/tests/samples/poorly_formatted.cpp @@ -4,18 +4,18 @@ // While it's reasonable to expect a well-formed file (and clang-format // will make sure we get one), this will put the parser through its paces. 
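
The sample files here (and the LEGO1 headers above) are all being moved from the old OFFSET comment to the new FUNCTION/STUB spelling. Purely as an illustration of that rename, a hypothetical helper (not a tool in this repo) might look like this; TEMPLATE would map to its own marker in the same way:

    import re

    # Old form: "// OFFSET: MODULE 0xaddr [STUB]" -> new form: "// FUNCTION: ..." or "// STUB: ..."
    OLD_MARKER = re.compile(r"^(\s*)// OFFSET: (\w+) (0x[a-f0-9]+)( STUB)?$")

    def migrate_marker(line: str) -> str:
        m = OLD_MARKER.match(line)
        if m is None:
            return line
        indent, module, addr, stub = m.groups()
        return f"{indent}// {'STUB' if stub else 'FUNCTION'}: {module} {addr}"

    assert migrate_marker("// OFFSET: TEST 0x1234") == "// FUNCTION: TEST 0x1234"
    assert migrate_marker("// OFFSET: LEGO1 0xdeadbeef STUB") == "// STUB: LEGO1 0xdeadbeef"
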
-// OFFSET: TEST 0x1234 +// FUNCTION: TEST 0x1234 void curly_with_spaces() { static char* msg = "hello"; } -// OFFSET: TEST 0x5555 +// FUNCTION: TEST 0x5555 void weird_closing_curly() { int x = 123; } -// OFFSET: HELLO 0x5656 +// FUNCTION: HELLO 0x5656 void bad_indenting() { if (0) { diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 48bb0e44..30092a97 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -1,127 +1,360 @@ -import os -from typing import List, TextIO -from isledecomp.parser import find_code_blocks -from isledecomp.parser.util import CodeBlock - -SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples") +import pytest +from isledecomp.parser.parser import ( + ReaderState, + DecompParser, +) +from isledecomp.parser.error import ParserError -def sample_file(filename: str) -> TextIO: - """Wrapper for opening the samples from the directory that does not - depend on the cwd where we run the test""" - full_path = os.path.join(SAMPLE_DIR, filename) - return open(full_path, "r", encoding="utf-8") +@pytest.fixture(name="parser") +def fixture_parser(): + return DecompParser() -def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool: - """Helper to make this more idiomatic""" - just_offsets = [block.offset for block in blocks] - return just_offsets == sorted(just_offsets) +def test_missing_sig(parser): + """In the hopefully rare scenario that the function signature and marker + are swapped, we still have enough to match witch reccmp""" + parser.read_lines( + [ + "void my_function()", + "// FUNCTION: TEST 0x1234", + "{", + "}", + ] + ) + assert parser.state == ReaderState.SEARCH + assert len(parser.functions) == 1 + assert parser.functions[0].line_number == 3 + + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.MISSED_START_OF_FUNCTION -# Tests are below # +def test_not_exact_syntax(parser): + """Alert to inexact syntax right here in the parser instead of kicking it downstream. + Doing this means we don't have to save the actual text.""" + parser.read_line("// function: test 0x1234") + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.BAD_DECOMP_MARKER -def test_sanity(): - """Read a very basic file""" - with sample_file("basic_file.cpp") as f: - blocks = find_code_blocks(f) +def test_invalid_marker(parser): + """We matched a decomp marker, but it's not one we care about""" + parser.read_line("// BANANA: TEST 0x1234") + assert parser.state == ReaderState.SEARCH - assert len(blocks) == 3 - assert code_blocks_are_sorted(blocks) is True - # n.b. The parser returns line numbers as 1-based - # Function starts when we see the opening curly brace - assert blocks[0].start_line == 8 - assert blocks[0].end_line == 10 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.BOGUS_MARKER -def test_oneline(): - """(Assuming clang-format permits this) This sample has a function - on a single line. 
This will test the end-of-function detection""" - with sample_file("oneline_function.cpp") as f: - blocks = find_code_blocks(f) - - assert len(blocks) == 2 - assert blocks[0].start_line == 5 - assert blocks[0].end_line == 5 +def test_incompatible_marker(parser): + """The marker we just read cannot be handled in the current parser state""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "// GLOBAL: TEST 0x5000", + ] + ) + assert parser.state == ReaderState.SEARCH + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER -def test_missing_offset(): - """What if the function doesn't have an offset comment?""" - with sample_file("missing_offset.cpp") as f: - blocks = find_code_blocks(f) - - # TODO: For now, the function without the offset will just be ignored. - # Would be the same outcome if the comment was present but mangled and - # we failed to match it. We should detect these cases in the future. - assert len(blocks) == 1 +def test_variable(parser): + """Should identify a global variable""" + parser.read_lines( + [ + "// GLOBAL: HELLO 0x1234", + "int g_value = 5;", + ] + ) + assert len(parser.variables) == 1 -def test_jumbled_case(): - """The parser just reports what it sees. It is the responsibility of - the downstream tools to do something about a jumbled file. - Just verify that we are reading it correctly.""" - with sample_file("out_of_order.cpp") as f: - blocks = find_code_blocks(f) - - assert len(blocks) == 3 - assert code_blocks_are_sorted(blocks) is False +def test_synthetic_plus_marker(parser): + """Marker tracking preempts synthetic name detection. + Should fail with error and not log the synthetic""" + parser.read_lines( + [ + "// SYNTHETIC: HEY 0x555", + "// FUNCTION: HOWDY 0x1234", + ] + ) + assert len(parser.functions) == 0 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER -def test_bad_file(): - with sample_file("poorly_formatted.cpp") as f: - blocks = find_code_blocks(f) +def test_different_markers_different_module(parser): + """Does it make any sense for a function to be a stub in one module, + but not in another? I don't know. But it's no problem for us.""" + parser.read_lines( + [ + "// FUNCTION: HOWDY 0x1234", + "// STUB: SUP 0x5555", + "void interesting_function() {", + "}", + ] + ) - assert len(blocks) == 3 + assert len(parser.alerts) == 0 + assert len(parser.functions) == 2 -def test_indented(): - """Offsets for functions inside of a class will probably be indented.""" - with sample_file("basic_class.cpp") as f: - blocks = find_code_blocks(f) +def test_different_markers_same_module(parser): + """Now, if something is a regular function but then a stub, + what do we say about that?""" + parser.read_lines( + [ + "// FUNCTION: HOWDY 0x1234", + "// STUB: HOWDY 0x5555", + "void interesting_function() {", + "}", + ] + ) - # TODO: We don't properly detect the end of these functions - # because the closing brace is indented. However... knowing where each - # function ends is less important (for now) than capturing - # all the functions that are there. 
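
For what it's worth, the rewritten parser tries to address exactly this by recording how far the opening curly brace is indented (curly_indent_stops) and only closing the function on a brace at the same column. A small illustrative run, not part of this diff, mirroring the new test_indented_function further down:

    from isledecomp.parser import DecompParser

    parser = DecompParser()
    parser.read_lines(
        [
            "    // FUNCTION: TEST 0x1234",
            "    void Indented()",
            "    {",
            "        // body",
            "    }",
        ]
    )
    # The indented closing brace is matched to the opening one
    assert len(parser.alerts) == 0
    assert len(parser.functions) == 1
    assert (parser.functions[0].line_number, parser.functions[0].end_line) == (3, 5)
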
+ # Use first marker declaration, don't replace + assert len(parser.functions) == 1 + assert parser.functions[0].is_stub is False - assert len(blocks) == 2 - assert blocks[0].offset == int("0x12345678", 16) - assert blocks[0].start_line == 15 - # assert blocks[0].end_line == 18 - - assert blocks[1].offset == int("0xdeadbeef", 16) - assert blocks[1].start_line == 22 - # assert blocks[1].end_line == 24 + # Should alert to this + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE -def test_inline(): - with sample_file("inline.cpp") as f: - blocks = find_code_blocks(f) +def test_unexpected_synthetic(parser): + """FUNCTION then SYNTHETIC should fail to report either one""" + parser.read_lines( + [ + "// FUNCTION: HOWDY 0x1234", + "// SYNTHETIC: HOWDY 0x5555", + "void interesting_function() {", + "}", + ] + ) - assert len(blocks) == 2 - for block in blocks: - assert block.start_line is not None - assert block.start_line == block.end_line + assert parser.state == ReaderState.SEARCH + assert len(parser.functions) == 0 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER -def test_multiple_offsets(): - """If multiple offset marks appear before for a code block, take them - all but ensure module name (case-insensitive) is distinct. - Use first module occurrence in case of duplicates.""" - with sample_file("multiple_offsets.cpp") as f: - blocks = find_code_blocks(f) +@pytest.mark.skip(reason="not implemented yet") +def test_duplicate_offset(parser): + """Repeating the same module/offset in the same file is probably a typo""" + parser.read_lines( + [ + "// GLOBAL: HELLO 0x1234", + "int x = 1;", + "// GLOBAL: HELLO 0x1234", + "int y = 2;", + ] + ) - assert len(blocks) == 4 - assert blocks[0].module == "TEST" - assert blocks[0].start_line == 9 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.DUPLICATE_OFFSET - assert blocks[1].module == "HELLO" - assert blocks[1].start_line == 9 - # Duplicate modules are ignored - assert blocks[2].start_line == 16 - assert blocks[2].offset == 0x2345 +def test_multiple_variables(parser): + """Theoretically the same global variable can appear in multiple modules""" + parser.read_lines( + [ + "// GLOBAL: HELLO 0x1234", + "// GLOBAL: WUZZUP 0x555", + "const char *g_greeting;", + ] + ) + assert len(parser.alerts) == 0 + assert len(parser.variables) == 2 - assert blocks[3].module == "TEST" - assert blocks[3].offset == 0x2002 + +def test_multiple_variables_same_module(parser): + """Should not overwrite offset""" + parser.read_lines( + [ + "// GLOBAL: HELLO 0x1234", + "// GLOBAL: HELLO 0x555", + "const char *g_greeting;", + ] + ) + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE + assert len(parser.variables) == 1 + assert parser.variables[0].offset == 0x1234 + + +def test_multiple_vtables(parser): + parser.read_lines( + [ + "// VTABLE: HELLO 0x1234", + "// VTABLE: TEST 0x5432", + "class MxString : public MxCore {", + ] + ) + assert len(parser.alerts) == 0 + assert len(parser.vtables) == 2 + assert parser.vtables[0].class_name == "MxString" + + +def test_multiple_vtables_same_module(parser): + """Should not overwrite offset""" + parser.read_lines( + [ + "// VTABLE: HELLO 0x1234", + "// VTABLE: HELLO 0x5432", + "class MxString : public MxCore {", + ] + ) + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE + assert len(parser.vtables) == 1 + assert parser.vtables[0].offset 
== 0x1234 + + +def test_synthetic(parser): + parser.read_lines( + [ + "// SYNTHETIC: TEST 0x1234", + "// TestClass::TestMethod", + ] + ) + assert len(parser.functions) == 1 + assert parser.functions[0].lookup_by_name is True + assert parser.functions[0].name == "TestClass::TestMethod" + + +def test_synthetic_same_module(parser): + parser.read_lines( + [ + "// SYNTHETIC: TEST 0x1234", + "// SYNTHETIC: TEST 0x555", + "// TestClass::TestMethod", + ] + ) + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE + assert len(parser.functions) == 1 + assert parser.functions[0].offset == 0x1234 + + +def test_synthetic_no_comment(parser): + """Synthetic marker followed by a code line (i.e. non-comment)""" + parser.read_lines( + [ + "// SYNTHETIC: TEST 0x1234", + "int x = 123;", + ] + ) + assert len(parser.functions) == 0 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.BAD_SYNTHETIC + assert parser.state == ReaderState.SEARCH + + +def test_single_line_function(parser): + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "int hello() { return 1234; }", + ] + ) + assert len(parser.functions) == 1 + assert parser.functions[0].line_number == 2 + assert parser.functions[0].end_line == 2 + + +def test_indented_function(parser): + """Track the number of whitespace characters when we begin the function + and check that against each closing curly brace we read. + Should not report a syntax warning if the function is indented""" + parser.read_lines( + [ + " // FUNCTION: TEST 0x1234", + " void indented()", + " {", + " // TODO", + " }", + " // FUNCTION: NEXT 0x555", + ] + ) + assert len(parser.alerts) == 0 + + +@pytest.mark.xfail(reason="todo") +def test_indented_no_curly_hint(parser): + """Same as above, but opening curly brace is on the same line. + Without the hint of how many whitespace characters to check, can we + still identify the end of the function?""" + parser.read_lines( + [ + " // FUNCTION: TEST 0x1234", + " void indented() {", + " }", + " // FUNCTION: NEXT 0x555", + ] + ) + assert len(parser.alerts) == 0 + + +def test_implicit_lookup_by_name(parser): + """FUNCTION (or STUB) offsets must directly precede the function signature. + If we detect a comment instead, we assume that this is a lookup-by-name + function and end here.""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "// TestClass::TestMethod()", + ] + ) + assert parser.state == ReaderState.SEARCH + assert len(parser.functions) == 1 + assert parser.functions[0].lookup_by_name is True + assert parser.functions[0].name == "TestClass::TestMethod()" + + +def test_function_with_spaces(parser): + """There should not be any spaces between the end of FUNCTION markers + and the start or name of the function. 
If it's a blank line, we can safely + ignore but should alert to this.""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + " ", + "inline void test_function() { };", + ] + ) + assert len(parser.functions) == 1 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE + + +def test_function_with_spaces_implicit(parser): + """Same as above, but for implicit lookup-by-name""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + " ", + "// Implicit::Method", + ] + ) + assert len(parser.functions) == 1 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE + + +@pytest.mark.xfail(reason="will assume implicit lookup-by-name function") +def test_function_is_commented(parser): + """In an ideal world, we would recognize that there is no code here. + Some editors (or users) might comment the function on each line like this + but hopefully it is rare.""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "// int my_function()", + "// {", + "// return 5;", + "// }", + ] + ) + + assert len(parser.functions) == 0 diff --git a/tools/isledecomp/tests/test_parser_samples.py b/tools/isledecomp/tests/test_parser_samples.py new file mode 100644 index 00000000..e74fda0e --- /dev/null +++ b/tools/isledecomp/tests/test_parser_samples.py @@ -0,0 +1,141 @@ +import os +from typing import List, TextIO +import pytest +from isledecomp.parser import DecompParser +from isledecomp.parser.node import ParserSymbol + +SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples") + + +def sample_file(filename: str) -> TextIO: + """Wrapper for opening the samples from the directory that does not + depend on the cwd where we run the test""" + full_path = os.path.join(SAMPLE_DIR, filename) + return open(full_path, "r", encoding="utf-8") + + +def code_blocks_are_sorted(blocks: List[ParserSymbol]) -> bool: + """Helper to make this more idiomatic""" + just_offsets = [block.offset for block in blocks] + return just_offsets == sorted(just_offsets) + + +@pytest.fixture(name="parser") +def fixture_parser(): + return DecompParser() + + +# Tests are below # + + +def test_sanity(parser): + """Read a very basic file""" + with sample_file("basic_file.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 3 + assert code_blocks_are_sorted(parser.functions) is True + # n.b. The parser returns line numbers as 1-based + # Function starts when we see the opening curly brace + assert parser.functions[0].line_number == 8 + assert parser.functions[0].end_line == 10 + + +def test_oneline(parser): + """(Assuming clang-format permits this) This sample has a function + on a single line. This will test the end-of-function detection""" + with sample_file("oneline_function.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 2 + assert parser.functions[0].line_number == 5 + assert parser.functions[0].end_line == 5 + + +def test_missing_offset(parser): + """What if the function doesn't have an offset comment?""" + with sample_file("missing_offset.cpp") as f: + parser.read_lines(f) + + # TODO: For now, the function without the offset will just be ignored. + # Would be the same outcome if the comment was present but mangled and + # we failed to match it. We should detect these cases in the future. + assert len(parser.functions) == 1 + + +def test_jumbled_case(parser): + """The parser just reports what it sees. It is the responsibility of + the downstream tools to do something about a jumbled file. 
+ Just verify that we are reading it correctly.""" + with sample_file("out_of_order.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 3 + assert code_blocks_are_sorted(parser.functions) is False + + +def test_bad_file(parser): + with sample_file("poorly_formatted.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 3 + + +def test_indented(parser): + """Offsets for functions inside of a class will probably be indented.""" + with sample_file("basic_class.cpp") as f: + parser.read_lines(f) + + # TODO: We don't properly detect the end of these functions + # because the closing brace is indented. However... knowing where each + # function ends is less important (for now) than capturing + # all the functions that are there. + + assert len(parser.functions) == 2 + assert parser.functions[0].offset == int("0x12345678", 16) + assert parser.functions[0].line_number == 16 + # assert parser.functions[0].end_line == 19 + + assert parser.functions[1].offset == int("0xdeadbeef", 16) + assert parser.functions[1].line_number == 23 + # assert parser.functions[1].end_line == 25 + + +def test_inline(parser): + with sample_file("inline.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 2 + for fun in parser.functions: + assert fun.line_number is not None + assert fun.line_number == fun.end_line + + +def test_multiple_offsets(parser): + """If multiple offset marks appear before for a code block, take them + all but ensure module name (case-insensitive) is distinct. + Use first module occurrence in case of duplicates.""" + with sample_file("multiple_offsets.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 4 + assert parser.functions[0].module == "TEST" + assert parser.functions[0].line_number == 9 + + assert parser.functions[1].module == "HELLO" + assert parser.functions[1].line_number == 9 + + # Duplicate modules are ignored + assert parser.functions[2].line_number == 16 + assert parser.functions[2].offset == 0x2345 + + assert parser.functions[3].module == "TEST" + assert parser.functions[3].offset == 0x2002 + + +def test_variables(parser): + with sample_file("global_variables.cpp") as f: + parser.read_lines(f) + + assert len(parser.functions) == 1 + assert len(parser.variables) == 2 diff --git a/tools/isledecomp/tests/test_parser_statechange.py b/tools/isledecomp/tests/test_parser_statechange.py new file mode 100644 index 00000000..714de579 --- /dev/null +++ b/tools/isledecomp/tests/test_parser_statechange.py @@ -0,0 +1,150 @@ +import pytest +from isledecomp.parser.parser import ( + ReaderState as _rs, + DecompParser, +) +from isledecomp.parser.error import ParserError as _pe + +# fmt: off +state_change_marker_cases = [ + (_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None), + (_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None), + (_rs.SEARCH, "STUB", _rs.WANT_SIG, None), + (_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None), + (_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None), + (_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None), + + (_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None), + (_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.WANT_SIG, "STUB", _rs.WANT_SIG, None), + (_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + + (_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION), + (_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), + (_rs.IN_FUNC, "STUB", _rs.WANT_SIG, 
_pe.MISSED_END_OF_FUNCTION), + (_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION), + (_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION), + (_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION), + + (_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None), + (_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None), + (_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + + (_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "STUB", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + + (_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None), + (_rs.IN_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + + (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), + (_rs.IN_FUNC_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + + (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_VTABLE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None), +] +# fmt: on + + +@pytest.mark.parametrize( + "state, marker_type, new_state, expected_error", state_change_marker_cases +) +def test_state_change_by_marker( + state: _rs, marker_type: str, new_state: _rs, expected_error: None | _pe +): + p = DecompParser() + p.state = state + mock_line = f"// {marker_type}: TEST 0x1234" + p.read_line(mock_line) + assert p.state == new_state + + if expected_error is not None: + assert len(p.alerts) > 0 + assert p.alerts[0].code == expected_error + + +# Reading any of these lines should have no effect in ReaderState.SEARCH +search_lines_no_effect = [ + "", + "\t", + " ", + "int x = 0;", + "// Comment", + "/*", + "*/", + "/* Block comment */", + "{", + "}", +] + + +@pytest.mark.parametrize("line", search_lines_no_effect) +def test_state_search_line(line: str): + p = DecompParser() + p.read_line(line) + assert p.state == _rs.SEARCH + assert len(p.alerts) == 0 + + +global_lines = [ + ("// A comment", _rs.IN_GLOBAL), + ("", _rs.IN_GLOBAL), + ("\t", _rs.IN_GLOBAL), + (" ", _rs.IN_GLOBAL), + # TODO: no check for "likely" variable declaration so these all count + ("void function()", _rs.SEARCH), + ("int x = 123;", _rs.SEARCH), + ("just some text", _rs.SEARCH), +] + + +@pytest.mark.parametrize("line, new_state", global_lines) +def test_state_global_line(line: str, new_state: 
_rs): + p = DecompParser() + p.read_line("// GLOBAL: TEST 0x1234") + assert p.state == _rs.IN_GLOBAL + p.read_line(line) + assert p.state == new_state + + +# mostly same as above +in_func_global_lines = [ + ("// A comment", _rs.IN_FUNC_GLOBAL), + ("", _rs.IN_FUNC_GLOBAL), + ("\t", _rs.IN_FUNC_GLOBAL), + (" ", _rs.IN_FUNC_GLOBAL), + # TODO: no check for "likely" variable declaration so these all count + ("void function()", _rs.IN_FUNC), + ("int x = 123;", _rs.IN_FUNC), + ("just some text", _rs.IN_FUNC), +] + + +@pytest.mark.parametrize("line, new_state", in_func_global_lines) +def test_state_in_func_global_line(line: str, new_state: _rs): + p = DecompParser() + p.state = _rs.IN_FUNC + p.read_line("// GLOBAL: TEST 0x1234") + assert p.state == _rs.IN_FUNC_GLOBAL + p.read_line(line) + assert p.state == new_state diff --git a/tools/isledecomp/tests/test_parser_util.py b/tools/isledecomp/tests/test_parser_util.py index 91fd285b..643abf3e 100644 --- a/tools/isledecomp/tests/test_parser_util.py +++ b/tools/isledecomp/tests/test_parser_util.py @@ -1,11 +1,11 @@ -from collections import namedtuple -from typing import List import pytest +from isledecomp.parser.parser import MarkerDict from isledecomp.parser.util import ( + DecompMarker, is_blank_or_comment, - match_offset_comment, - is_exact_offset_comment, - distinct_by_module, + match_marker, + is_marker_exact, + get_class_name, ) @@ -28,76 +28,106 @@ def test_is_blank_or_comment(line: str, expected: bool): assert is_blank_or_comment(line) is expected -offset_comment_samples = [ +marker_samples = [ # (can_parse: bool, exact_match: bool, line: str) - # Should match both expected modules with optional STUB marker - (True, True, "// OFFSET: LEGO1 0xdeadbeef"), - (True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"), - (True, True, "// OFFSET: ISLE 0x12345678"), - (True, True, "// OFFSET: ISLE 0x12345678 STUB"), + (True, True, "// FUNCTION: LEGO1 0xdeadbeef"), + (True, True, "// FUNCTION: ISLE 0x12345678"), # No trailing spaces allowed - (True, False, "// OFFSET: LEGO1 0xdeadbeef "), - (True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "), + (True, False, "// FUNCTION: LEGO1 0xdeadbeef "), # Must have exactly one space between elements - (True, False, "//OFFSET: ISLE 0xdeadbeef"), - (True, False, "// OFFSET:ISLE 0xdeadbeef"), - (True, False, "// OFFSET: ISLE 0xdeadbeef"), - (True, False, "// OFFSET: ISLE 0xdeadbeef"), - (True, False, "// OFFSET: ISLE 0xdeadbeef"), - (True, False, "// OFFSET: ISLE 0xdeadbeef STUB"), - # Must have 0x prefix for hex number - (True, False, "// OFFSET: ISLE deadbeef"), + (True, False, "//FUNCTION: ISLE 0xdeadbeef"), + (True, False, "// FUNCTION:ISLE 0xdeadbeef"), + (True, False, "// FUNCTION: ISLE 0xdeadbeef"), + (True, False, "// FUNCTION: ISLE 0xdeadbeef"), + (True, False, "// FUNCTION: ISLE 0xdeadbeef"), + # Must have 0x prefix for hex number to match at all + (False, False, "// FUNCTION: ISLE deadbeef"), # Offset, module name, and STUB must be uppercase - (True, False, "// offset: ISLE 0xdeadbeef"), - (True, False, "// offset: isle 0xdeadbeef"), - (True, False, "// OFFSET: LEGO1 0xdeadbeef stub"), + (True, False, "// function: ISLE 0xdeadbeef"), + (True, False, "// function: isle 0xdeadbeef"), # Hex string must be lowercase - (True, False, "// OFFSET: ISLE 0xDEADBEEF"), + (True, False, "// FUNCTION: ISLE 0xDEADBEEF"), # TODO: How flexible should we be with matching the module name? 
- (True, True, "// OFFSET: OMNI 0x12345678"), - (True, True, "// OFFSET: LEG01 0x12345678"), - (True, False, "// OFFSET: hello 0x12345678"), + (True, True, "// FUNCTION: OMNI 0x12345678"), + (True, True, "// FUNCTION: LEG01 0x12345678"), + (True, False, "// FUNCTION: hello 0x12345678"), # Not close enough to match - (False, False, "// OFFSET: ISLE0x12345678"), - (False, False, "// OFFSET: 0x12345678"), + (False, False, "// FUNCTION: ISLE0x12345678"), + (False, False, "// FUNCTION: 0x12345678"), (False, False, "// LEGO1: 0x12345678"), # Hex string shorter than 8 characters - (True, True, "// OFFSET: LEGO1 0x1234"), + (True, True, "// FUNCTION: LEGO1 0x1234"), # TODO: These match but shouldn't. - # (False, False, '// OFFSET: LEGO1 0'), - # (False, False, '// OFFSET: LEGO1 0x'), + # (False, False, '// FUNCTION: LEGO1 0'), + # (False, False, '// FUNCTION: LEGO1 0x'), ] -@pytest.mark.parametrize("match, _, line", offset_comment_samples) -def test_offset_match(line: str, match: bool, _): - did_match = match_offset_comment(line) is not None +@pytest.mark.parametrize("match, _, line", marker_samples) +def test_marker_match(line: str, match: bool, _): + did_match = match_marker(line) is not None assert did_match is match -@pytest.mark.parametrize("_, exact, line", offset_comment_samples) -def test_exact_offset_comment(line: str, exact: bool, _): - assert is_exact_offset_comment(line) is exact +@pytest.mark.parametrize("_, exact, line", marker_samples) +def test_marker_exact(line: str, exact: bool, _): + assert is_marker_exact(line) is exact -# Helper for the next test: cut down version of OffsetMatch -MiniOfs = namedtuple("MiniOfs", ["module", "value"]) +def test_marker_dict_simple(): + d = MarkerDict() + d.insert(DecompMarker("FUNCTION", "TEST", 0x1234)) + markers = list(d.iter()) + assert len(markers) == 1 -distinct_by_module_samples = [ - # empty set - ([], []), - # same module name - ([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]), - # same module name, case-insensitive - ([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]), - # duplicates, non-consecutive - ( - [MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)], - [MiniOfs("test", 123), MiniOfs("abc", 111)], - ), + +def test_marker_dict_ofs_replace(): + d = MarkerDict() + d.insert(DecompMarker("FUNCTION", "TEST", 0x1234)) + d.insert(DecompMarker("FUNCTION", "TEST", 0x555)) + markers = list(d.iter()) + assert len(markers) == 1 + assert markers[0].offset == 0x1234 + + +def test_marker_dict_type_replace(): + d = MarkerDict() + d.insert(DecompMarker("FUNCTION", "TEST", 0x1234)) + d.insert(DecompMarker("STUB", "TEST", 0x1234)) + markers = list(d.iter()) + assert len(markers) == 1 + assert markers[0].type == "FUNCTION" + + +class_name_match_cases = [ + ("struct MxString {", "MxString"), + ("class MxString {", "MxString"), + ("// class MxString", "MxString"), + ("class MxString : public MxCore {", "MxString"), + ("class MxPtrList", "MxPtrList"), + # If it is possible to match the symbol MxList::`vftable' + # we should get the correct class name if possible. If the template type is a pointer, + # the asterisk and class name are separated by one space. + ("// class MxList", "MxList"), + ("// class MxList", "MxList"), + ("// class MxList", "MxList"), + # I don't know if this would ever come up, but sure, why not? 
+ ("// class MxList", "MxList"), ] -@pytest.mark.parametrize("sample, expected", distinct_by_module_samples) -def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]): - assert distinct_by_module(sample) == expected +@pytest.mark.parametrize("line, class_name", class_name_match_cases) +def test_get_class_name(line: str, class_name: str): + assert get_class_name(line) == class_name + + +class_name_no_match_cases = [ + "MxString { ", + "clas MxString", + "// MxPtrList::`scalar deleting destructor'", +] + + +@pytest.mark.parametrize("line", class_name_no_match_cases) +def test_get_class_name_none(line: str): + assert get_class_name(line) is None diff --git a/tools/reccmp/reccmp.py b/tools/reccmp/reccmp.py index 02c16029..f7659703 100755 --- a/tools/reccmp/reccmp.py +++ b/tools/reccmp/reccmp.py @@ -10,7 +10,7 @@ from isledecomp import ( Bin, - find_code_blocks, + DecompParser, get_file_in_script_dir, OffsetPlaceholderGenerator, print_diff, @@ -313,18 +313,20 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac # Generate basename of original file, used in locating OFFSET lines basename = os.path.basename(os.path.splitext(original)[0]) + parser = DecompParser() for srcfilename in walk_source_dir(source): + parser.reset() with open(srcfilename, "r", encoding="utf-8") as srcfile: - blocks = find_code_blocks(srcfile) + parser.read_lines(srcfile) - for block in blocks: - if block.is_stub: + for fun in parser.functions: + if fun.is_stub: continue - if block.module != basename: + if fun.module != basename: continue - addr = block.offset + addr = fun.offset # Verbose flag handling if verbose: if addr == verbose: @@ -332,13 +334,13 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac else: continue - if block.is_template: - recinfo = syminfo.get_recompiled_address_from_name(block.signature) + if fun.lookup_by_name: + recinfo = syminfo.get_recompiled_address_from_name(fun.name) if not recinfo: continue else: recinfo = syminfo.get_recompiled_address( - srcfilename, block.start_line + srcfilename, fun.line_number ) if not recinfo: continue