diff --git a/tools/checkorder/checkorder.py b/tools/checkorder/checkorder.py index d2f0b23e..02636c09 100644 --- a/tools/checkorder/checkorder.py +++ b/tools/checkorder/checkorder.py @@ -19,7 +19,6 @@ def check_file(filename: str, verbose: bool = False) -> bool: with open(filename, "r", encoding="utf-8") as f: parser.read_lines(f) - just_offsets = [block.offset for block in parser.functions] sorted_offsets = sorted(just_offsets) file_out_of_order = just_offsets != sorted_offsets @@ -46,7 +45,7 @@ def check_file(filename: str, verbose: bool = False) -> bool: msg = " ".join( [ " " if fun.offset > prev_offset else "!", - f"{block.offset:08x}", + f"{fun.offset:08x}", f"{fun.end_line - fun.line_number:4} lines", f"{order_lookup[fun.offset]:3}", " ", diff --git a/tools/isledecomp/isledecomp/parser/node.py b/tools/isledecomp/isledecomp/parser/node.py index f9fbe3b5..96cc3362 100644 --- a/tools/isledecomp/isledecomp/parser/node.py +++ b/tools/isledecomp/isledecomp/parser/node.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from enum import Enum @dataclass diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 7a984b88..772ae4ef 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -1,6 +1,6 @@ # C++ file parser -from typing import List, TextIO, Iterable +from typing import List, Iterable, Iterator from enum import Enum from .util import ( DecompMarker, @@ -12,7 +12,6 @@ ) from .node import ( ParserAlert, - ParserNode, ParserFunction, ParserVariable, ParserVtable, @@ -64,9 +63,8 @@ def insert(self, marker: DecompMarker) -> bool: self.markers[module] = (marker.type, marker.offset) return False - def iter(self): - for module in self.markers: - (marker_type, offset) = self.markers[module] + def iter(self) -> Iterator[DecompMarker]: + for module, (marker_type, offset) in self.markers.items(): yield DecompMarker(marker_type, module, offset) def empty(self): @@ -74,27 +72,26 @@ def empty(self): class DecompParser: + # pylint: disable=too-many-instance-attributes + # Could combine output lists into a single list to get under the limit, + # but not right now def __init__(self): - self.fun_markers = MarkerDict() - self.var_markers = MarkerDict() - self.tbl_markers = MarkerDict() - self.reset() + # The lists to be populated as we parse + self.functions: List[ParserFunction] = [] + self.vtables: List[ParserVtable] = [] + self.variables: List[ParserVariable] = [] + self.alerts: List[ParserAlert] = [] - def reset(self): - # Output values - self.functions = [] - self.vtables = [] - self.variables = [] - self.alerts = [] - - # Internal state machine stuff self.line_number: int = 0 self.state: ReaderState = ReaderState.SEARCH self.last_line: str = "" - self.fun_markers.empty() - self.var_markers.empty() - self.tbl_markers.empty() + + # To allow for multiple markers where code is shared across different + # modules, save lists of compatible markers that appear in sequence + self.fun_markers = MarkerDict() + self.var_markers = MarkerDict() + self.tbl_markers = MarkerDict() # To handle functions that are entirely indented (i.e. those defined # in class declarations), remember how many whitespace characters @@ -102,7 +99,7 @@ def reset(self): # This should give us the same or better accuracy for a well-formed file. # The alternative is counting the curly braces on each line # but that's probably too cumbersome. - self.curly_indent_stops = 0 + self.curly_indent_stops: int = 0 # For non-synthetic functions, save the line number where the function begins # (i.e. where we see the curly brace) along with the function signature. @@ -110,6 +107,25 @@ def reset(self): self.function_start: int = 0 self.function_sig: str = "" + def reset(self): + self.functions = [] + self.vtables = [] + self.variables = [] + self.alerts = [] + + self.line_number = 0 + self.state = ReaderState.SEARCH + + self.last_line = "" + + self.fun_markers.empty() + self.var_markers.empty() + self.tbl_markers.empty() + + self.curly_indent_stops = 0 + self.function_start = 0 + self.function_sig = "" + def _recover(self): """We hit a syntax error and need to reset temp structures""" self.state = ReaderState.SEARCH @@ -340,19 +356,3 @@ def read_line(self, line: str): def read_lines(self, lines: Iterable): for line in lines: self.read_line(line) - - -def find_code_blocks(stream: TextIO) -> List[ParserNode]: - """Read the IO stream (file) line-by-line and give the following report: - Foreach code block (function) in the file, what are its starting and - ending line numbers, and what is the given offset in the original - binary. We expect the result to be ordered by line number because we - are reading the file from start to finish.""" - - # TODO: this will be replaced shortly. shim for now to avoid - # making more changes elsewhere - p = DecompParser() - for line in stream: - p.read_line(line) - - return p.functions diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index f93b4475..38515cfa 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -1,7 +1,6 @@ # C++ Parser utility functions and data structures from __future__ import annotations # python <3.10 compatibility import re -from typing import List from collections import namedtuple DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"]) diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 69764f74..fa5343dc 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -3,12 +3,11 @@ ReaderState, DecompParser, ) -from isledecomp.parser.util import DecompMarker from isledecomp.parser.error import ParserError -@pytest.fixture -def parser(): +@pytest.fixture(name="parser") +def fixture_parser(): return DecompParser() diff --git a/tools/isledecomp/tests/test_parser_samples.py b/tools/isledecomp/tests/test_parser_samples.py index a045e3cc..e74fda0e 100644 --- a/tools/isledecomp/tests/test_parser_samples.py +++ b/tools/isledecomp/tests/test_parser_samples.py @@ -1,6 +1,6 @@ import os -import pytest from typing import List, TextIO +import pytest from isledecomp.parser import DecompParser from isledecomp.parser.node import ParserSymbol @@ -20,8 +20,8 @@ def code_blocks_are_sorted(blocks: List[ParserSymbol]) -> bool: return just_offsets == sorted(just_offsets) -@pytest.fixture -def parser(): +@pytest.fixture(name="parser") +def fixture_parser(): return DecompParser() diff --git a/tools/isledecomp/tests/test_parser_statechange.py b/tools/isledecomp/tests/test_parser_statechange.py index 62c19175..714de579 100644 --- a/tools/isledecomp/tests/test_parser_statechange.py +++ b/tools/isledecomp/tests/test_parser_statechange.py @@ -3,7 +3,6 @@ ReaderState as _rs, DecompParser, ) -from isledecomp.parser.util import DecompMarker from isledecomp.parser.error import ParserError as _pe # fmt: off @@ -35,28 +34,28 @@ (_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None), (_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None), (_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), - + (_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "STUB", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), - + (_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None), (_rs.IN_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), - + (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), (_rs.IN_FUNC_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), - + (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), @@ -75,7 +74,8 @@ def test_state_change_by_marker( ): p = DecompParser() p.state = state - p._handle_marker(DecompMarker(marker_type, "TEST", 0x1234)) + mock_line = f"// {marker_type}: TEST 0x1234" + p.read_line(mock_line) assert p.state == new_state if expected_error is not None: diff --git a/tools/isledecomp/tests/test_parser_util.py b/tools/isledecomp/tests/test_parser_util.py index a8882721..131a8ab3 100644 --- a/tools/isledecomp/tests/test_parser_util.py +++ b/tools/isledecomp/tests/test_parser_util.py @@ -1,5 +1,3 @@ -from collections import namedtuple -from typing import List import pytest from isledecomp.parser.parser import MarkerDict from isledecomp.parser.util import (