diff --git a/.github/workflows/analyze.yml b/.github/workflows/analyze.yml new file mode 100644 index 00000000..e139b8bf --- /dev/null +++ b/.github/workflows/analyze.yml @@ -0,0 +1,32 @@ +name: Analyze + +on: [push, pull_request] + +jobs: + decomplint-isle: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Install python libraries + run: | + pip install -r tools/requirements.txt + + - name: Run decomplint.py + run: | + python3 tools/decomplint/decomplint.py ISLE --module ISLE --warnfail + + decomplint-lego1: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Install python libraries + run: | + pip install -r tools/requirements.txt + + - name: Run decomplint.py + run: | + python3 tools/decomplint/decomplint.py LEGO1 --module LEGO1 --warnfail diff --git a/.github/workflows/order.yml b/.github/workflows/order.yml deleted file mode 100644 index d7ae315c..00000000 --- a/.github/workflows/order.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Check order - -on: [push, pull_request] - -jobs: - checkorder: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - - name: Install python libraries - run: | - pip install -r tools/requirements.txt - - - name: Run checkorder.py - run: | - python3 tools/checkorder/checkorder.py --verbose --enforce ISLE - python3 tools/checkorder/checkorder.py --verbose --enforce LEGO1 diff --git a/tools/checkorder/checkorder.py b/tools/checkorder/checkorder.py deleted file mode 100644 index 0cdba670..00000000 --- a/tools/checkorder/checkorder.py +++ /dev/null @@ -1,127 +0,0 @@ -import os -import sys -import argparse -from isledecomp.dir import walk_source_dir, is_file_cpp -from isledecomp.parser import DecompParser - - -def sig_truncate(sig: str) -> str: - """Helper to truncate function names to 50 chars and append ellipsis - if needed. Goal is to stay under 80 columns for tool output.""" - return f"{sig[:47]}{'...' if len(sig) >= 50 else ''}" - - -def check_file(filename: str, verbose: bool = False) -> bool: - """Open and read the given file, then check whether the code blocks - are in order. If verbose, print each block.""" - - parser = DecompParser() - with open(filename, "r", encoding="utf-8") as f: - parser.read_lines(f) - - just_offsets = [block.offset for block in parser.functions] - sorted_offsets = sorted(just_offsets) - file_out_of_order = just_offsets != sorted_offsets - - # TODO: When we add parser error severity, actual errors that obstruct - # parsing should probably be shown here regardless of verbose mode - - # If we detect inexact comments, don't print anything unless we are - # in verbose mode. If the file is out of order, we always print the - # file name. - should_report = (len(parser.alerts) > 0 and verbose) or file_out_of_order - - if not should_report and not file_out_of_order: - return False - - # Else: we are alerting to some problem in this file - print(filename) - if verbose: - if file_out_of_order: - order_lookup = {k: i for i, k in enumerate(sorted_offsets)} - prev_offset = 0 - - for fun in parser.functions: - msg = " ".join( - [ - " " if fun.offset > prev_offset else "!", - f"{fun.offset:08x}", - f"{fun.end_line - fun.line_number:4} lines", - f"{order_lookup[fun.offset]:3}", - " ", - sig_truncate(fun.name), - ] - ) - print(msg) - prev_offset = fun.offset - - for alert in parser.alerts: - print(f"* line {alert.line_number:4} {alert.code} ({alert.line})") - - print() - - return file_out_of_order - - -def parse_args(test_args: list | None = None) -> dict: - p = argparse.ArgumentParser( - description="Checks the source files to make sure the function offset comments are in order", - ) - p.add_argument("target", help="The file or directory to check.") - p.add_argument( - "--enforce", - action=argparse.BooleanOptionalAction, - default=False, - help="Fail with error code if target is out of order.", - ) - p.add_argument( - "--verbose", - action=argparse.BooleanOptionalAction, - default=False, - help=( - "Display each code block in the file and show " - "where each consecutive run of blocks is broken." - ), - ) - - if test_args is None: - args = p.parse_args() - else: - args = p.parse_args(test_args) - - return vars(args) - - -def main(): - args = parse_args() - - if os.path.isdir(args["target"]): - files_to_check = list(walk_source_dir(args["target"])) - elif os.path.isfile(args["target"]) and is_file_cpp(args["target"]): - files_to_check = [args["target"]] - else: - sys.exit("Invalid target") - - files_out_of_order = 0 - - for file in files_to_check: - is_jumbled = check_file(file, args["verbose"]) - if is_jumbled: - files_out_of_order += 1 - - if files_out_of_order > 0: - error_message = " ".join( - [ - str(files_out_of_order), - "files are" if files_out_of_order > 1 else "file is", - "out of order", - ] - ) - print(error_message) - - if files_out_of_order > 0 and args["enforce"]: - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/tools/decomplint/decomplint.py b/tools/decomplint/decomplint.py new file mode 100644 index 00000000..2ceb42c2 --- /dev/null +++ b/tools/decomplint/decomplint.py @@ -0,0 +1,99 @@ +import os +import sys +import argparse +import colorama +from isledecomp.dir import walk_source_dir, is_file_cpp +from isledecomp.parser import DecompLinter + +colorama.init() + + +def display_errors(alerts, filename): + sorted_alerts = sorted(alerts, key=lambda a: a.line_number) + + for alert in sorted_alerts: + error_type = ( + f"{colorama.Fore.RED}error: " + if alert.is_error() + else f"{colorama.Fore.YELLOW}warning: " + ) + components = [ + colorama.Fore.LIGHTWHITE_EX, + filename, + ":", + str(alert.line_number), + " : ", + error_type, + colorama.Fore.LIGHTWHITE_EX, + alert.code.name.lower(), + ] + print("".join(components)) + + if alert.line is not None: + print(f"{colorama.Fore.WHITE} {alert.line}") + + +def parse_args() -> argparse.Namespace: + p = argparse.ArgumentParser( + description="Syntax checking and linting for decomp annotation markers." + ) + p.add_argument("target", help="The file or directory to check.") + p.add_argument( + "--module", + required=False, + type=str, + help="If present, run targeted checks for markers from the given module.", + ) + p.add_argument( + "--warnfail", + action=argparse.BooleanOptionalAction, + default=False, + help="Fail if syntax warnings are found.", + ) + + (args, _) = p.parse_known_args() + return args + + +def process_files(files, module=None): + warning_count = 0 + error_count = 0 + + linter = DecompLinter() + for filename in files: + success = linter.check_file(filename, module) + + warnings = [a for a in linter.alerts if a.is_warning()] + errors = [a for a in linter.alerts if a.is_error()] + + error_count += len(errors) + warning_count += len(warnings) + + if not success: + display_errors(linter.alerts, filename) + print() + + return (warning_count, error_count) + + +def main(): + args = parse_args() + + if os.path.isdir(args.target): + files_to_check = list(walk_source_dir(args.target)) + elif os.path.isfile(args.target) and is_file_cpp(args.target): + files_to_check = [args.target] + else: + sys.exit("Invalid target") + + (warning_count, error_count) = process_files(files_to_check, module=args.module) + + print(colorama.Style.RESET_ALL, end="") + + would_fail = error_count > 0 or (warning_count > 0 and args.warnfail) + if would_fail: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tools/isledecomp/isledecomp/parser/__init__.py b/tools/isledecomp/isledecomp/parser/__init__.py index c9394d4a..3034a562 100644 --- a/tools/isledecomp/isledecomp/parser/__init__.py +++ b/tools/isledecomp/isledecomp/parser/__init__.py @@ -1 +1,2 @@ from .parser import DecompParser +from .linter import DecompLinter diff --git a/tools/isledecomp/isledecomp/parser/error.py b/tools/isledecomp/isledecomp/parser/error.py index c18e3e29..638a806e 100644 --- a/tools/isledecomp/isledecomp/parser/error.py +++ b/tools/isledecomp/isledecomp/parser/error.py @@ -1,6 +1,9 @@ from enum import Enum +from typing import Optional +from dataclasses import dataclass +# TODO: poorly chosen name, should be AlertType or AlertCode or something class ParserError(Enum): # WARN: Stub function exceeds some line number threshold UNLIKELY_STUB = 100 @@ -29,6 +32,16 @@ class ParserError(Enum): # and the start of the function. We can ignore it, but the line shouldn't be there UNEXPECTED_BLANK_LINE = 107 + # WARN: We called the finish() method for the parser but had not reached the starting + # state of SEARCH + UNEXPECTED_END_OF_FILE = 108 + + # WARN: We found a marker to be referenced by name outside of a header file. + BYNAME_FUNCTION_IN_CPP = 109 + + # This code or higher is an error, not a warning + DECOMP_ERROR_START = 200 + # ERROR: We found a marker unexpectedly UNEXPECTED_MARKER = 200 @@ -39,3 +52,20 @@ class ParserError(Enum): # ERROR: The line following a synthetic marker was not a comment BAD_SYNTHETIC = 202 + + # ERROR: This function offset comes before the previous offset from the same module + # This hopefully gives some hint about which functions need to be rearranged. + FUNCTION_OUT_OF_ORDER = 203 + + +@dataclass +class ParserAlert: + code: ParserError + line_number: int + line: Optional[str] = None + + def is_warning(self) -> bool: + return self.code.value < ParserError.DECOMP_ERROR_START.value + + def is_error(self) -> bool: + return self.code.value >= ParserError.DECOMP_ERROR_START.value diff --git a/tools/isledecomp/isledecomp/parser/linter.py b/tools/isledecomp/isledecomp/parser/linter.py new file mode 100644 index 00000000..99c8e272 --- /dev/null +++ b/tools/isledecomp/isledecomp/parser/linter.py @@ -0,0 +1,99 @@ +from typing import List, Optional +from .parser import DecompParser +from .error import ParserAlert, ParserError + + +def get_checkorder_filter(module): + """Return a filter function on implemented functions in the given module""" + return lambda fun: fun.module == module and not fun.lookup_by_name + + +class DecompLinter: + def __init__(self) -> None: + self.alerts: List[ParserAlert] = [] + self._parser = DecompParser() + self._filename: str = "" + self._module: Optional[str] = None + + def reset(self): + self.alerts = [] + self._parser.reset() + self._filename = "" + self._module = None + + def file_is_header(self): + return self._filename.lower().endswith(".h") + + def _check_function_order(self): + """Rules: + 1. Only markers that are implemented in the file are considered. This means we + only look at markers that are cross-referenced with cvdump output by their line + number. Markers with the lookup_by_name flag set are ignored because we cannot + directly influence their order. + + 2. Order should be considered for a single module only. If we have multiple + markers for a single function (i.e. for LEGO1 functions linked statically to + ISLE) then the virtual address space will be very different. If we don't check + for one module only, we would incorrectly report that the file is out of order. + """ + + if self._module is None: + return + + checkorder_filter = get_checkorder_filter(self._module) + last_offset = None + for fun in filter(checkorder_filter, self._parser.functions): + if last_offset is not None: + if fun.offset < last_offset: + self.alerts.append( + ParserAlert( + code=ParserError.FUNCTION_OUT_OF_ORDER, + line_number=fun.line_number, + ) + ) + + last_offset = fun.offset + + def _check_offset_uniqueness(self): + # TODO + pass + + def _check_byname_allowed(self): + if self.file_is_header(): + return + + for fun in self._parser.functions: + if fun.lookup_by_name: + self.alerts.append( + ParserAlert( + code=ParserError.BYNAME_FUNCTION_IN_CPP, + line_number=fun.line_number, + ) + ) + + def check_lines(self, lines, filename, module=None): + """`lines` is a generic iterable to allow for testing with a list of strings. + We assume lines has the entire contents of the compilation unit.""" + + self.reset() + self._filename = filename + self._module = module + + self._parser.read_lines(lines) + + self._parser.finish() + self.alerts = self._parser.alerts[::] + + if self._module is not None: + self._check_byname_allowed() + self._check_offset_uniqueness() + + if not self.file_is_header(): + self._check_function_order() + + return len(self.alerts) == 0 + + def check_file(self, filename, module=None): + """Convenience method for decomplint cli tool""" + with open(filename, "r", encoding="utf-8") as f: + return self.check_lines(f, filename, module) diff --git a/tools/isledecomp/isledecomp/parser/node.py b/tools/isledecomp/isledecomp/parser/node.py index 0ee87000..721a24b9 100644 --- a/tools/isledecomp/isledecomp/parser/node.py +++ b/tools/isledecomp/isledecomp/parser/node.py @@ -6,12 +6,6 @@ class ParserNode: line_number: int -@dataclass -class ParserAlert(ParserNode): - code: int - line: str - - @dataclass class ParserSymbol(ParserNode): module: str diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 89b79928..336d4b0c 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -12,12 +12,11 @@ remove_trailing_comment, ) from .node import ( - ParserAlert, ParserFunction, ParserVariable, ParserVtable, ) -from .error import ParserError +from .error import ParserAlert, ParserError class ReaderState(Enum): @@ -29,6 +28,7 @@ class ReaderState(Enum): IN_GLOBAL = 5 IN_FUNC_GLOBAL = 6 IN_VTABLE = 7 + DONE = 100 def marker_is_stub(marker: DecompMarker) -> bool: @@ -56,7 +56,7 @@ def marker_is_vtable(marker: DecompMarker) -> bool: class MarkerDict: - def __init__(self): + def __init__(self) -> None: self.markers: dict = {} def insert(self, marker: DecompMarker) -> bool: @@ -80,7 +80,7 @@ class DecompParser: # pylint: disable=too-many-instance-attributes # Could combine output lists into a single list to get under the limit, # but not right now - def __init__(self): + def __init__(self) -> None: # The lists to be populated as we parse self.functions: List[ParserFunction] = [] self.vtables: List[ParserVtable] = [] @@ -306,6 +306,9 @@ def _handle_marker(self, marker: DecompMarker): self._syntax_warning(ParserError.BOGUS_MARKER) def read_line(self, line: str): + if self.state == ReaderState.DONE: + return + self.last_line = line # TODO: Useful or hack for error reporting? self.line_number += 1 @@ -392,3 +395,9 @@ def read_line(self, line: str): def read_lines(self, lines: Iterable): for line in lines: self.read_line(line) + + def finish(self): + if self.state != ReaderState.SEARCH: + self._syntax_warning(ParserError.UNEXPECTED_END_OF_FILE) + + self.state = ReaderState.DONE diff --git a/tools/isledecomp/tests/test_linter.py b/tools/isledecomp/tests/test_linter.py new file mode 100644 index 00000000..cb3b8f27 --- /dev/null +++ b/tools/isledecomp/tests/test_linter.py @@ -0,0 +1,85 @@ +import pytest +from isledecomp.parser import DecompLinter +from isledecomp.parser.error import ParserError + + +@pytest.fixture(name="linter") +def fixture_linter(): + return DecompLinter() + + +def test_simple_in_order(linter): + lines = [ + "// FUNCTION: TEST 0x1000", + "void function1() {}", + "// FUNCTION: TEST 0x2000", + "void function2() {}", + "// FUNCTION: TEST 0x3000", + "void function3() {}", + ] + assert linter.check_lines(lines, "test.cpp", "TEST") is True + + +def test_simple_not_in_order(linter): + lines = [ + "// FUNCTION: TEST 0x1000", + "void function1() {}", + "// FUNCTION: TEST 0x3000", + "void function3() {}", + "// FUNCTION: TEST 0x2000", + "void function2() {}", + ] + assert linter.check_lines(lines, "test.cpp", "TEST") is False + assert len(linter.alerts) == 1 + + assert linter.alerts[0].code == ParserError.FUNCTION_OUT_OF_ORDER + # N.B. Line number given is the start of the function, not the marker + assert linter.alerts[0].line_number == 6 + + +def test_byname_ignored(linter): + """Should ignore lookup-by-name markers when checking order.""" + lines = [ + "// FUNCTION: TEST 0x1000", + "void function1() {}", + "// FUNCTION: TEST 0x3000", + "// MyClass::MyMethod", + "// FUNCTION: TEST 0x2000", + "void function2() {}", + ] + # This will fail because byname lookup does not belong in the cpp file + assert linter.check_lines(lines, "test.cpp", "TEST") is False + # but it should not fail for function order. + assert all( + alert.code != ParserError.FUNCTION_OUT_OF_ORDER for alert in linter.alerts + ) + + +def test_module_isolation(linter): + """Should check the order of markers from a single module only.""" + lines = [ + "// FUNCTION: ALPHA 0x0001", + "// FUNCTION: TEST 0x1000", + "void function1() {}", + "// FUNCTION: ALPHA 0x0002", + "// FUNCTION: TEST 0x2000", + "void function2() {}", + "// FUNCTION: ALPHA 0x0003", + "// FUNCTION: TEST 0x3000", + "void function3() {}", + ] + + assert linter.check_lines(lines, "test.cpp", "TEST") is True + assert linter.check_lines(lines, "test.cpp", "ALPHA") is True + + +def test_byname_headers_only(linter): + """Markers that ar referenced by name with cvdump belong in header files only.""" + lines = [ + "// FUNCTION: TEST 0x1000", + "// MyClass::~MyClass", + ] + + assert linter.check_lines(lines, "test.h", "TEST") is True + assert linter.check_lines(lines, "test.cpp", "TEST") is False + assert linter.alerts[0].code == ParserError.BYNAME_FUNCTION_IN_CPP diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 30092a97..c31602f1 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -358,3 +358,20 @@ def test_function_is_commented(parser): ) assert len(parser.functions) == 0 + + +def test_unexpected_eof(parser): + """If a decomp marker finds its way to the last line of the file, + report that we could not get anything from it.""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "// Cls::Method", + "// FUNCTION: TEST 0x5555", + ] + ) + parser.finish() + + assert len(parser.functions) == 1 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE