From 5d0788457cc24ec63aae889daa3eb21578b8143f Mon Sep 17 00:00:00 2001 From: disinvite Date: Sun, 10 Dec 2023 20:33:28 -0500 Subject: [PATCH] linter --- tools/checkorder/checkorder.py | 150 ++++++++---------- .../isledecomp/isledecomp/parser/__init__.py | 1 + tools/isledecomp/isledecomp/parser/error.py | 26 +++ tools/isledecomp/isledecomp/parser/linter.py | 96 +++++++++++ tools/isledecomp/isledecomp/parser/node.py | 6 - tools/isledecomp/isledecomp/parser/parser.py | 13 +- tools/isledecomp/tests/test_linter.py | 70 ++++++++ tools/isledecomp/tests/test_parser.py | 17 ++ 8 files changed, 285 insertions(+), 94 deletions(-) create mode 100644 tools/isledecomp/isledecomp/parser/linter.py create mode 100644 tools/isledecomp/tests/test_linter.py diff --git a/tools/checkorder/checkorder.py b/tools/checkorder/checkorder.py index 0cdba670..2263ebbe 100644 --- a/tools/checkorder/checkorder.py +++ b/tools/checkorder/checkorder.py @@ -1,97 +1,87 @@ import os import sys import argparse +import colorama from isledecomp.dir import walk_source_dir, is_file_cpp -from isledecomp.parser import DecompParser +from isledecomp.parser import DecompLinter + +colorama.init() -def sig_truncate(sig: str) -> str: - """Helper to truncate function names to 50 chars and append ellipsis - if needed. Goal is to stay under 80 columns for tool output.""" - return f"{sig[:47]}{'...' if len(sig) >= 50 else ''}" +def display_errors(alerts, filename): + sorted_alerts = sorted(alerts, key=lambda a: a.line_number) + + for alert in sorted_alerts: + error_type = ( + f"{colorama.Fore.RED}error: " + if alert.is_error() + else f"{colorama.Fore.YELLOW}warning: " + ) + components = [ + colorama.Fore.LIGHTWHITE_EX, + filename, + ":", + str(alert.line_number), + " : ", + error_type, + colorama.Fore.LIGHTWHITE_EX, + alert.code.name.lower(), + ] + print("".join(components)) + + if alert.line is not None: + print(f"{colorama.Fore.WHITE} {alert.line}") -def check_file(filename: str, verbose: bool = False) -> bool: - """Open and read the given file, then check whether the code blocks - are in order. If verbose, print each block.""" - - parser = DecompParser() - with open(filename, "r", encoding="utf-8") as f: - parser.read_lines(f) - - just_offsets = [block.offset for block in parser.functions] - sorted_offsets = sorted(just_offsets) - file_out_of_order = just_offsets != sorted_offsets - - # TODO: When we add parser error severity, actual errors that obstruct - # parsing should probably be shown here regardless of verbose mode - - # If we detect inexact comments, don't print anything unless we are - # in verbose mode. If the file is out of order, we always print the - # file name. - should_report = (len(parser.alerts) > 0 and verbose) or file_out_of_order - - if not should_report and not file_out_of_order: - return False - - # Else: we are alerting to some problem in this file - print(filename) - if verbose: - if file_out_of_order: - order_lookup = {k: i for i, k in enumerate(sorted_offsets)} - prev_offset = 0 - - for fun in parser.functions: - msg = " ".join( - [ - " " if fun.offset > prev_offset else "!", - f"{fun.offset:08x}", - f"{fun.end_line - fun.line_number:4} lines", - f"{order_lookup[fun.offset]:3}", - " ", - sig_truncate(fun.name), - ] - ) - print(msg) - prev_offset = fun.offset - - for alert in parser.alerts: - print(f"* line {alert.line_number:4} {alert.code} ({alert.line})") - - print() - - return file_out_of_order - - -def parse_args(test_args: list | None = None) -> dict: +def parse_args() -> dict: p = argparse.ArgumentParser( - description="Checks the source files to make sure the function offset comments are in order", + description="Syntax checking and linting for decomp annotation markers." ) p.add_argument("target", help="The file or directory to check.") p.add_argument( "--enforce", action=argparse.BooleanOptionalAction, default=False, - help="Fail with error code if target is out of order.", + help="Fail if syntax errors are found.", ) p.add_argument( - "--verbose", + "--module", + required=False, + type=str, + help="If present, run targeted checks for markers from the given module.", + ) + p.add_argument( + "--warnfail", action=argparse.BooleanOptionalAction, default=False, - help=( - "Display each code block in the file and show " - "where each consecutive run of blocks is broken." - ), + help="Fail if syntax warnings are found and enforce is enabled.", ) - if test_args is None: - args = p.parse_args() - else: - args = p.parse_args(test_args) - + args = p.parse_args() return vars(args) +def process_files(files, module=None): + warning_count = 0 + error_count = 0 + + linter = DecompLinter() + for filename in files: + success = linter.check_file(filename, module) + + warnings = [a for a in linter.alerts if a.is_warning()] + errors = [a for a in linter.alerts if a.is_error()] + + error_count += len(errors) + warning_count += len(warnings) + + if not success: + display_errors(linter.alerts, filename) + print() + + return (warning_count, error_count) + + def main(): args = parse_args() @@ -102,24 +92,12 @@ def main(): else: sys.exit("Invalid target") - files_out_of_order = 0 + (warning_count, error_count) = process_files(files_to_check, module=args["module"]) - for file in files_to_check: - is_jumbled = check_file(file, args["verbose"]) - if is_jumbled: - files_out_of_order += 1 + print(colorama.Style.RESET_ALL, end="") - if files_out_of_order > 0: - error_message = " ".join( - [ - str(files_out_of_order), - "files are" if files_out_of_order > 1 else "file is", - "out of order", - ] - ) - print(error_message) - - if files_out_of_order > 0 and args["enforce"]: + would_fail = error_count > 0 or (warning_count > 0 and args["warnfail"]) + if args["enforce"] and would_fail: sys.exit(1) diff --git a/tools/isledecomp/isledecomp/parser/__init__.py b/tools/isledecomp/isledecomp/parser/__init__.py index c9394d4a..3034a562 100644 --- a/tools/isledecomp/isledecomp/parser/__init__.py +++ b/tools/isledecomp/isledecomp/parser/__init__.py @@ -1 +1,2 @@ from .parser import DecompParser +from .linter import DecompLinter diff --git a/tools/isledecomp/isledecomp/parser/error.py b/tools/isledecomp/isledecomp/parser/error.py index c18e3e29..6de98c56 100644 --- a/tools/isledecomp/isledecomp/parser/error.py +++ b/tools/isledecomp/isledecomp/parser/error.py @@ -1,6 +1,8 @@ from enum import Enum +from dataclasses import dataclass +# TODO: poorly chosen name, should be AlertType or AlertCode or something class ParserError(Enum): # WARN: Stub function exceeds some line number threshold UNLIKELY_STUB = 100 @@ -29,6 +31,13 @@ class ParserError(Enum): # and the start of the function. We can ignore it, but the line shouldn't be there UNEXPECTED_BLANK_LINE = 107 + # WARN: We called the finish() method for the parser but had not reached the starting + # state of SEARCH + UNEXPECTED_END_OF_FILE = 108 + + # This code or higher is an error, not a warning + DECOMP_ERROR_START = 200 + # ERROR: We found a marker unexpectedly UNEXPECTED_MARKER = 200 @@ -39,3 +48,20 @@ class ParserError(Enum): # ERROR: The line following a synthetic marker was not a comment BAD_SYNTHETIC = 202 + + # ERROR: This function offset comes before the previous offset from the same module + # This hopefully gives some hint about which functions need to be rearranged. + FUNCTION_OUT_OF_ORDER = 203 + + +@dataclass +class ParserAlert: + code: ParserError + line_number: int + line: str | None = None + + def is_warning(self) -> bool: + return self.code.value < ParserError.DECOMP_ERROR_START.value + + def is_error(self) -> bool: + return self.code.value >= ParserError.DECOMP_ERROR_START.value diff --git a/tools/isledecomp/isledecomp/parser/linter.py b/tools/isledecomp/isledecomp/parser/linter.py new file mode 100644 index 00000000..21666539 --- /dev/null +++ b/tools/isledecomp/isledecomp/parser/linter.py @@ -0,0 +1,96 @@ +from .parser import DecompParser +from .error import ParserAlert, ParserError + + +# TODO: You get an error for each function out of order. +# Same as the verbose display but just as main line errors. +# TODO: Sort alerts by line number at the end? +# TODO: No more verbose mode. This is a linter and you get warnings always. +# TODO: --warnfail flag? + + +def get_checkorder_filter(module): + """Return a filter function on implemented functions in the given module""" + return lambda fun: fun.module == module and not fun.lookup_by_name + + +class DecompLinter: + def __init__(self): + self.alerts = [] + self._parser = DecompParser() + self._filename = "" + self._module = None + + def reset(self): + self.alerts = [] + self._parser.reset() + self._filename = "" + self._module = None + + def file_is_header(self): + return self._filename.lower().endswith(".h") + + def _check_function_order(self): + """Rules: + 1. Only markers that are implemented in the file are considered. This means we + only look at markers that are cross-referenced with cvdump output by their line + number. Markers with the lookup_by_name flag set are ignored because we cannot + directly influence their order. + + 2. Order should be considered for a single module only. If we have multiple + markers for a single function (i.e. for LEGO1 functions linked statically to + ISLE) then the virtual address space will be very different. If we don't check + for one module only, we would incorrectly report that the file is out of order. + """ + + if self._module is None: + return + + checkorder_filter = get_checkorder_filter(self._module) + last_offset = None + for fun in filter(checkorder_filter, self._parser.functions): + if last_offset is not None: + if fun.offset < last_offset: + self.alerts.append( + ParserAlert( + code=ParserError.FUNCTION_OUT_OF_ORDER, + line_number=fun.line_number, + ) + ) + + last_offset = fun.offset + + def _check_symbol_uniqueness(self): + # TODO + pass + + def _check_byname_allowed(self): + # TODO + pass + + def check_lines(self, lines, filename, module=None): + """`lines` is a generic iterable to allow for testing with a list of strings. + We assume lines has the entire contents of the compilation unit.""" + + self.reset() + self._filename = filename + self._module = module + + self._parser.read_lines(lines) + + self._parser.finish() + self.alerts = self._parser.alerts[::] + + if self._module is not None: + self._check_byname_allowed() + self._check_symbol_uniqueness() + + if not self.file_is_header(): + self._check_function_order() + + return len(self.alerts) == 0 + + def check_file(self, filename, module=None): + """Convenience method for decomplint cli tool""" + with open(filename, "r", encoding="utf-8") as f: + return self.check_lines(f, filename, module) diff --git a/tools/isledecomp/isledecomp/parser/node.py b/tools/isledecomp/isledecomp/parser/node.py index 0ee87000..721a24b9 100644 --- a/tools/isledecomp/isledecomp/parser/node.py +++ b/tools/isledecomp/isledecomp/parser/node.py @@ -6,12 +6,6 @@ class ParserNode: line_number: int -@dataclass -class ParserAlert(ParserNode): - code: int - line: str - - @dataclass class ParserSymbol(ParserNode): module: str diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 89b79928..d133014c 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -12,12 +12,11 @@ remove_trailing_comment, ) from .node import ( - ParserAlert, ParserFunction, ParserVariable, ParserVtable, ) -from .error import ParserError +from .error import ParserAlert, ParserError class ReaderState(Enum): @@ -29,6 +28,7 @@ class ReaderState(Enum): IN_GLOBAL = 5 IN_FUNC_GLOBAL = 6 IN_VTABLE = 7 + DONE = 100 def marker_is_stub(marker: DecompMarker) -> bool: @@ -306,6 +306,9 @@ def _handle_marker(self, marker: DecompMarker): self._syntax_warning(ParserError.BOGUS_MARKER) def read_line(self, line: str): + if self.state == ReaderState.DONE: + return + self.last_line = line # TODO: Useful or hack for error reporting? self.line_number += 1 @@ -392,3 +395,9 @@ def read_line(self, line: str): def read_lines(self, lines: Iterable): for line in lines: self.read_line(line) + + def finish(self): + if self.state != ReaderState.SEARCH: + self._syntax_warning(ParserError.UNEXPECTED_END_OF_FILE) + + self.state = ReaderState.DONE diff --git a/tools/isledecomp/tests/test_linter.py b/tools/isledecomp/tests/test_linter.py new file mode 100644 index 00000000..6560e957 --- /dev/null +++ b/tools/isledecomp/tests/test_linter.py @@ -0,0 +1,70 @@ +import pytest +from isledecomp.parser import DecompLinter +from isledecomp.parser.error import ParserError + + +@pytest.fixture(name="linter") +def fixture_linter(): + return DecompLinter() + + +def test_simple_in_order(linter): + lines = [ + "// FUNCTION: TEST 0x1000", + "void function1() {}", + "// FUNCTION: TEST 0x2000", + "void function2() {}", + "// FUNCTION: TEST 0x3000", + "void function3() {}", + ] + assert linter.check_lines(lines, "test.cpp", "TEST") is True + + +def test_simple_not_in_order(linter): + lines = [ + "// FUNCTION: TEST 0x1000", + "void function1() {}", + "// FUNCTION: TEST 0x3000", + "void function3() {}", + "// FUNCTION: TEST 0x2000", + "void function2() {}", + ] + assert linter.check_lines(lines, "test.cpp", "TEST") is False + assert len(linter.alerts) == 1 + + assert linter.alerts[0].code == ParserError.FUNCTION_OUT_OF_ORDER + # N.B. Line number given is the start of the function, not the marker + assert linter.alerts[0].line_number == 6 + + +def test_byname_ignored(linter): + """Should ignore lookup-by-name markers when checking order.""" + lines = [ + "// FUNCTION: TEST 0x1000", + "void function1() {}", + "// FUNCTION: TEST 0x3000", + "// MyClass::MyMethod", + "// FUNCTION: TEST 0x2000", + "void function2() {}", + ] + # TODO: This will fail after enforcing that bynames belong in headers + assert linter.check_lines(lines, "test.cpp", "TEST") is True + assert len(linter.alerts) == 0 + + +def test_module_isolation(linter): + """Should check the order of markers from a single module only.""" + lines = [ + "// FUNCTION: ALPHA 0x0001", + "// FUNCTION: TEST 0x1000", + "void function1() {}", + "// FUNCTION: ALPHA 0x0002", + "// FUNCTION: TEST 0x2000", + "void function2() {}", + "// FUNCTION: ALPHA 0x0003", + "// FUNCTION: TEST 0x3000", + "void function3() {}", + ] + + assert linter.check_lines(lines, "test.cpp", "TEST") is True + assert linter.check_lines(lines, "test.cpp", "ALPHA") is True diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index 30092a97..c31602f1 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -358,3 +358,20 @@ def test_function_is_commented(parser): ) assert len(parser.functions) == 0 + + +def test_unexpected_eof(parser): + """If a decomp marker finds its way to the last line of the file, + report that we could not get anything from it.""" + parser.read_lines( + [ + "// FUNCTION: TEST 0x1234", + "// Cls::Method", + "// FUNCTION: TEST 0x5555", + ] + ) + parser.finish() + + assert len(parser.functions) == 1 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE