Merge pull request #4 from disinvite/checkorder-changes

Decomp linter tool
This commit is contained in:
Christian Semmler 2023-12-12 14:21:51 -05:00 committed by GitHub
commit 1cf1844b69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 376 additions and 156 deletions

32
.github/workflows/analyze.yml vendored Normal file
View File

@ -0,0 +1,32 @@
name: Analyze
on: [push, pull_request]
jobs:
decomplint-isle:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install python libraries
run: |
pip install -r tools/requirements.txt
- name: Run decomplint.py
run: |
python3 tools/decomplint/decomplint.py ISLE --module ISLE --warnfail
decomplint-lego1:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install python libraries
run: |
pip install -r tools/requirements.txt
- name: Run decomplint.py
run: |
python3 tools/decomplint/decomplint.py LEGO1 --module LEGO1 --warnfail

View File

@ -1,19 +0,0 @@
name: Check order
on: [push, pull_request]
jobs:
checkorder:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Install python libraries
run: |
pip install -r tools/requirements.txt
- name: Run checkorder.py
run: |
python3 tools/checkorder/checkorder.py --verbose --enforce ISLE
python3 tools/checkorder/checkorder.py --verbose --enforce LEGO1

View File

@ -1,127 +0,0 @@
import os
import sys
import argparse
from isledecomp.dir import walk_source_dir, is_file_cpp
from isledecomp.parser import DecompParser
def sig_truncate(sig: str) -> str:
"""Helper to truncate function names to 50 chars and append ellipsis
if needed. Goal is to stay under 80 columns for tool output."""
return f"{sig[:47]}{'...' if len(sig) >= 50 else ''}"
def check_file(filename: str, verbose: bool = False) -> bool:
"""Open and read the given file, then check whether the code blocks
are in order. If verbose, print each block."""
parser = DecompParser()
with open(filename, "r", encoding="utf-8") as f:
parser.read_lines(f)
just_offsets = [block.offset for block in parser.functions]
sorted_offsets = sorted(just_offsets)
file_out_of_order = just_offsets != sorted_offsets
# TODO: When we add parser error severity, actual errors that obstruct
# parsing should probably be shown here regardless of verbose mode
# If we detect inexact comments, don't print anything unless we are
# in verbose mode. If the file is out of order, we always print the
# file name.
should_report = (len(parser.alerts) > 0 and verbose) or file_out_of_order
if not should_report and not file_out_of_order:
return False
# Else: we are alerting to some problem in this file
print(filename)
if verbose:
if file_out_of_order:
order_lookup = {k: i for i, k in enumerate(sorted_offsets)}
prev_offset = 0
for fun in parser.functions:
msg = " ".join(
[
" " if fun.offset > prev_offset else "!",
f"{fun.offset:08x}",
f"{fun.end_line - fun.line_number:4} lines",
f"{order_lookup[fun.offset]:3}",
" ",
sig_truncate(fun.name),
]
)
print(msg)
prev_offset = fun.offset
for alert in parser.alerts:
print(f"* line {alert.line_number:4} {alert.code} ({alert.line})")
print()
return file_out_of_order
def parse_args(test_args: list | None = None) -> dict:
p = argparse.ArgumentParser(
description="Checks the source files to make sure the function offset comments are in order",
)
p.add_argument("target", help="The file or directory to check.")
p.add_argument(
"--enforce",
action=argparse.BooleanOptionalAction,
default=False,
help="Fail with error code if target is out of order.",
)
p.add_argument(
"--verbose",
action=argparse.BooleanOptionalAction,
default=False,
help=(
"Display each code block in the file and show "
"where each consecutive run of blocks is broken."
),
)
if test_args is None:
args = p.parse_args()
else:
args = p.parse_args(test_args)
return vars(args)
def main():
args = parse_args()
if os.path.isdir(args["target"]):
files_to_check = list(walk_source_dir(args["target"]))
elif os.path.isfile(args["target"]) and is_file_cpp(args["target"]):
files_to_check = [args["target"]]
else:
sys.exit("Invalid target")
files_out_of_order = 0
for file in files_to_check:
is_jumbled = check_file(file, args["verbose"])
if is_jumbled:
files_out_of_order += 1
if files_out_of_order > 0:
error_message = " ".join(
[
str(files_out_of_order),
"files are" if files_out_of_order > 1 else "file is",
"out of order",
]
)
print(error_message)
if files_out_of_order > 0 and args["enforce"]:
sys.exit(1)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,99 @@
import os
import sys
import argparse
import colorama
from isledecomp.dir import walk_source_dir, is_file_cpp
from isledecomp.parser import DecompLinter
colorama.init()
def display_errors(alerts, filename):
sorted_alerts = sorted(alerts, key=lambda a: a.line_number)
for alert in sorted_alerts:
error_type = (
f"{colorama.Fore.RED}error: "
if alert.is_error()
else f"{colorama.Fore.YELLOW}warning: "
)
components = [
colorama.Fore.LIGHTWHITE_EX,
filename,
":",
str(alert.line_number),
" : ",
error_type,
colorama.Fore.LIGHTWHITE_EX,
alert.code.name.lower(),
]
print("".join(components))
if alert.line is not None:
print(f"{colorama.Fore.WHITE} {alert.line}")
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(
description="Syntax checking and linting for decomp annotation markers."
)
p.add_argument("target", help="The file or directory to check.")
p.add_argument(
"--module",
required=False,
type=str,
help="If present, run targeted checks for markers from the given module.",
)
p.add_argument(
"--warnfail",
action=argparse.BooleanOptionalAction,
default=False,
help="Fail if syntax warnings are found.",
)
(args, _) = p.parse_known_args()
return args
def process_files(files, module=None):
warning_count = 0
error_count = 0
linter = DecompLinter()
for filename in files:
success = linter.check_file(filename, module)
warnings = [a for a in linter.alerts if a.is_warning()]
errors = [a for a in linter.alerts if a.is_error()]
error_count += len(errors)
warning_count += len(warnings)
if not success:
display_errors(linter.alerts, filename)
print()
return (warning_count, error_count)
def main():
args = parse_args()
if os.path.isdir(args.target):
files_to_check = list(walk_source_dir(args.target))
elif os.path.isfile(args.target) and is_file_cpp(args.target):
files_to_check = [args.target]
else:
sys.exit("Invalid target")
(warning_count, error_count) = process_files(files_to_check, module=args.module)
print(colorama.Style.RESET_ALL, end="")
would_fail = error_count > 0 or (warning_count > 0 and args.warnfail)
if would_fail:
sys.exit(1)
if __name__ == "__main__":
main()

View File

@ -1 +1,2 @@
from .parser import DecompParser from .parser import DecompParser
from .linter import DecompLinter

View File

@ -1,6 +1,9 @@
from enum import Enum from enum import Enum
from typing import Optional
from dataclasses import dataclass
# TODO: poorly chosen name, should be AlertType or AlertCode or something
class ParserError(Enum): class ParserError(Enum):
# WARN: Stub function exceeds some line number threshold # WARN: Stub function exceeds some line number threshold
UNLIKELY_STUB = 100 UNLIKELY_STUB = 100
@ -29,6 +32,16 @@ class ParserError(Enum):
# and the start of the function. We can ignore it, but the line shouldn't be there # and the start of the function. We can ignore it, but the line shouldn't be there
UNEXPECTED_BLANK_LINE = 107 UNEXPECTED_BLANK_LINE = 107
# WARN: We called the finish() method for the parser but had not reached the starting
# state of SEARCH
UNEXPECTED_END_OF_FILE = 108
# WARN: We found a marker to be referenced by name outside of a header file.
BYNAME_FUNCTION_IN_CPP = 109
# This code or higher is an error, not a warning
DECOMP_ERROR_START = 200
# ERROR: We found a marker unexpectedly # ERROR: We found a marker unexpectedly
UNEXPECTED_MARKER = 200 UNEXPECTED_MARKER = 200
@ -39,3 +52,20 @@ class ParserError(Enum):
# ERROR: The line following a synthetic marker was not a comment # ERROR: The line following a synthetic marker was not a comment
BAD_SYNTHETIC = 202 BAD_SYNTHETIC = 202
# ERROR: This function offset comes before the previous offset from the same module
# This hopefully gives some hint about which functions need to be rearranged.
FUNCTION_OUT_OF_ORDER = 203
@dataclass
class ParserAlert:
code: ParserError
line_number: int
line: Optional[str] = None
def is_warning(self) -> bool:
return self.code.value < ParserError.DECOMP_ERROR_START.value
def is_error(self) -> bool:
return self.code.value >= ParserError.DECOMP_ERROR_START.value

View File

@ -0,0 +1,99 @@
from typing import List, Optional
from .parser import DecompParser
from .error import ParserAlert, ParserError
def get_checkorder_filter(module):
"""Return a filter function on implemented functions in the given module"""
return lambda fun: fun.module == module and not fun.lookup_by_name
class DecompLinter:
def __init__(self) -> None:
self.alerts: List[ParserAlert] = []
self._parser = DecompParser()
self._filename: str = ""
self._module: Optional[str] = None
def reset(self):
self.alerts = []
self._parser.reset()
self._filename = ""
self._module = None
def file_is_header(self):
return self._filename.lower().endswith(".h")
def _check_function_order(self):
"""Rules:
1. Only markers that are implemented in the file are considered. This means we
only look at markers that are cross-referenced with cvdump output by their line
number. Markers with the lookup_by_name flag set are ignored because we cannot
directly influence their order.
2. Order should be considered for a single module only. If we have multiple
markers for a single function (i.e. for LEGO1 functions linked statically to
ISLE) then the virtual address space will be very different. If we don't check
for one module only, we would incorrectly report that the file is out of order.
"""
if self._module is None:
return
checkorder_filter = get_checkorder_filter(self._module)
last_offset = None
for fun in filter(checkorder_filter, self._parser.functions):
if last_offset is not None:
if fun.offset < last_offset:
self.alerts.append(
ParserAlert(
code=ParserError.FUNCTION_OUT_OF_ORDER,
line_number=fun.line_number,
)
)
last_offset = fun.offset
def _check_offset_uniqueness(self):
# TODO
pass
def _check_byname_allowed(self):
if self.file_is_header():
return
for fun in self._parser.functions:
if fun.lookup_by_name:
self.alerts.append(
ParserAlert(
code=ParserError.BYNAME_FUNCTION_IN_CPP,
line_number=fun.line_number,
)
)
def check_lines(self, lines, filename, module=None):
"""`lines` is a generic iterable to allow for testing with a list of strings.
We assume lines has the entire contents of the compilation unit."""
self.reset()
self._filename = filename
self._module = module
self._parser.read_lines(lines)
self._parser.finish()
self.alerts = self._parser.alerts[::]
if self._module is not None:
self._check_byname_allowed()
self._check_offset_uniqueness()
if not self.file_is_header():
self._check_function_order()
return len(self.alerts) == 0
def check_file(self, filename, module=None):
"""Convenience method for decomplint cli tool"""
with open(filename, "r", encoding="utf-8") as f:
return self.check_lines(f, filename, module)

View File

@ -6,12 +6,6 @@ class ParserNode:
line_number: int line_number: int
@dataclass
class ParserAlert(ParserNode):
code: int
line: str
@dataclass @dataclass
class ParserSymbol(ParserNode): class ParserSymbol(ParserNode):
module: str module: str

View File

@ -12,12 +12,11 @@
remove_trailing_comment, remove_trailing_comment,
) )
from .node import ( from .node import (
ParserAlert,
ParserFunction, ParserFunction,
ParserVariable, ParserVariable,
ParserVtable, ParserVtable,
) )
from .error import ParserError from .error import ParserAlert, ParserError
class ReaderState(Enum): class ReaderState(Enum):
@ -29,6 +28,7 @@ class ReaderState(Enum):
IN_GLOBAL = 5 IN_GLOBAL = 5
IN_FUNC_GLOBAL = 6 IN_FUNC_GLOBAL = 6
IN_VTABLE = 7 IN_VTABLE = 7
DONE = 100
def marker_is_stub(marker: DecompMarker) -> bool: def marker_is_stub(marker: DecompMarker) -> bool:
@ -56,7 +56,7 @@ def marker_is_vtable(marker: DecompMarker) -> bool:
class MarkerDict: class MarkerDict:
def __init__(self): def __init__(self) -> None:
self.markers: dict = {} self.markers: dict = {}
def insert(self, marker: DecompMarker) -> bool: def insert(self, marker: DecompMarker) -> bool:
@ -80,7 +80,7 @@ class DecompParser:
# pylint: disable=too-many-instance-attributes # pylint: disable=too-many-instance-attributes
# Could combine output lists into a single list to get under the limit, # Could combine output lists into a single list to get under the limit,
# but not right now # but not right now
def __init__(self): def __init__(self) -> None:
# The lists to be populated as we parse # The lists to be populated as we parse
self.functions: List[ParserFunction] = [] self.functions: List[ParserFunction] = []
self.vtables: List[ParserVtable] = [] self.vtables: List[ParserVtable] = []
@ -306,6 +306,9 @@ def _handle_marker(self, marker: DecompMarker):
self._syntax_warning(ParserError.BOGUS_MARKER) self._syntax_warning(ParserError.BOGUS_MARKER)
def read_line(self, line: str): def read_line(self, line: str):
if self.state == ReaderState.DONE:
return
self.last_line = line # TODO: Useful or hack for error reporting? self.last_line = line # TODO: Useful or hack for error reporting?
self.line_number += 1 self.line_number += 1
@ -392,3 +395,9 @@ def read_line(self, line: str):
def read_lines(self, lines: Iterable): def read_lines(self, lines: Iterable):
for line in lines: for line in lines:
self.read_line(line) self.read_line(line)
def finish(self):
if self.state != ReaderState.SEARCH:
self._syntax_warning(ParserError.UNEXPECTED_END_OF_FILE)
self.state = ReaderState.DONE

View File

@ -0,0 +1,85 @@
import pytest
from isledecomp.parser import DecompLinter
from isledecomp.parser.error import ParserError
@pytest.fixture(name="linter")
def fixture_linter():
return DecompLinter()
def test_simple_in_order(linter):
lines = [
"// FUNCTION: TEST 0x1000",
"void function1() {}",
"// FUNCTION: TEST 0x2000",
"void function2() {}",
"// FUNCTION: TEST 0x3000",
"void function3() {}",
]
assert linter.check_lines(lines, "test.cpp", "TEST") is True
def test_simple_not_in_order(linter):
lines = [
"// FUNCTION: TEST 0x1000",
"void function1() {}",
"// FUNCTION: TEST 0x3000",
"void function3() {}",
"// FUNCTION: TEST 0x2000",
"void function2() {}",
]
assert linter.check_lines(lines, "test.cpp", "TEST") is False
assert len(linter.alerts) == 1
assert linter.alerts[0].code == ParserError.FUNCTION_OUT_OF_ORDER
# N.B. Line number given is the start of the function, not the marker
assert linter.alerts[0].line_number == 6
def test_byname_ignored(linter):
"""Should ignore lookup-by-name markers when checking order."""
lines = [
"// FUNCTION: TEST 0x1000",
"void function1() {}",
"// FUNCTION: TEST 0x3000",
"// MyClass::MyMethod",
"// FUNCTION: TEST 0x2000",
"void function2() {}",
]
# This will fail because byname lookup does not belong in the cpp file
assert linter.check_lines(lines, "test.cpp", "TEST") is False
# but it should not fail for function order.
assert all(
alert.code != ParserError.FUNCTION_OUT_OF_ORDER for alert in linter.alerts
)
def test_module_isolation(linter):
"""Should check the order of markers from a single module only."""
lines = [
"// FUNCTION: ALPHA 0x0001",
"// FUNCTION: TEST 0x1000",
"void function1() {}",
"// FUNCTION: ALPHA 0x0002",
"// FUNCTION: TEST 0x2000",
"void function2() {}",
"// FUNCTION: ALPHA 0x0003",
"// FUNCTION: TEST 0x3000",
"void function3() {}",
]
assert linter.check_lines(lines, "test.cpp", "TEST") is True
assert linter.check_lines(lines, "test.cpp", "ALPHA") is True
def test_byname_headers_only(linter):
"""Markers that ar referenced by name with cvdump belong in header files only."""
lines = [
"// FUNCTION: TEST 0x1000",
"// MyClass::~MyClass",
]
assert linter.check_lines(lines, "test.h", "TEST") is True
assert linter.check_lines(lines, "test.cpp", "TEST") is False
assert linter.alerts[0].code == ParserError.BYNAME_FUNCTION_IN_CPP

View File

@ -358,3 +358,20 @@ def test_function_is_commented(parser):
) )
assert len(parser.functions) == 0 assert len(parser.functions) == 0
def test_unexpected_eof(parser):
"""If a decomp marker finds its way to the last line of the file,
report that we could not get anything from it."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// Cls::Method",
"// FUNCTION: TEST 0x5555",
]
)
parser.finish()
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE