diff --git a/tools/checkorder/checkorder.py b/tools/checkorder/checkorder.py new file mode 100644 index 00000000..a9b738d8 --- /dev/null +++ b/tools/checkorder/checkorder.py @@ -0,0 +1,214 @@ +import os +import re +import sys +import argparse +from typing import List, Iterator, TextIO +from collections import namedtuple +from enum import Enum + + +class ReaderState(Enum): + WANT_OFFSET = 0 + WANT_SIG = 1 + IN_FUNC = 2 + + +CodeBlock = namedtuple('CodeBlock', + ['offset', 'signature', 'start_line', 'end_line']) + +# To match a reasonable variance of formatting for the offset comment +offsetCommentRegex = re.compile(r'//\s?OFFSET:\s?LEGO1 (?:0x)?([a-f0-9]+)', + flags=re.I) + +# To match the exact syntax (text upper case, hex lower case, with spaces) +# that is used in most places +offsetCommentExactRegex = re.compile(r'// OFFSET: LEGO1 (0x[a-f][0-9]+)') + + +def is_blank_or_comment(line: str) -> bool: + """Helper to read ahead adter the offset comment is matched. + There could be blank lines or other comments before the + function signature, and we want to skip those.""" + line_strip = line.strip() + return (len(line_strip) == 0 + or line.startswith('//') + or line.startswith('/*') + or line.startswith('*/')) + + +def is_exact_offset_comment(line: str) -> bool: + """If the offset comment does not match our (unofficial) syntax + we may want to alert the user to fix it for style points.""" + return offsetCommentExactRegex.match(line) is not None + + +def match_offset_comment(line: str) -> str | None: + # TODO: intended to skip the expensive regex match, but is it necessary? + if not line.startswith('//'): + return None + + match = offsetCommentRegex.match(line) + return match.group(1) if match is not None else None + + +def find_code_blocks(stream: TextIO) -> List[CodeBlock]: + """Read the IO stream (file) line-by-line and give the following report: + Foreach code block (function) in the file, what are its starting and + ending line numbers, and what is the given offset in the original + binary. We expect the result to be ordered by line number because we + are reading the file from start to finish.""" + + blocks = [] + + offset = None + function_sig = None + start_line = None + state = ReaderState.WANT_OFFSET + + for line_no, line in enumerate(stream): + if state in (ReaderState.WANT_SIG, ReaderState.IN_FUNC): + # Naive but reasonable assumption that functions will end with + # a curly brace on its own line with no prepended spaces. + if line.startswith('}'): + # TODO: could streamline this and the next case + block = CodeBlock(offset=offset, + signature=function_sig, + start_line=start_line, + end_line=line_no) + + blocks.append(block) + state = ReaderState.WANT_OFFSET + elif match_offset_comment(line) is not None: + # We hit another offset unexpectedly before detecting the + # end of the function. We can recover easily by just + # ending the function here. + block = CodeBlock(offset=offset, + signature=function_sig, + start_line=start_line, + end_line=line_no - 1) + + blocks.append(block) + state = ReaderState.WANT_OFFSET + + # We want to grab the function signature so we can identify + # the code block. Skip any blank lines or comments + # that follow the offset comment. + elif (not is_blank_or_comment(line) + and state == ReaderState.WANT_SIG): + function_sig = line.strip() + state = ReaderState.IN_FUNC + + if state == ReaderState.WANT_OFFSET: + match = match_offset_comment(line) + if match is not None: + offset = int(match, 16) + start_line = line_no + state = ReaderState.WANT_SIG + + return blocks + + +def file_is_cpp(filename: str) -> bool: + (basefile, ext) = os.path.splitext(filename) + return ext.lower() == '.cpp' + + +def walk_source_dir(source: str) -> Iterator[tuple]: + """Generator to walk the given directory recursively and return + any .cpp files found.""" + + for subdir, dirs, files in os.walk(source): + for file in files: + if not file_is_cpp(file): + continue + + yield os.path.join(subdir, file) + + +def sig_truncate(sig: str) -> str: + return f"{sig[:47]}{'...' if len(sig) >= 50 else ''}" + + +def check_file(filename: str, verbose: bool = False) -> bool: + """Open and read the given file, then check whether the code blocks + are in order. If verbose, print each block.""" + + with open(filename, 'r') as f: + code_blocks = find_code_blocks(f) + + just_offsets = [block.offset for block in code_blocks] + sorted_offsets = sorted(just_offsets) + if just_offsets == sorted_offsets: + return False + + print(filename) + if verbose: + order_lookup = {k: i for i, k in enumerate(sorted_offsets)} + prev_offset = 0 + + for block in code_blocks: + msg = ' '.join([ + ' ' if block.offset > prev_offset else '!', + f'{block.offset:08x}', + f'{block.end_line - block.start_line:4} lines', + f'{order_lookup[block.offset]:3}', + ' ', + sig_truncate(block.signature), + ]) + print(msg) + prev_offset = block.offset + + print() + return True + + +def parse_args(test_args: list | None = None) -> dict: + p = argparse.ArgumentParser() + p.add_argument('target', help='The file or directory to check.') + p.add_argument('--enforce', action=argparse.BooleanOptionalAction, + default=False, + help='Fail with error code if target is out of order.') + p.add_argument('--verbose', action=argparse.BooleanOptionalAction, + default=False, + help=('Display each code block in the file and show ' + 'where each consecutive run of blocks is broken.')) + + if test_args is None: + args = p.parse_args() + else: + args = p.parse_args(test_args) + + return vars(args) + + +def main(): + args = parse_args() + + if os.path.isdir(args['target']): + files_to_check = list(walk_source_dir(args['target'])) + elif os.path.isfile(args['target']) and file_is_cpp(args['target']): + files_to_check = [args['target']] + else: + sys.exit('Invalid target') + + files_out_of_order = 0 + + for file in files_to_check: + is_jumbled = check_file(file, args['verbose']) + if is_jumbled: + files_out_of_order += 1 + + if files_out_of_order > 0: + error_message = ' '.join([ + str(files_out_of_order), + 'files are' if files_out_of_order > 1 else 'file is', + 'out of order' + ]) + print(error_message) + + if files_out_of_order > 0 and args['enforce']: + sys.exit(1) + + +if __name__ == '__main__': + main()