isle/tools/checkorder/checkorder.py
2023-10-21 19:11:00 -04:00

215 lines
7.0 KiB
Python

import os
import re
import sys
import argparse
from typing import List, Iterator, TextIO
from collections import namedtuple
from enum import Enum
class ReaderState(Enum):
    """Phases of the line-by-line scanner used by find_code_blocks."""

    # Scanning for the next offset comment; no block is open.
    WANT_OFFSET = 0
    # An offset comment was seen; the next line that is neither blank nor
    # a comment is taken as the function signature.
    WANT_SIG = 1
    # The signature has been captured; scanning for the end of the block.
    IN_FUNC = 2
# One annotated function: the offset given in its comment, the captured
# signature text, and the first/last line indexes it spans in the file.
CodeBlock = namedtuple('CodeBlock', 'offset signature start_line end_line')
# To match a reasonable variance of formatting for the offset comment:
# optional single space after '//' and after 'OFFSET:', optional '0x'
# prefix, any letter case. Group 1 is the hex digits without the prefix.
offsetCommentRegex = re.compile(r'//\s?OFFSET:\s?LEGO1 (?:0x)?([a-f0-9]+)',
                                flags=re.I)

# To match the exact syntax (text upper case, hex lower case, with spaces)
# that is used in most places. Group 1 is the offset including '0x'.
# The digit class must allow letters and digits in any position; the
# trailing \b stops an upper-case hex tail (e.g. 0x100A) from being
# accepted through a prefix match on its leading digits.
offsetCommentExactRegex = re.compile(r'// OFFSET: LEGO1 (0x[a-f0-9]+)\b')
def is_blank_or_comment(line: str) -> bool:
    """Helper to read ahead after the offset comment is matched.

    There could be blank lines or other comments before the function
    signature, and we want to skip those.

    Returns True when the line, ignoring surrounding whitespace, is empty
    or begins with a comment marker ('//', '/*' or '*/').
    """
    stripped = line.strip()
    # Test the stripped text so that indented comments are recognised too.
    return not stripped or stripped.startswith(('//', '/*', '*/'))
def is_exact_offset_comment(line: str) -> bool:
    """Report whether the line begins with an offset comment written in
    the strict form accepted by offsetCommentExactRegex.

    A False result on a line that is otherwise an offset comment can be
    used to nudge the user into fixing it for style points.
    """
    # Match objects are always truthy, so bool() mirrors "is not None".
    return bool(offsetCommentExactRegex.match(line))
def match_offset_comment(line: str) -> str | None:
    """Return the hex digits of the offset comment that starts the line,
    or None when the line does not start with such a comment."""
    # TODO: intended to skip the expensive regex match, but is it necessary?
    if line.startswith('//'):
        found = offsetCommentRegex.match(line)
        if found is not None:
            return found.group(1)
    return None
def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
    """Scan the stream line by line and collect one CodeBlock for every
    function that is introduced by an offset comment.

    Each CodeBlock carries the offset parsed from the comment (as an int),
    the signature line, and the first and last line indexes of the block.
    Line indexes come straight from enumerate() and are therefore
    zero-based. Because the stream is read front to back, the result is
    ordered by line number.

    A block that is still open when the stream ends is not reported.
    """
    found = []
    cur_offset = None
    cur_signature = None
    first_line = None
    mode = ReaderState.WANT_OFFSET

    for index, text in enumerate(stream):
        if mode != ReaderState.WANT_OFFSET:
            # Naive but reasonable assumption: a function ends with a
            # curly brace in the first column of its own line.
            closing_brace = text.startswith('}')

            if closing_brace or match_offset_comment(text) is not None:
                # Either the block closed normally, or a new offset comment
                # appeared before the closing brace. In the latter case the
                # block is ended on the previous line and the current line
                # is handled again below as the start of the next block.
                last_line = index if closing_brace else index - 1
                found.append(CodeBlock(offset=cur_offset,
                                       signature=cur_signature,
                                       start_line=first_line,
                                       end_line=last_line))
                mode = ReaderState.WANT_OFFSET
            elif (mode == ReaderState.WANT_SIG
                    and not is_blank_or_comment(text)):
                # First real line after the offset comment: keep it as the
                # signature so the block can be identified in reports.
                cur_signature = text.strip()
                mode = ReaderState.IN_FUNC

        # Deliberately a separate test rather than an else-branch: a line
        # that just closed a block may itself open the next one.
        if mode == ReaderState.WANT_OFFSET:
            hex_digits = match_offset_comment(text)
            if hex_digits is not None:
                cur_offset = int(hex_digits, 16)
                first_line = index
                mode = ReaderState.WANT_SIG

    return found
def file_is_cpp(filename: str) -> bool:
    """Tell whether the file name carries a .cpp extension, ignoring
    letter case."""
    extension = os.path.splitext(filename)[1]
    return extension.lower() == '.cpp'
def walk_source_dir(source: str) -> Iterator[str]:
    """Generator to walk the given directory recursively and yield the
    path of every .cpp file found.

    Each yielded value is a single path string built by joining the
    directory being visited with the file name.
    """
    for subdir, _dirs, files in os.walk(source):
        for name in files:
            if file_is_cpp(name):
                yield os.path.join(subdir, name)
def sig_truncate(sig: str) -> str:
    """Shorten a function signature for display.

    Signatures of 50 characters or more are cut to their first 47
    characters followed by '...'. Anything shorter is returned unchanged,
    so no text is ever dropped without the ellipsis marking it.
    """
    if len(sig) >= 50:
        return f'{sig[:47]}...'
    return sig
def check_file(filename: str, verbose: bool = False) -> bool:
    """Read the given file and decide whether its code blocks appear in
    ascending offset order.

    Returns False, printing nothing, when the blocks are in order. When
    they are not, the file name is printed and True is returned; in
    verbose mode one line per block is printed as well, followed by an
    empty line.
    """
    with open(filename, 'r') as source:
        blocks = find_code_blocks(source)

    file_order = [blk.offset for blk in blocks]
    ideal_order = sorted(file_order)
    if file_order == ideal_order:
        return False

    print(filename)
    if not verbose:
        return True

    # Position each offset would have if the file were sorted.
    rank_of = {off: rank for rank, off in enumerate(ideal_order)}
    previous = 0

    for blk in blocks:
        # '!' marks a block whose offset does not exceed the one before
        # it, i.e. a point where the ascending run is broken.
        columns = [
            ' ' if blk.offset > previous else '!',
            f'{blk.offset:08x}',
            f'{blk.end_line - blk.start_line:4} lines',
            f'{rank_of[blk.offset]:3}',
            ' ',
            sig_truncate(blk.signature),
        ]
        print(' '.join(columns))
        previous = blk.offset

    print()
    return True
def parse_args(test_args: list | None = None) -> dict:
p = argparse.ArgumentParser()
p.add_argument('target', help='The file or directory to check.')
p.add_argument('--enforce', action=argparse.BooleanOptionalAction,
default=False,
help='Fail with error code if target is out of order.')
p.add_argument('--verbose', action=argparse.BooleanOptionalAction,
default=False,
help=('Display each code block in the file and show '
'where each consecutive run of blocks is broken.'))
if test_args is None:
args = p.parse_args()
else:
args = p.parse_args(test_args)
return vars(args)
def main():
    """Command-line entry point: check the target file or directory and
    report how many files have their code blocks out of order.

    Exits with the message 'Invalid target' when the target is neither a
    directory nor an existing .cpp file, and with status 1 when files are
    out of order and --enforce was given.
    """
    options = parse_args()
    target = options['target']

    if os.path.isdir(target):
        candidates = list(walk_source_dir(target))
    elif os.path.isfile(target) and file_is_cpp(target):
        candidates = [target]
    else:
        sys.exit('Invalid target')

    # check_file prints its own per-file report as a side effect.
    jumbled_count = sum(
        1 for path in candidates if check_file(path, options['verbose']))

    if jumbled_count == 0:
        return

    noun = 'files are' if jumbled_count > 1 else 'file is'
    print(f'{jumbled_count} {noun} out of order')

    if options['enforce']:
        sys.exit(1)
# Run the checker only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()