isle/tools/checkorder/checkorder.py

246 lines
8.3 KiB
Python

import os
import re
import sys
import argparse
from typing import List, Iterator, TextIO
from collections import namedtuple
from enum import Enum
class ReaderState(Enum):
    """States of the line-by-line reader used by find_code_blocks."""
    # Scanning for the next offset comment.
    WANT_OFFSET = 0
    # An offset comment was seen; looking for the function signature line.
    WANT_SIG = 1
    # The signature was captured; waiting for the end of the function.
    IN_FUNC = 2
# Record describing one annotated function: the offset parsed from its
# offset comment, the signature text, and the line span it covers.
CodeBlock = namedtuple(
    'CodeBlock',
    ('offset', 'signature', 'start_line', 'end_line'),
)
# Lenient pattern: accepts reasonable formatting variations of the offset
# comment (optional spaces, any letter case, optional "0x" prefix).
# Group 1 holds the hex digits without the "0x" prefix.
offsetCommentRegex = re.compile(
    r'//\s?OFFSET:\s?\w+ (?:0x)?([a-f0-9]+)', re.IGNORECASE)

# Strict pattern: the exact syntax used in most places (upper-case text,
# lower-case hex, single spaces). Group 1 includes the "0x" prefix.
offsetCommentExactRegex = re.compile(
    r'// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)')
def is_blank_or_comment(line: str) -> bool:
    """Return True when `line` is blank or looks like a comment.

    Used to read ahead after an offset comment has been matched: blank
    lines or other comments may sit between the offset comment and the
    function signature, and those should be skipped.
    """
    stripped = line.strip()
    if not stripped:
        return True
    # Line comments and block-comment openers are detected by prefix; a
    # line that closes a block comment is detected by its suffix.
    return stripped.startswith(('//', '/*')) or stripped.endswith('*/')
def is_exact_offset_comment(line: str) -> bool:
    """Tell whether `line` starts with an offset comment in the exact syntax.

    Offset comments that deviate from the (unofficial) template can then
    be reported to the user so they can be fixed for style points.
    """
    # re.Match objects are always truthy, so bool() mirrors "is not None".
    return bool(offsetCommentExactRegex.match(line))
def match_offset_comment(line: str) -> str | None:
    """Return the hex digits of the offset comment on `line`, if any.

    The returned string is group 1 of the lenient offset regex (the hex
    value without a "0x" prefix). Returns None when the line does not
    start with '//' or does not match the pattern.
    """
    # TODO: intended to skip the expensive regex match, but is it necessary?
    if line.startswith('//'):
        found = offsetCommentRegex.match(line)
        if found is not None:
            return found.group(1)
    return None
def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
    """Scan a source stream and report each offset-annotated code block.

    The stream is read line by line. For every code block (function)
    introduced by an offset comment, the result records the offset given
    for the original binary, the function signature, and the starting and
    ending line indexes (zero-based, as produced by enumerate).

    The result is ordered by line number because the stream is read from
    start to finish.

    A block whose end is detected before any signature line was seen gets
    an empty string as its signature, rather than inheriting the signature
    of the previous block.
    """
    blocks = []
    offset = None
    function_sig = ''
    start_line = None
    state = ReaderState.WANT_OFFSET

    for line_no, line in enumerate(stream):
        offset_match = match_offset_comment(line)

        if state in (ReaderState.WANT_SIG, ReaderState.IN_FUNC):
            if line.startswith('}'):
                # Naive but reasonable assumption that functions will end
                # with a curly brace on its own line with no prepended
                # spaces.
                end_line = line_no
            elif offset_match is not None:
                # We hit another offset unexpectedly before detecting the
                # end of the function. We can recover easily by just
                # ending the function on the previous line.
                end_line = line_no - 1
            else:
                end_line = None

            if end_line is not None:
                blocks.append(CodeBlock(offset=offset,
                                        signature=function_sig,
                                        start_line=start_line,
                                        end_line=end_line))
                state = ReaderState.WANT_OFFSET
            elif (state == ReaderState.WANT_SIG
                  and not is_blank_or_comment(line)):
                # Grab the function signature so the code block can be
                # identified. Blank lines and comments that follow the
                # offset comment are skipped.
                function_sig = line.strip()
                state = ReaderState.IN_FUNC

        # Deliberately a separate "if": a line that just terminated a block
        # may itself be the offset comment that opens the next one.
        if state == ReaderState.WANT_OFFSET and offset_match is not None:
            offset = int(offset_match, 16)
            # Forget the previous block's signature so it cannot leak into
            # this block if no signature line is found.
            function_sig = ''
            start_line = line_no
            state = ReaderState.WANT_SIG

    return blocks
def file_is_cpp(filename: str) -> bool:
    """Return True if `filename` has a .cpp extension (case-insensitive)."""
    # TODO: expand to check header files also?
    _, extension = os.path.splitext(filename)
    return extension.lower() == '.cpp'
def walk_source_dir(source: str) -> Iterator[str]:
    """Walk `source` recursively and yield the path of each .cpp file.

    Each yielded value is a single path string built by joining the
    directory being visited with the file name.
    """
    for subdir, _dirs, files in os.walk(source):
        for filename in files:
            if file_is_cpp(filename):
                yield os.path.join(subdir, filename)
def sig_truncate(sig: str) -> str:
    """Limit a function signature to at most 50 characters.

    Signatures of 50 characters or fewer are returned unchanged. Longer
    ones are cut to 47 characters and get a trailing '...', so the result
    is exactly 50 characters. The goal is to keep tool output under 80
    columns.
    """
    max_len = 50
    ellipsis = '...'
    # Only truncate when the text does not fit; otherwise characters would
    # be dropped without any ellipsis to signal it.
    if len(sig) <= max_len:
        return sig
    return sig[:max_len - len(ellipsis)] + ellipsis
def get_inexact_offset_comments(stream: TextIO) -> List[tuple]:
    """Find offset comments that do not exactly match the template.

    Reads the stream from its current position and returns a list of
    (line_number, stripped_line) tuples, one per offset comment that the
    lenient matcher accepts but the exact matcher rejects. Line numbers
    are 1-based so they correspond to what an editor displays.
    """
    return [
        (line_no, line.strip())
        for line_no, line in enumerate(stream, start=1)
        if match_offset_comment(line) is not None
        and not is_exact_offset_comment(line)
    ]
def check_file(filename: str, verbose: bool = False) -> bool:
    """Check whether the code blocks in `filename` are ordered by offset.

    The file is read twice: once to collect the code blocks and once to
    collect offset comments that do not match the exact template.

    The file name is printed whenever the file is out of order. With
    `verbose`, it is also printed when only inexact offset comments were
    found, and every code block and every inexact comment is listed.

    Returns True if the file's offsets are out of order, otherwise False.
    """
    with open(filename, 'r') as f:
        code_blocks = find_code_blocks(f)
        # TODO: Should combine these checks if/when we refactor.
        # This is just for simplicity / proof of concept.
        # Rewind to the start of the file for the second pass.
        f.seek(0, os.SEEK_SET)
        bad_comments = get_inexact_offset_comments(f)

    just_offsets = [block.offset for block in code_blocks]
    sorted_offsets = sorted(just_offsets)
    file_out_of_order = just_offsets != sorted_offsets

    # If we detect inexact comments, don't print anything unless we are
    # in verbose mode. If the file is out of order, we always print the
    # file name.
    should_report = file_out_of_order or (verbose and len(bad_comments) > 0)
    if not should_report:
        return False

    # Else: we are alerting to some problem in this file
    print(filename)
    if verbose:
        if file_out_of_order:
            # Position each offset would have if the file were sorted.
            order_lookup = {k: i for i, k in enumerate(sorted_offsets)}
            prev_offset = 0
            for block in code_blocks:
                msg = ' '.join([
                    # '!' marks a block that breaks the ascending run.
                    ' ' if block.offset > prev_offset else '!',
                    f'{block.offset:08x}',
                    f'{block.end_line - block.start_line:4} lines',
                    f'{order_lookup[block.offset]:3}',
                    ' ',
                    # A block may have no signature (None) if it ended
                    # before one was found; print it as empty text.
                    sig_truncate(block.signature or ''),
                ])
                print(msg)
                prev_offset = block.offset

        for (line_no, line) in bad_comments:
            print(f'* line {line_no:3} bad offset comment ({line})')

        print()

    return file_out_of_order
def parse_args(test_args: list | None = None) -> dict:
p = argparse.ArgumentParser()
p.add_argument('target', help='The file or directory to check.')
p.add_argument('--enforce', action=argparse.BooleanOptionalAction,
default=False,
help='Fail with error code if target is out of order.')
p.add_argument('--verbose', action=argparse.BooleanOptionalAction,
default=False,
help=('Display each code block in the file and show '
'where each consecutive run of blocks is broken.'))
if test_args is None:
args = p.parse_args()
else:
args = p.parse_args(test_args)
return vars(args)
def main():
    """Entry point: check the target file or directory for offset order.

    Exits with the message 'Invalid target' when the target is neither a
    directory nor a .cpp file. Prints a summary when any file is out of
    order and, if --enforce was given, exits with status 1 in that case.
    """
    args = parse_args()
    target = args['target']

    if os.path.isdir(target):
        files_to_check = list(walk_source_dir(target))
    elif os.path.isfile(target) and file_is_cpp(target):
        files_to_check = [target]
    else:
        sys.exit('Invalid target')

    # check_file returns True for each file whose offsets are jumbled.
    files_out_of_order = sum(
        1 for path in files_to_check if check_file(path, args['verbose']))

    if files_out_of_order == 0:
        return

    plural = files_out_of_order > 1
    print(' '.join([
        str(files_out_of_order),
        'files are' if plural else 'file is',
        'out of order'
    ]))

    if args['enforce']:
        sys.exit(1)
# Run the checker only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()