mirror of
https://github.com/isledecomp/isle.git
synced 2026-01-24 00:31:16 +00:00
246 lines
8.3 KiB
Python
246 lines
8.3 KiB
Python
import os
|
|
import re
|
|
import sys
|
|
import argparse
|
|
from typing import List, Iterator, TextIO
|
|
from collections import namedtuple
|
|
from enum import Enum
|
|
|
|
|
|
class ReaderState(Enum):
    """States of the line-by-line reader used by find_code_blocks."""

    # Looking for the next offset comment.
    WANT_OFFSET = 0

    # An offset comment was seen; the next line that is not blank or a
    # comment is taken as the function signature.
    WANT_SIG = 1

    # The signature was captured; reading until the function ends.
    IN_FUNC = 2
|
|
|
|
|
|
# One annotated function: the offset taken from its comment, the text of
# its signature line, and the line numbers where the block starts and ends.
CodeBlock = namedtuple(
    typename='CodeBlock',
    field_names=('offset', 'signature', 'start_line', 'end_line'),
)
|
|
|
|
# Lenient matcher for the offset comment. It tolerates a missing space
# after '//' and after 'OFFSET:', a missing '0x' prefix, and any letter
# case. Group 1 holds the hex digits without the prefix.
offsetCommentRegex = re.compile(
    r'//\s?OFFSET:\s?\w+ (?:0x)?([a-f0-9]+)',
    re.IGNORECASE,
)
|
|
|
|
# Strict matcher for the form used in most places: upper-case text,
# single spaces, and a lower-case hex value that carries the '0x' prefix.
# Group 1 holds the hex value including the prefix.
offsetCommentExactRegex = re.compile(
    r'// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)'
)
|
|
|
|
|
|
def is_blank_or_comment(line: str) -> bool:
    """Helper to read ahead after the offset comment is matched.

    Blank lines or other comments may sit between the offset comment and
    the function signature. This reports whether `line` is one of those:
    empty after stripping, starting with '//' or '/*', or ending with '*/'."""
    text = line.strip()
    if not text:
        return True

    return text.startswith(('//', '/*')) or text.endswith('*/')
|
|
|
|
|
|
def is_exact_offset_comment(line: str) -> bool:
    """Report whether `line` starts with an offset comment written in our
    exact (unofficial) syntax.

    When it does not, we may want to alert the user to fix it for
    style points."""
    exact_match = offsetCommentExactRegex.match(line)
    return exact_match is not None
|
|
|
|
|
|
def match_offset_comment(line: str) -> str | None:
|
|
# TODO: intended to skip the expensive regex match, but is it necessary?
|
|
if not line.startswith('//'):
|
|
return None
|
|
|
|
match = offsetCommentRegex.match(line)
|
|
return match.group(1) if match is not None else None
|
|
|
|
|
|
def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
    """Read the IO stream (file) line-by-line and give the following report:
    Foreach code block (function) in the file, what are its starting and
    ending line numbers, and what is the given offset in the original
    binary. We expect the result to be ordered by line number because we
    are reading the file from start to finish.

    Line numbers are zero-based, as produced by enumerate(). A block for
    which no signature line was seen is reported with an empty signature.
    A block that is still open when the stream ends is closed on the
    last line so that its offset is not lost."""

    blocks = []

    offset = None
    function_sig = ''
    start_line = None
    last_line = None
    state = ReaderState.WANT_OFFSET

    def close_block(end_line: int) -> None:
        # Record the block that is currently open, ending at end_line.
        blocks.append(CodeBlock(offset=offset,
                                signature=function_sig,
                                start_line=start_line,
                                end_line=end_line))

    for line_no, line in enumerate(stream):
        last_line = line_no

        if state in (ReaderState.WANT_SIG, ReaderState.IN_FUNC):
            # Naive but reasonable assumption that functions will end with
            # a curly brace on its own line with no prepended spaces.
            if line.startswith('}'):
                close_block(line_no)
                state = ReaderState.WANT_OFFSET
            elif match_offset_comment(line) is not None:
                # We hit another offset unexpectedly before detecting the
                # end of the function. We can recover easily by just
                # ending the function on the previous line. The check
                # below then starts the next block from this same line.
                close_block(line_no - 1)
                state = ReaderState.WANT_OFFSET
            elif (state == ReaderState.WANT_SIG
                  and not is_blank_or_comment(line)):
                # We want to grab the function signature so we can
                # identify the code block. Blank lines and comments that
                # follow the offset comment are skipped.
                function_sig = line.strip()
                state = ReaderState.IN_FUNC

        if state == ReaderState.WANT_OFFSET:
            match = match_offset_comment(line)
            if match is not None:
                offset = int(match, 16)
                # Forget the previous function's signature so it cannot
                # be attributed to this block if no signature follows.
                function_sig = ''
                start_line = line_no
                state = ReaderState.WANT_SIG

    # The stream ended before the open block was terminated. Close it
    # here, the same way an unexpected offset comment would.
    if state in (ReaderState.WANT_SIG, ReaderState.IN_FUNC):
        close_block(last_line)

    return blocks
|
|
|
|
|
|
def file_is_cpp(filename: str) -> bool:
    """Report whether `filename` has a .cpp extension, in any letter case."""
    # TODO: expand to check header files also?
    _, extension = os.path.splitext(filename)
    return extension.lower() == '.cpp'
|
|
|
|
|
|
def walk_source_dir(source: str) -> Iterator[str]:
    """Generator to walk the given directory recursively and return
    any .cpp files found.

    Each yielded value is the path of one file, built by joining the
    directory it was found in with its file name."""

    for subdir, _dirs, files in os.walk(source):
        for name in files:
            if file_is_cpp(name):
                yield os.path.join(subdir, name)
|
|
|
|
|
|
def sig_truncate(sig: str) -> str:
    """Helper to truncate function names to 50 chars and append ellipsis
    if needed. Goal is to stay under 80 columns for tool output.

    A signature of 50 characters or fewer is returned unchanged. A longer
    one is cut to its first 47 characters followed by '...', so the
    result is never longer than 50 characters."""
    max_len = 50
    ellipsis = '...'

    if len(sig) <= max_len:
        return sig

    return sig[:max_len - len(ellipsis)] + ellipsis
|
|
|
|
|
|
def get_inexact_offset_comments(stream: TextIO) -> List[tuple]:
    """Read the file stream and return the line number and string
    for any offset comments that don't exactly match the template.

    Each entry is a (line_no, text) tuple. Line numbers start at 1, to
    agree with what an editor shows, and the text is the stripped line."""
    return [
        (line_no, line.strip())
        for line_no, line in enumerate(stream, start=1)
        if match_offset_comment(line) and not is_exact_offset_comment(line)
    ]
|
|
|
|
|
|
def check_file(filename: str, verbose: bool = False) -> bool:
    """Open and read the given file, then check whether the code blocks
    are in order. If verbose, print each block.

    Returns True when the offsets of the code blocks are not in ascending
    order, and False otherwise. Nothing is printed for a file that is in
    order unless verbose is set and it has inexact offset comments."""

    with open(filename, 'r') as f:
        code_blocks = find_code_blocks(f)
        # TODO: Should combine these checks if/when we refactor.
        # This is just for simplicity / proof of concept.
        # Rewind for the second pass. seek() takes (offset, whence).
        f.seek(0, os.SEEK_SET)
        bad_comments = get_inexact_offset_comments(f)

    just_offsets = [block.offset for block in code_blocks]
    sorted_offsets = sorted(just_offsets)
    file_out_of_order = just_offsets != sorted_offsets

    # If we detect inexact comments, don't print anything unless we are
    # in verbose mode. If the file is out of order, we always print the
    # file name.
    should_report = (bool(bad_comments) and verbose) or file_out_of_order

    if not should_report:
        return False

    # Else: we are alerting to some problem in this file
    print(filename)
    if verbose:
        if file_out_of_order:
            # Position of each offset in the sorted order.
            order_lookup = {k: i for i, k in enumerate(sorted_offsets)}
            prev_offset = 0

            for block in code_blocks:
                msg = ' '.join([
                    # '!' marks a block that breaks the ascending run.
                    ' ' if block.offset > prev_offset else '!',
                    f'{block.offset:08x}',
                    f'{block.end_line - block.start_line:4} lines',
                    f'{order_lookup[block.offset]:3}',
                    ' ',
                    # A block may have no signature; print it as empty.
                    sig_truncate(block.signature or ''),
                ])
                print(msg)
                prev_offset = block.offset

        for (line_no, line) in bad_comments:
            print(f'* line {line_no:3} bad offset comment ({line})')

        print()

    return file_out_of_order
|
|
|
|
|
|
def parse_args(test_args: list | None = None) -> dict:
|
|
p = argparse.ArgumentParser()
|
|
p.add_argument('target', help='The file or directory to check.')
|
|
p.add_argument('--enforce', action=argparse.BooleanOptionalAction,
|
|
default=False,
|
|
help='Fail with error code if target is out of order.')
|
|
p.add_argument('--verbose', action=argparse.BooleanOptionalAction,
|
|
default=False,
|
|
help=('Display each code block in the file and show '
|
|
'where each consecutive run of blocks is broken.'))
|
|
|
|
if test_args is None:
|
|
args = p.parse_args()
|
|
else:
|
|
args = p.parse_args(test_args)
|
|
|
|
return vars(args)
|
|
|
|
|
|
def main():
    """Check the target named on the command line and report how many
    files have their code blocks out of order.

    The target is either a directory, which is searched recursively for
    .cpp files, or a single .cpp file. Anything else exits with the
    message 'Invalid target'. With --enforce, the process exits with
    status 1 when at least one file is out of order."""
    args = parse_args()
    target = args['target']

    if os.path.isdir(target):
        files_to_check = list(walk_source_dir(target))
    elif os.path.isfile(target) and file_is_cpp(target):
        files_to_check = [target]
    else:
        sys.exit('Invalid target')

    files_out_of_order = 0
    for path in files_to_check:
        if check_file(path, args['verbose']):
            files_out_of_order += 1

    # Nothing to report and nothing to enforce.
    if files_out_of_order == 0:
        return

    plural = 'files are' if files_out_of_order > 1 else 'file is'
    print(' '.join((str(files_out_of_order), plural, 'out of order')))

    if args['enforce']:
        sys.exit(1)
|
|
|
|
|
|
# Run the checker only when this file is executed as a script.
if __name__ == '__main__':
    main()
|