mirror of
https://github.com/isledecomp/isle.git
synced 2026-01-27 18:21:15 +00:00
Allow multiple offset markers, pep8 cleanup
This commit is contained in:
parent
f734d2733d
commit
09688ed83f
@ -10,6 +10,7 @@
|
|||||||
is_exact_offset_comment,
|
is_exact_offset_comment,
|
||||||
template_function_name,
|
template_function_name,
|
||||||
remove_trailing_comment,
|
remove_trailing_comment,
|
||||||
|
distinct_module,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -31,11 +32,8 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
|||||||
|
|
||||||
blocks = []
|
blocks = []
|
||||||
|
|
||||||
offset_match = OffsetMatch(module=None,
|
offset_matches = []
|
||||||
address=None,
|
|
||||||
is_template=None,
|
|
||||||
is_stub=None)
|
|
||||||
offset_comment = None
|
|
||||||
function_sig = None
|
function_sig = None
|
||||||
start_line = None
|
start_line = None
|
||||||
end_line = None
|
end_line = None
|
||||||
@ -50,15 +48,19 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
|||||||
# Do this before reading again so that an EOF will not
|
# Do this before reading again so that an EOF will not
|
||||||
# cause us to miss the last function of the file.
|
# cause us to miss the last function of the file.
|
||||||
if state == ReaderState.FUNCTION_DONE:
|
if state == ReaderState.FUNCTION_DONE:
|
||||||
block = CodeBlock(offset=offset_match.address,
|
# Our list of offset marks could have duplicates on
|
||||||
signature=function_sig,
|
# module name, so we'll eliminate those now.
|
||||||
start_line=start_line,
|
for offset_match in distinct_module(offset_matches):
|
||||||
end_line=end_line,
|
block = CodeBlock(offset=offset_match.address,
|
||||||
offset_comment=offset_comment,
|
signature=function_sig,
|
||||||
module=offset_match.module,
|
start_line=start_line,
|
||||||
is_template=offset_match.is_template,
|
end_line=end_line,
|
||||||
is_stub=offset_match.is_stub)
|
offset_comment=offset_match.comment,
|
||||||
blocks.append(block)
|
module=offset_match.module,
|
||||||
|
is_template=offset_match.is_template,
|
||||||
|
is_stub=offset_match.is_stub)
|
||||||
|
blocks.append(block)
|
||||||
|
offset_matches = []
|
||||||
state = ReaderState.WANT_OFFSET
|
state = ReaderState.WANT_OFFSET
|
||||||
|
|
||||||
if can_seek:
|
if can_seek:
|
||||||
@ -67,19 +69,33 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
|||||||
if line == '':
|
if line == '':
|
||||||
break
|
break
|
||||||
|
|
||||||
if (state != ReaderState.WANT_OFFSET and
|
new_match = match_offset_comment(line)
|
||||||
match_offset_comment(line) is not None):
|
if new_match is not None:
|
||||||
# We hit another offset unexpectedly.
|
# We will allow multiple offsets if we have just begun
|
||||||
# We can recover easily by just ending the function here.
|
# the code block, but not after we hit the curly brace.
|
||||||
end_line = line_no - 1
|
if state in (ReaderState.WANT_OFFSET, ReaderState.IN_TEMPLATE,
|
||||||
state = ReaderState.FUNCTION_DONE
|
ReaderState.WANT_SIG):
|
||||||
|
# If we detected an offset marker unexpectedly,
|
||||||
|
# we are handling it here so we can continue seeking.
|
||||||
|
can_seek = True
|
||||||
|
|
||||||
# Pause reading here so we handle the offset marker
|
offset_matches.append(new_match)
|
||||||
# on the next loop iteration
|
|
||||||
can_seek = False
|
|
||||||
|
|
||||||
# Regular state machine handling begins now
|
if new_match.is_template:
|
||||||
if state == ReaderState.IN_TEMPLATE:
|
state = ReaderState.IN_TEMPLATE
|
||||||
|
else:
|
||||||
|
state = ReaderState.WANT_SIG
|
||||||
|
else:
|
||||||
|
# We hit another offset unexpectedly.
|
||||||
|
# We can recover easily by just ending the function here.
|
||||||
|
end_line = line_no - 1
|
||||||
|
state = ReaderState.FUNCTION_DONE
|
||||||
|
|
||||||
|
# Pause reading here so we handle the offset marker
|
||||||
|
# on the next loop iteration
|
||||||
|
can_seek = False
|
||||||
|
|
||||||
|
elif state == ReaderState.IN_TEMPLATE:
|
||||||
# TEMPLATE functions are a special case. The signature is
|
# TEMPLATE functions are a special case. The signature is
|
||||||
# given on the next line (in a // comment)
|
# given on the next line (in a // comment)
|
||||||
function_sig = template_function_name(line)
|
function_sig = template_function_name(line)
|
||||||
@ -92,13 +108,14 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
|||||||
# marker. There is not a formal procedure for this, so just
|
# marker. There is not a formal procedure for this, so just
|
||||||
# assume the next "code line" is the function signature
|
# assume the next "code line" is the function signature
|
||||||
if not is_blank_or_comment(line):
|
if not is_blank_or_comment(line):
|
||||||
|
# Inline functions may end with a comment. Strip that out
|
||||||
|
# to help parsing.
|
||||||
function_sig = remove_trailing_comment(line.strip())
|
function_sig = remove_trailing_comment(line.strip())
|
||||||
|
|
||||||
# Now check to see if the opening curly bracket is on the
|
# Now check to see if the opening curly bracket is on the
|
||||||
# same line. clang-format should prevent this (BraceWrapping)
|
# same line. clang-format should prevent this (BraceWrapping)
|
||||||
# but it is easy to detect.
|
# but it is easy to detect.
|
||||||
# If the entire function is on one line, we can handle that
|
# If the entire function is on one line, handle that too.
|
||||||
# too, although this should be limited to inlines.
|
|
||||||
if function_sig.endswith('{'):
|
if function_sig.endswith('{'):
|
||||||
start_line = line_no
|
start_line = line_no
|
||||||
state = ReaderState.IN_FUNC
|
state = ReaderState.IN_FUNC
|
||||||
@ -122,18 +139,4 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
|||||||
end_line = line_no
|
end_line = line_no
|
||||||
state = ReaderState.FUNCTION_DONE
|
state = ReaderState.FUNCTION_DONE
|
||||||
|
|
||||||
elif state == ReaderState.WANT_OFFSET:
|
|
||||||
# If we detected an offset marker unexpectedly, we are handling
|
|
||||||
# it here so we can continue seeking.
|
|
||||||
can_seek = True
|
|
||||||
match = match_offset_comment(line)
|
|
||||||
if match is not None:
|
|
||||||
offset_match = match
|
|
||||||
offset_comment = line.strip()
|
|
||||||
|
|
||||||
if match.is_template:
|
|
||||||
state = ReaderState.IN_TEMPLATE
|
|
||||||
else:
|
|
||||||
state = ReaderState.WANT_SIG
|
|
||||||
|
|
||||||
return blocks
|
return blocks
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
# C++ Parser utility functions and data structures
|
# C++ Parser utility functions and data structures
|
||||||
from __future__ import annotations # python <3.10 compatibility
|
from __future__ import annotations # python <3.10 compatibility
|
||||||
import re
|
import re
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
@ -8,8 +8,8 @@
|
|||||||
['offset', 'signature', 'start_line', 'end_line',
|
['offset', 'signature', 'start_line', 'end_line',
|
||||||
'offset_comment', 'module', 'is_template', 'is_stub'])
|
'offset_comment', 'module', 'is_template', 'is_stub'])
|
||||||
|
|
||||||
OffsetMatch = namedtuple('OffsetMatch', ['module', 'address',
|
OffsetMatch = namedtuple('OffsetMatch', ['module', 'address', 'is_template',
|
||||||
'is_template', 'is_stub'])
|
'is_stub', 'comment'])
|
||||||
|
|
||||||
# This has not been formally established, but considering that "STUB"
|
# This has not been formally established, but considering that "STUB"
|
||||||
# is a temporary state for a function, we assume it will appear last,
|
# is a temporary state for a function, we assume it will appear last,
|
||||||
@ -74,4 +74,23 @@ def match_offset_comment(line: str) -> OffsetMatch | None:
|
|||||||
return OffsetMatch(module=match.group(1),
|
return OffsetMatch(module=match.group(1),
|
||||||
address=int(match.group(2), 16),
|
address=int(match.group(2), 16),
|
||||||
is_template=match.group(3) is not None,
|
is_template=match.group(3) is not None,
|
||||||
is_stub=match.group(4) is not None)
|
is_stub=match.group(4) is not None,
|
||||||
|
comment=line.strip())
|
||||||
|
|
||||||
|
|
||||||
|
def distinct_module(offsets: list[OffsetMatch]) -> list[OffsetMatch]:
    """Return the offset markers with duplicate module names removed.

    Module names are compared case-insensitively. When a module name is
    repeated, the marker that appears first is kept and the later ones
    are dropped. Surviving markers keep the relative order they had in
    `offsets`.

    Any iterable of objects with a string `module` attribute is accepted.
    A new list is always returned, even for empty or single-item input,
    so the caller can mutate the result without touching `offsets`.
    """

    # Dict maintains insertion order in python >=3.7, so the first marker
    # seen for a module also fixes that module's position in the result.
    first_by_module = {}
    for offset in offsets:
        # setdefault only stores the value when the key is new, which is
        # exactly the "first occurrence wins" rule.
        first_by_module.setdefault(offset.module.upper(), offset)

    return list(first_by_module.values())
|
||||||
|
|||||||
25
tools/isledecomp/tests/samples/multiple_offsets.cpp
Normal file
25
tools/isledecomp/tests/samples/multiple_offsets.cpp
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
// Sample for python unit tests
|
||||||
|
// Not part of the decomp
|
||||||
|
|
||||||
|
// Handling multiple offset markers
|
||||||
|
|
||||||
|
// OFFSET: TEST 0x1234
|
||||||
|
// OFFSET: HELLO 0x5555
|
||||||
|
void different_modules()
|
||||||
|
{
|
||||||
|
// TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
// OFFSET: TEST 0x2345
|
||||||
|
// OFFSET: TEST 0x1234
|
||||||
|
void same_module()
|
||||||
|
{
|
||||||
|
// TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
// OFFSET: TEST 0x2002
|
||||||
|
// OFFSET: test 0x1001
|
||||||
|
void same_case_insensitive()
|
||||||
|
{
|
||||||
|
// TODO
|
||||||
|
}
|
||||||
@ -89,11 +89,12 @@ def test_indented():
|
|||||||
assert len(blocks) == 2
|
assert len(blocks) == 2
|
||||||
assert blocks[0].offset == int('0x12345678', 16)
|
assert blocks[0].offset == int('0x12345678', 16)
|
||||||
assert blocks[0].start_line == 15
|
assert blocks[0].start_line == 15
|
||||||
#assert blocks[0].end_line == 18
|
# assert blocks[0].end_line == 18
|
||||||
|
|
||||||
assert blocks[1].offset == int('0xdeadbeef', 16)
|
assert blocks[1].offset == int('0xdeadbeef', 16)
|
||||||
assert blocks[1].start_line == 22
|
assert blocks[1].start_line == 22
|
||||||
#assert blocks[1].end_line == 24
|
# assert blocks[1].end_line == 24
|
||||||
|
|
||||||
|
|
||||||
def test_inline():
|
def test_inline():
|
||||||
with sample_file('inline.cpp') as f:
|
with sample_file('inline.cpp') as f:
|
||||||
@ -103,3 +104,25 @@ def test_inline():
|
|||||||
for block in blocks:
|
for block in blocks:
|
||||||
assert block.start_line is not None
|
assert block.start_line is not None
|
||||||
assert block.start_line == block.end_line
|
assert block.start_line == block.end_line
|
||||||
|
|
||||||
|
|
||||||
|
def test_multiple_offsets():
    """Several offset markers may precede a single code block.

    Each marker yields its own block, except that module names must be
    distinct (case-insensitive): when a module repeats, only its first
    marker is used."""
    with sample_file('multiple_offsets.cpp') as stream:
        found = find_code_blocks(stream)

    assert len(found) == 4
    test_block, hello_block, same_module_block, same_case_block = found

    # Two different modules on one function: both are reported, and
    # both point at the same function start.
    assert test_block.module == 'TEST'
    assert test_block.start_line == 9

    assert hello_block.module == 'HELLO'
    assert hello_block.start_line == 9

    # Duplicate modules are ignored
    assert same_module_block.start_line == 16
    assert same_module_block.offset == 0x2345

    # Module names differing only by case count as duplicates too.
    assert same_case_block.module == 'TEST'
    assert same_case_block.offset == 0x2002
|
||||||
|
|||||||
@ -1,8 +1,10 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
from collections import namedtuple
|
||||||
from isledecomp.parser.util import (
|
from isledecomp.parser.util import (
|
||||||
is_blank_or_comment,
|
is_blank_or_comment,
|
||||||
match_offset_comment,
|
match_offset_comment,
|
||||||
is_exact_offset_comment,
|
is_exact_offset_comment,
|
||||||
|
distinct_module,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -85,3 +87,26 @@ def test_offset_match(line: str, match: bool, exact):
|
|||||||
@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
|
@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
|
||||||
def test_exact_offset_comment(line: str, exact: bool, match):
|
def test_exact_offset_comment(line: str, exact: bool, match):
|
||||||
assert is_exact_offset_comment(line) is exact
|
assert is_exact_offset_comment(line) is exact
|
||||||
|
|
||||||
|
|
||||||
|
# Stand-in for OffsetMatch used by the test below. distinct_module only
# reads the `module` attribute, so a two-field tuple is enough.
MiniOfs = namedtuple('MiniOfs', 'module value')

# Each entry is (input markers, markers expected after de-duplication).
distinct_module_samples = [
    # empty set
    (
        [],
        [],
    ),
    # same module name
    (
        [MiniOfs('TEST', 123), MiniOfs('TEST', 555)],
        [MiniOfs('TEST', 123)],
    ),
    # same module name, case-insensitive
    (
        [MiniOfs('test', 123), MiniOfs('TEST', 555)],
        [MiniOfs('test', 123)],
    ),
    # duplicates, non-consecutive
    (
        [MiniOfs('test', 123), MiniOfs('abc', 111), MiniOfs('TEST', 555)],
        [MiniOfs('test', 123), MiniOfs('abc', 111)],
    ),
]


@pytest.mark.parametrize('sample, expected', distinct_module_samples)
def test_distinct_module(sample: [MiniOfs], expected: [MiniOfs]):
    """The first marker for each module (case-insensitive) is kept."""
    deduplicated = distinct_module(sample)
    assert deduplicated == expected
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user