Merge pull request #1 from disinvite/new-syntax

Refactored parser
Christian Semmler 2023-12-02 19:21:05 -05:00 committed by GitHub
commit 931e8c2f54
30 changed files with 1272 additions and 390 deletions

View File

@ -12,13 +12,13 @@
class LegoControlManager; class LegoControlManager;
// VTABLE: LEGO1 0x100d87b8 SYNTHETIC // VTABLE: LEGO1 0x100d87b8
// class MxCollection<LegoEventNotificationParam> // class MxCollection<LegoEventNotificationParam>
// VTABLE: LEGO1 0x100d87d0 SYNTHETIC // VTABLE: LEGO1 0x100d87d0
// class MxList<LegoEventNotificationParam> // class MxList<LegoEventNotificationParam>
// VTABLE: LEGO1 0x100d87e8 SYNTHETIC // VTABLE: LEGO1 0x100d87e8
// class MxQueue<LegoEventNotificationParam> // class MxQueue<LegoEventNotificationParam>
// VTABLE: LEGO1 0x100d8800 // VTABLE: LEGO1 0x100d8800

View File

@ -5,7 +5,7 @@
#include "mxlist.h" #include "mxlist.h"
#include "mxtypes.h" #include "mxtypes.h"
// VTABLE: LEGO1 0x100d6320 SYNTHETIC // VTABLE: LEGO1 0x100d6320
// class MxPtrList<LegoPathController> // class MxPtrList<LegoPathController>
// VTABLE: LEGO1 0x100d6338 // VTABLE: LEGO1 0x100d6338
@ -17,10 +17,10 @@ class LegoPathControllerList : public MxPtrList<LegoPathController> {
static void Destroy(LegoPathController*); static void Destroy(LegoPathController*);
}; };
// VTABLE: LEGO1 0x100d6380 SYNTHETIC // VTABLE: LEGO1 0x100d6380
// class MxCollection<LegoPathController *> // class MxCollection<LegoPathController *>
// VTABLE: LEGO1 0x100d6398 SYNTHETIC // VTABLE: LEGO1 0x100d6398
// class MxList<LegoPathController *> // class MxList<LegoPathController *>
#endif // LEGOPATHCONTROLLERLIST_H #endif // LEGOPATHCONTROLLERLIST_H

View File

@ -17,7 +17,8 @@ class MxDeviceModeFinder {
MxDirectDraw::DeviceModesInfo* m_deviceInfo; // +0xe0 MxDirectDraw::DeviceModesInfo* m_deviceInfo; // +0xe0
}; };
// VTABLE: LEGO1 0x100db814 (or 0x100d9cc8?) // VTABLE: LEGO1 0x100db814
// or is it 0x100d9cc8?
// SIZE 0x198 // SIZE 0x198
class MxDeviceEnumerate { class MxDeviceEnumerate {
public: public:

View File

@ -6,10 +6,10 @@
class MxDSAction; class MxDSAction;
// VTABLE: LEGO1 0x100dcea8 SYNTHETIC // VTABLE: LEGO1 0x100dcea8
// class MxCollection<MxDSAction *> // class MxCollection<MxDSAction *>
// VTABLE: LEGO1 0x100dcec0 SYNTHETIC // VTABLE: LEGO1 0x100dcec0
// class MxList<MxDSAction *> // class MxList<MxDSAction *>
// VTABLE: LEGO1 0x100dced8 // VTABLE: LEGO1 0x100dced8

View File

@ -3,7 +3,8 @@
#include "mxomni.h" #include "mxomni.h"
#include "mxvideomanager.h" #include "mxvideomanager.h"
// GLOBAL: LEGO1 0x10102188 0x400 // GLOBAL: LEGO1 0x10102188
// SIZE 0x400
PALETTEENTRY g_defaultPaletteEntries[256] = { PALETTEENTRY g_defaultPaletteEntries[256] = {
{0u, 0u, 0u, 0u}, {128u, 0u, 0u, 0u}, {0u, 128u, 0u, 0u}, {128u, 128u, 0u, 0u}, {0u, 0u, 0u, 0u}, {128u, 0u, 0u, 0u}, {0u, 128u, 0u, 0u}, {128u, 128u, 0u, 0u},
{0u, 0u, 128u, 0u}, {128u, 0u, 128u, 0u}, {0u, 128u, 128u, 0u}, {128u, 128u, 128u, 0u}, {0u, 0u, 128u, 0u}, {128u, 0u, 128u, 0u}, {0u, 128u, 128u, 0u}, {128u, 128u, 128u, 0u},

View File

@ -5,7 +5,7 @@
class MxPresenter; class MxPresenter;
// VTABLE: LEGO1 0x100d62f0 SYNTHETIC // VTABLE: LEGO1 0x100d62f0
// class MxPtrList<MxPresenter> // class MxPtrList<MxPresenter>
// VTABLE: LEGO1 0x100d6308 // VTABLE: LEGO1 0x100d6308
@ -17,10 +17,10 @@ class MxPresenterList : public MxPtrList<MxPresenter> {
typedef MxListCursorChildChild<MxPresenter*> MxPresenterListCursor; typedef MxListCursorChildChild<MxPresenter*> MxPresenterListCursor;
// VTABLE: LEGO1 0x100d6350 SYNTHETIC // VTABLE: LEGO1 0x100d6350
// class MxCollection<MxPresenter *> // class MxCollection<MxPresenter *>
// VTABLE: LEGO1 0x100d6368 SYNTHETIC // VTABLE: LEGO1 0x100d6368
// class MxList<MxPresenter *> // class MxList<MxPresenter *>
#endif // MXPRESENTERLIST_H #endif // MXPRESENTERLIST_H

View File

@ -6,13 +6,13 @@
struct MxRegionTopBottom; struct MxRegionTopBottom;
struct MxRegionLeftRight; struct MxRegionLeftRight;
// VTABLE: LEGO1 0x100dcb10 SYNTHETIC // VTABLE: LEGO1 0x100dcb10
// class MxCollection<MxRegionTopBottom *> // class MxCollection<MxRegionTopBottom *>
// VTABLE: LEGO1 0x100dcb28 SYNTHETIC // VTABLE: LEGO1 0x100dcb28
// class MxList<MxRegionTopBottom *> // class MxList<MxRegionTopBottom *>
// VTABLE: LEGO1 0x100dcb40 SYNTHETIC // VTABLE: LEGO1 0x100dcb40
// class MxPtrList<MxRegionTopBottom> // class MxPtrList<MxRegionTopBottom>
// VTABLE: LEGO1 0x100dcb58 // VTABLE: LEGO1 0x100dcb58
@ -24,18 +24,20 @@ class MxRegionList : public MxPtrList<MxRegionTopBottom> {
}; };
// VTABLE: LEGO1 0x100dcb88 // VTABLE: LEGO1 0x100dcb88
// class MxListCursorChildChild<MxRegionTopBottom *>
typedef MxListCursorChildChild<MxRegionTopBottom*> MxRegionListCursor; typedef MxListCursorChildChild<MxRegionTopBottom*> MxRegionListCursor;
// VTABLE: LEGO1 0x100dcc10 // VTABLE: LEGO1 0x100dcc10
// class MxListCursorChildChild<MxRegionLeftRight *>
typedef MxListCursorChildChild<MxRegionLeftRight*> MxRegionLeftRightListCursor; typedef MxListCursorChildChild<MxRegionLeftRight*> MxRegionLeftRightListCursor;
// VTABLE: LEGO1 0x100dcc40 SYNTHETIC // VTABLE: LEGO1 0x100dcc40
// class MxCollection<MxRegionLeftRight *> // class MxCollection<MxRegionLeftRight *>
// VTABLE: LEGO1 0x100dcc58 SYNTHETIC // VTABLE: LEGO1 0x100dcc58
// class MxList<MxRegionLeftRight *> // class MxList<MxRegionLeftRight *>
// VTABLE: LEGO1 0x100dcc70 SYNTHETIC // VTABLE: LEGO1 0x100dcc70
// class MxPtrList<MxRegionLeftRight> // class MxPtrList<MxRegionLeftRight>
// VTABLE: LEGO1 0x100dcc88 // VTABLE: LEGO1 0x100dcc88

View File

@ -6,10 +6,10 @@
class MxStreamChunk; class MxStreamChunk;
// VTABLE: LEGO1 0x100dc5d0 SYNTHETIC // VTABLE: LEGO1 0x100dc5d0
// class MxCollection<MxStreamChunk *> // class MxCollection<MxStreamChunk *>
// VTABLE: LEGO1 0x100dc5e8 SYNTHETIC // VTABLE: LEGO1 0x100dc5e8
// class MxList<MxStreamChunk *> // class MxList<MxStreamChunk *>
// VTABLE: LEGO1 0x100dc600 // VTABLE: LEGO1 0x100dc600

View File

@ -8,7 +8,7 @@ DECOMP_SIZE_ASSERT(MxStreamListMxDSSubscriber, 0xc);
// FUNCTION: LEGO1 0x100bfa80 // FUNCTION: LEGO1 0x100bfa80
MxDSAction* MxStreamListMxDSAction::Find(MxDSAction* p_action, MxBool p_delete) MxDSAction* MxStreamListMxDSAction::Find(MxDSAction* p_action, MxBool p_delete)
{ {
// DECOMP: ALPHA 0x1008b99d ? // DECOMP ALPHA 0x1008b99d ?
MxDSAction* found = NULL; MxDSAction* found = NULL;

View File

@ -20,10 +20,10 @@ class MxVariableTable : public MxHashTable<MxVariable*> {
virtual MxU32 Hash(MxVariable*) override; // vtable+0x18 virtual MxU32 Hash(MxVariable*) override; // vtable+0x18
}; };
// VTABLE: LEGO1 0x100dc1b0 SYNTHETIC // VTABLE: LEGO1 0x100dc1b0
// class MxCollection<MxVariable *> // class MxCollection<MxVariable *>
// VTABLE: LEGO1 0x100dc1e8 SYNTHETIC // VTABLE: LEGO1 0x100dc1e8
// class MxHashTable<MxVariable *> // class MxHashTable<MxVariable *>
#endif // MXVARIABLETABLE_H #endif // MXVARIABLETABLE_H

View File

@ -2,8 +2,7 @@
import sys import sys
import argparse import argparse
from isledecomp.dir import walk_source_dir, is_file_cpp from isledecomp.dir import walk_source_dir, is_file_cpp
from isledecomp.parser import find_code_blocks from isledecomp.parser import DecompParser
from isledecomp.parser.util import is_exact_offset_comment
def sig_truncate(sig: str) -> str: def sig_truncate(sig: str) -> str:
@ -16,23 +15,21 @@ def check_file(filename: str, verbose: bool = False) -> bool:
"""Open and read the given file, then check whether the code blocks """Open and read the given file, then check whether the code blocks
are in order. If verbose, print each block.""" are in order. If verbose, print each block."""
parser = DecompParser()
with open(filename, "r", encoding="utf-8") as f: with open(filename, "r", encoding="utf-8") as f:
code_blocks = find_code_blocks(f) parser.read_lines(f)
bad_comments = [ just_offsets = [block.offset for block in parser.functions]
(block.start_line, block.offset_comment)
for block in code_blocks
if not is_exact_offset_comment(block.offset_comment)
]
just_offsets = [block.offset for block in code_blocks]
sorted_offsets = sorted(just_offsets) sorted_offsets = sorted(just_offsets)
file_out_of_order = just_offsets != sorted_offsets file_out_of_order = just_offsets != sorted_offsets
# TODO: When we add parser error severity, actual errors that obstruct
# parsing should probably be shown here regardless of verbose mode
# If we detect inexact comments, don't print anything unless we are # If we detect inexact comments, don't print anything unless we are
# in verbose mode. If the file is out of order, we always print the # in verbose mode. If the file is out of order, we always print the
# file name. # file name.
should_report = (len(bad_comments) > 0 and verbose) or file_out_of_order should_report = (len(parser.alerts) > 0 and verbose) or file_out_of_order
if not should_report and not file_out_of_order: if not should_report and not file_out_of_order:
return False return False
@ -44,22 +41,22 @@ def check_file(filename: str, verbose: bool = False) -> bool:
order_lookup = {k: i for i, k in enumerate(sorted_offsets)} order_lookup = {k: i for i, k in enumerate(sorted_offsets)}
prev_offset = 0 prev_offset = 0
for block in code_blocks: for fun in parser.functions:
msg = " ".join( msg = " ".join(
[ [
" " if block.offset > prev_offset else "!", " " if fun.offset > prev_offset else "!",
f"{block.offset:08x}", f"{fun.offset:08x}",
f"{block.end_line - block.start_line:4} lines", f"{fun.end_line - fun.line_number:4} lines",
f"{order_lookup[block.offset]:3}", f"{order_lookup[fun.offset]:3}",
" ", " ",
sig_truncate(block.signature), sig_truncate(fun.signature),
] ]
) )
print(msg) print(msg)
prev_offset = block.offset prev_offset = fun.offset
for line_no, line in bad_comments: for alert in parser.alerts:
print(f"* line {line_no:3} bad offset comment ({line})") print(f"* line {alert.line_number:4} {alert.code} ({alert.line})")
print() print()
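
In short, the check now drives DecompParser directly. For reference, a minimal sketch of the same flow outside the script (the file name here is illustrative, not part of the change):

    from isledecomp.parser import DecompParser

    parser = DecompParser()
    with open("some_file.cpp", "r", encoding="utf-8") as f:  # illustrative path
        parser.read_lines(f)

    # Out-of-order check: function offsets should already be sorted
    offsets = [fun.offset for fun in parser.functions]
    if offsets != sorted(offsets):
        print("functions are out of order")

    # Syntax problems are collected as alerts instead of "bad comments"
    for alert in parser.alerts:
        print(f"* line {alert.line_number:4} {alert.code} ({alert.line})")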

View File

@ -1 +1 @@
from .parser import find_code_blocks from .parser import DecompParser

View File

@ -0,0 +1,41 @@
from enum import Enum
class ParserError(Enum):
# WARN: Stub function exceeds some line number threshold
UNLIKELY_STUB = 100
# WARN: Decomp marker is close enough to be recognized, but does not follow syntax exactly
BAD_DECOMP_MARKER = 101
# WARN: Multiple markers in sequence do not have distinct modules
DUPLICATE_MODULE = 102
# WARN: Detected a duplicate module/offset pair in the current file
DUPLICATE_OFFSET = 103
# WARN: We read a line that matches the decomp marker pattern, but we are not set up
# to handle it
BOGUS_MARKER = 104
# WARN: New function marker appeared while we were inside a function
MISSED_END_OF_FUNCTION = 105
# WARN: If we find a curly brace right after the FUNCTION marker, the signature is missing.
# This is wrong, but we still have enough to make a match with reccmp
MISSED_START_OF_FUNCTION = 106
# WARN: A blank line appeared between the end of FUNCTION markers
# and the start of the function. We can ignore it, but the line shouldn't be there
UNEXPECTED_BLANK_LINE = 107
# ERROR: We found a marker unexpectedly
UNEXPECTED_MARKER = 200
# ERROR: We found a marker where we expected to find one, but it is incompatible
# with the preceding markers.
# For example, a GLOBAL cannot follow FUNCTION/STUB
INCOMPATIBLE_MARKER = 201
# ERROR: The line following a synthetic marker was not a comment
BAD_SYNTHETIC = 202
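
The numeric ranges are meaningful: 1xx codes are warnings and 2xx codes are errors that reset the parser. A downstream consumer could (hypothetically) split on that value, e.g.:

    from isledecomp.parser.error import ParserError

    def is_warning(code: ParserError) -> bool:
        # 100-199 are warnings; 200 and up obstruct parsing
        return code.value < 200

    assert is_warning(ParserError.UNEXPECTED_BLANK_LINE)
    assert not is_warning(ParserError.BAD_SYNTHETIC)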

View File

@ -0,0 +1,41 @@
from dataclasses import dataclass
@dataclass
class ParserNode:
line_number: int
@dataclass
class ParserAlert(ParserNode):
code: int
line: str
@dataclass
class ParserSymbol(ParserNode):
module: str
offset: int
@dataclass
class ParserFunction(ParserSymbol):
name: str
lookup_by_name: bool = False
is_stub: bool = False
is_synthetic: bool = False
is_template: bool = False
end_line: int = -1
@dataclass
class ParserVariable(ParserSymbol):
name: str
size: int = -1
is_static: bool = False
@dataclass
class ParserVtable(ParserSymbol):
class_name: str
num_entries: int = -1
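
For illustration, a ParserFunction is built from the inherited fields plus its own; the address and name below are made up:

    from isledecomp.parser.node import ParserFunction

    fun = ParserFunction(
        line_number=8,             # ParserNode
        module="LEGO1",            # ParserSymbol
        offset=0x100b12c0,         # ParserSymbol (hypothetical address)
        name="MxCore::ClassName",  # hypothetical signature
        end_line=10,
    )
    assert fun.is_stub is False  # optional flags keep their dataclass defaults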

View File

@ -1,145 +1,394 @@
# C++ file parser # C++ file parser
from typing import List, TextIO from typing import List, Iterable, Iterator
from enum import Enum from enum import Enum
from .util import ( from .util import (
CodeBlock, DecompMarker,
OffsetMatch,
is_blank_or_comment, is_blank_or_comment,
match_offset_comment, match_marker,
get_template_function_name, is_marker_exact,
get_class_name,
get_synthetic_name,
remove_trailing_comment, remove_trailing_comment,
distinct_by_module,
) )
from .node import (
ParserAlert,
ParserFunction,
ParserVariable,
ParserVtable,
)
from .error import ParserError
class ReaderState(Enum): class ReaderState(Enum):
WANT_OFFSET = 0 SEARCH = 0
WANT_SIG = 1 WANT_SIG = 1
IN_FUNC = 2 IN_FUNC = 2
IN_TEMPLATE = 3 IN_TEMPLATE = 3
WANT_CURLY = 4 WANT_CURLY = 4
FUNCTION_DONE = 5 IN_GLOBAL = 5
IN_FUNC_GLOBAL = 6
IN_VTABLE = 7
def find_code_blocks(stream: TextIO) -> List[CodeBlock]: def marker_is_stub(marker: DecompMarker) -> bool:
"""Read the IO stream (file) line-by-line and give the following report: return marker.type.upper() == "STUB"
Foreach code block (function) in the file, what are its starting and
ending line numbers, and what is the given offset in the original
binary. We expect the result to be ordered by line number because we
are reading the file from start to finish."""
blocks: List[CodeBlock] = []
offset_matches: List[OffsetMatch] = [] def marker_is_variable(marker: DecompMarker) -> bool:
return marker.type.upper() == "GLOBAL"
function_sig = None
start_line = None
end_line = None
state = ReaderState.WANT_OFFSET
# 1-based to match cvdump and your text editor def marker_is_synthetic(marker: DecompMarker) -> bool:
# I know it says 0, but we will increment before each readline() return marker.type.upper() in ("SYNTHETIC", "TEMPLATE")
line_no = 0
can_seek = True
while True:
# Do this before reading again so that an EOF will not def marker_is_template(marker: DecompMarker) -> bool:
# cause us to miss the last function of the file. return marker.type.upper() == "TEMPLATE"
if state == ReaderState.FUNCTION_DONE:
# Our list of offset marks could have duplicates on
# module name, so we'll eliminate those now. def marker_is_function(marker: DecompMarker) -> bool:
for offset_match in distinct_by_module(offset_matches): return marker.type.upper() in ("FUNCTION", "STUB")
block = CodeBlock(
offset=offset_match.address,
signature=function_sig, def marker_is_vtable(marker: DecompMarker) -> bool:
start_line=start_line, return marker.type.upper() == "VTABLE"
class MarkerDict:
def __init__(self):
self.markers: dict = {}
def insert(self, marker: DecompMarker) -> bool:
"""Return True if this insert would overwrite"""
module = marker.module.upper()
if module in self.markers:
return True
self.markers[module] = (marker.type, marker.offset)
return False
def iter(self) -> Iterator[DecompMarker]:
for module, (marker_type, offset) in self.markers.items():
yield DecompMarker(marker_type, module, offset)
def empty(self):
self.markers = {}
class DecompParser:
# pylint: disable=too-many-instance-attributes
# Could combine output lists into a single list to get under the limit,
# but not right now
def __init__(self):
# The lists to be populated as we parse
self.functions: List[ParserFunction] = []
self.vtables: List[ParserVtable] = []
self.variables: List[ParserVariable] = []
self.alerts: List[ParserAlert] = []
self.line_number: int = 0
self.state: ReaderState = ReaderState.SEARCH
self.last_line: str = ""
# To allow for multiple markers where code is shared across different
# modules, save lists of compatible markers that appear in sequence
self.fun_markers = MarkerDict()
self.var_markers = MarkerDict()
self.tbl_markers = MarkerDict()
# To handle functions that are entirely indented (i.e. those defined
# in class declarations), remember how many whitespace characters
# came before the opening curly brace and match that up at the end.
# This should give us the same or better accuracy for a well-formed file.
# The alternative is counting the curly braces on each line
# but that's probably too cumbersome.
self.curly_indent_stops: int = 0
# For non-synthetic functions, save the line number where the function begins
# (i.e. where we see the curly brace) along with the function signature.
# We will need both when we reach the end of the function.
self.function_start: int = 0
self.function_sig: str = ""
def reset(self):
self.functions = []
self.vtables = []
self.variables = []
self.alerts = []
self.line_number = 0
self.state = ReaderState.SEARCH
self.last_line = ""
self.fun_markers.empty()
self.var_markers.empty()
self.tbl_markers.empty()
self.curly_indent_stops = 0
self.function_start = 0
self.function_sig = ""
def _recover(self):
"""We hit a syntax error and need to reset temp structures"""
self.state = ReaderState.SEARCH
self.fun_markers.empty()
self.var_markers.empty()
self.tbl_markers.empty()
def _syntax_warning(self, code):
self.alerts.append(
ParserAlert(
line_number=self.line_number,
code=code,
line=self.last_line.strip(),
)
)
def _syntax_error(self, code):
self._syntax_warning(code)
self._recover()
def _function_starts_here(self):
self.function_start = self.line_number
def _function_marker(self, marker: DecompMarker):
if self.fun_markers.insert(marker):
self._syntax_warning(ParserError.DUPLICATE_MODULE)
self.state = ReaderState.WANT_SIG
def _synthetic_marker(self, marker: DecompMarker):
if self.fun_markers.insert(marker):
self._syntax_warning(ParserError.DUPLICATE_MODULE)
self.state = ReaderState.IN_TEMPLATE
def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False):
end_line = self.line_number
if unexpected:
# If we missed the end of the previous function, assume it ended
# on the previous line and that whatever we are tracking next
# begins on the current line.
end_line -= 1
for marker in self.fun_markers.iter():
self.functions.append(
ParserFunction(
line_number=self.function_start,
module=marker.module,
offset=marker.offset,
lookup_by_name=lookup_by_name,
is_stub=marker_is_stub(marker),
is_synthetic=marker_is_synthetic(marker),
is_template=marker_is_template(marker),
name=self.function_sig,
end_line=end_line, end_line=end_line,
offset_comment=offset_match.comment,
module=offset_match.module,
is_template=offset_match.is_template,
is_stub=offset_match.is_stub,
) )
blocks.append(block) )
offset_matches = []
state = ReaderState.WANT_OFFSET
if can_seek: self.fun_markers.empty()
line_no += 1 self.curly_indent_stops = 0
line = stream.readline() self.state = ReaderState.SEARCH
if line == "":
break
new_match = match_offset_comment(line) def _vtable_marker(self, marker: DecompMarker):
if new_match is not None: if self.tbl_markers.insert(marker):
# We will allow multiple offsets if we have just begun self._syntax_warning(ParserError.DUPLICATE_MODULE)
# the code block, but not after we hit the curly brace. self.state = ReaderState.IN_VTABLE
if state in (
ReaderState.WANT_OFFSET, def _vtable_done(self, class_name: str = None):
ReaderState.IN_TEMPLATE, if class_name is None:
# Best we can do
class_name = self.last_line.strip()
for marker in self.tbl_markers.iter():
self.vtables.append(
ParserVtable(
line_number=self.line_number,
module=marker.module,
offset=marker.offset,
class_name=class_name,
)
)
self.tbl_markers.empty()
self.state = ReaderState.SEARCH
def _variable_marker(self, marker: DecompMarker):
if self.var_markers.insert(marker):
self._syntax_warning(ParserError.DUPLICATE_MODULE)
if self.state in (ReaderState.IN_FUNC, ReaderState.IN_FUNC_GLOBAL):
self.state = ReaderState.IN_FUNC_GLOBAL
else:
self.state = ReaderState.IN_GLOBAL
def _variable_done(self):
for marker in self.var_markers.iter():
self.variables.append(
ParserVariable(
line_number=self.line_number,
module=marker.module,
offset=marker.offset,
name=self.last_line.strip(),
)
)
self.var_markers.empty()
if self.state == ReaderState.IN_FUNC_GLOBAL:
self.state = ReaderState.IN_FUNC
else:
self.state = ReaderState.SEARCH
def _handle_marker(self, marker: DecompMarker):
# Cannot handle any markers between function sig and opening curly brace
if self.state == ReaderState.WANT_CURLY:
self._syntax_error(ParserError.UNEXPECTED_MARKER)
return
# TODO: How uncertain are we of detecting the end of a function
# in a clang-formatted file? For now we assume we have missed the
# end if we detect a non-GLOBAL marker while state is IN_FUNC.
# Maybe these cases should be syntax errors instead
if marker_is_function(marker):
if self.state in (
ReaderState.SEARCH,
ReaderState.WANT_SIG, ReaderState.WANT_SIG,
): ):
# If we detected an offset marker unexpectedly, # We will allow multiple offsets if we have just begun
# we are handling it here so we can continue seeking. # the code block, but not after we hit the curly brace.
can_seek = True self._function_marker(marker)
elif self.state == ReaderState.IN_FUNC:
offset_matches.append(new_match)
if new_match.is_template:
state = ReaderState.IN_TEMPLATE
else:
state = ReaderState.WANT_SIG
else:
# We hit another offset unexpectedly. # We hit another offset unexpectedly.
# We can recover easily by just ending the function here. # We can recover easily by just ending the function here.
end_line = line_no - 1 self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
state = ReaderState.FUNCTION_DONE self._function_done(unexpected=True)
# Pause reading here so we handle the offset marker # Start the next function right after so we can
# on the next loop iteration # read the next line.
can_seek = False self._function_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif state == ReaderState.IN_TEMPLATE: elif marker_is_synthetic(marker):
if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE):
self._synthetic_marker(marker)
elif self.state == ReaderState.IN_FUNC:
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(lookup_by_name=True, unexpected=True)
self._synthetic_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker_is_variable(marker):
if self.state in (
ReaderState.SEARCH,
ReaderState.IN_GLOBAL,
ReaderState.IN_FUNC,
ReaderState.IN_FUNC_GLOBAL,
):
self._variable_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker_is_vtable(marker):
if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE):
self._vtable_marker(marker)
elif self.state == ReaderState.IN_FUNC:
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(unexpected=True)
self._vtable_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
else:
self._syntax_warning(ParserError.BOGUS_MARKER)
def read_line(self, line: str):
self.last_line = line # TODO: Useful or hack for error reporting?
self.line_number += 1
marker = match_marker(line)
if marker is not None:
# TODO: what's the best place for this?
# Does it belong with reading or marker handling?
if not is_marker_exact(self.last_line):
self._syntax_warning(ParserError.BAD_DECOMP_MARKER)
self._handle_marker(marker)
return
line_strip = line.strip()
if self.state == ReaderState.IN_TEMPLATE:
# TEMPLATE functions are a special case. The signature is # TEMPLATE functions are a special case. The signature is
# given on the next line (in a // comment) # given on the next line (in a // comment)
function_sig = get_template_function_name(line) name = get_synthetic_name(line)
start_line = line_no if name is None:
end_line = line_no self._syntax_error(ParserError.BAD_SYNTHETIC)
state = ReaderState.FUNCTION_DONE else:
self.function_sig = name
self._function_starts_here()
self._function_done(lookup_by_name=True)
elif state == ReaderState.WANT_SIG: elif self.state == ReaderState.WANT_SIG:
# Skip blank lines or comments that come after the offset # Ignore blanks on the way to function start or function name
# marker. There is not a formal procedure for this, so just if len(line_strip) == 0:
# assume the next "code line" is the function signature self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE)
if not is_blank_or_comment(line):
elif line_strip.startswith("//"):
# If we found a comment, assume implicit lookup-by-name
# function and end here. We know this is not a decomp marker
# because it would have been handled already.
self.function_sig = get_synthetic_name(line)
self._function_starts_here()
self._function_done(lookup_by_name=True)
elif line_strip == "{":
# We missed the function signature but we can recover from this
self.function_sig = "(unknown)"
self._function_starts_here()
self._syntax_warning(ParserError.MISSED_START_OF_FUNCTION)
self.state = ReaderState.IN_FUNC
else:
# Inline functions may end with a comment. Strip that out # Inline functions may end with a comment. Strip that out
# to help parsing. # to help parsing.
function_sig = remove_trailing_comment(line.strip()) self.function_sig = remove_trailing_comment(line_strip)
# Now check to see if the opening curly bracket is on the # Now check to see if the opening curly bracket is on the
# same line. clang-format should prevent this (BraceWrapping) # same line. clang-format should prevent this (BraceWrapping)
# but it is easy to detect. # but it is easy to detect.
# If the entire function is on one line, handle that too. # If the entire function is on one line, handle that too.
if function_sig.endswith("{"): if self.function_sig.endswith("{"):
start_line = line_no self._function_starts_here()
state = ReaderState.IN_FUNC self.state = ReaderState.IN_FUNC
elif function_sig.endswith("}") or function_sig.endswith("};"): elif self.function_sig.endswith("}") or self.function_sig.endswith(
start_line = line_no "};"
end_line = line_no ):
state = ReaderState.FUNCTION_DONE self._function_starts_here()
self._function_done()
else: else:
state = ReaderState.WANT_CURLY self.state = ReaderState.WANT_CURLY
elif state == ReaderState.WANT_CURLY: elif self.state == ReaderState.WANT_CURLY:
if line.strip() == "{": if line_strip == "{":
start_line = line_no self.curly_indent_stops = line.index("{")
state = ReaderState.IN_FUNC self._function_starts_here()
self.state = ReaderState.IN_FUNC
elif state == ReaderState.IN_FUNC: elif self.state == ReaderState.IN_FUNC:
# Naive but reasonable assumption that functions will end with if line_strip.startswith("}") and line[self.curly_indent_stops] == "}":
# a curly brace on its own line with no prepended spaces. self._function_done()
if line.startswith("}"):
end_line = line_no
state = ReaderState.FUNCTION_DONE
return blocks elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL):
if not is_blank_or_comment(line):
self._variable_done()
elif self.state == ReaderState.IN_VTABLE:
vtable_class = get_class_name(line)
if vtable_class is not None:
self._vtable_done(class_name=vtable_class)
def read_lines(self, lines: Iterable):
for line in lines:
self.read_line(line)
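
Putting it together, a hedged end-to-end sketch of the new parser (the module names are real, but the addresses and the function are invented for this example):

    from isledecomp.parser import DecompParser

    parser = DecompParser()
    parser.read_lines(
        [
            "// VTABLE: LEGO1 0x100dc900",    # hypothetical address
            "class MxString : public MxCore {",
            "// FUNCTION: LEGO1 0x100ae200",  # hypothetical address
            "void MxString::Reverse()",
            "{",
            "}",
        ]
    )
    assert parser.vtables[0].class_name == "MxString"
    assert parser.functions[0].offset == 0x100ae200
    assert len(parser.alerts) == 0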

View File

@ -1,44 +1,17 @@
# C++ Parser utility functions and data structures # C++ Parser utility functions and data structures
from __future__ import annotations # python <3.10 compatibility from __future__ import annotations # python <3.10 compatibility
import re import re
from typing import List
from collections import namedtuple from collections import namedtuple
DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"])
CodeBlock = namedtuple(
"CodeBlock",
[
"offset",
"signature",
"start_line",
"end_line",
"offset_comment",
"module",
"is_template",
"is_stub",
],
)
OffsetMatch = namedtuple( markerRegex = re.compile(
"OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"] r"\s*//\s*(\w+):\s*(\w+)\s+(0x[a-f0-9]+)",
)
# This has not been formally established, but considering that "STUB"
# is a temporary state for a function, we assume it will appear last,
# after any other modifiers (i.e. TEMPLATE)
# To match a reasonable variance of formatting for the offset comment
offsetCommentRegex = re.compile(
r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?", # nopep8
flags=re.I, flags=re.I,
) )
# To match the exact syntax (text upper case, hex lower case, with spaces) markerExactRegex = re.compile(r"\s*// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$")
# that is used in most places
offsetCommentExactRegex = re.compile(
r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$"
) # nopep8
# The goal here is to just read whatever is on the next line, so some # The goal here is to just read whatever is on the next line, so some
# flexibility in the formatting seems OK # flexibility in the formatting seems OK
@ -50,15 +23,15 @@
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$") trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
def get_template_function_name(line: str) -> str: def get_synthetic_name(line: str) -> str | None:
"""Parse function signature for special TEMPLATE functions""" """Synthetic names appear on a single line comment on the line after the marker.
If that's not what we have, return None"""
template_match = templateCommentRegex.match(line) template_match = templateCommentRegex.match(line)
# If we don't match, you get whatever is on the line as the signature
if template_match is not None: if template_match is not None:
return template_match.group(1) return template_match.group(1)
return line return None
def remove_trailing_comment(line: str) -> str: def remove_trailing_comment(line: str) -> str:
@ -78,39 +51,45 @@ def is_blank_or_comment(line: str) -> bool:
) )
def is_exact_offset_comment(line: str) -> bool: def match_marker(line: str) -> DecompMarker | None:
"""If the offset comment does not match our (unofficial) syntax match = markerRegex.match(line)
we may want to alert the user to fix it for style points."""
return offsetCommentExactRegex.match(line) is not None
def match_offset_comment(line: str) -> OffsetMatch | None:
match = offsetCommentRegex.match(line)
if match is None: if match is None:
return None return None
return OffsetMatch( return DecompMarker(
module=match.group(1), type=match.group(1), module=match.group(2), offset=int(match.group(3), 16)
address=int(match.group(2), 16),
is_template=match.group(3) is not None,
is_stub=match.group(4) is not None,
comment=line.strip(),
) )
def distinct_by_module(offsets: List) -> List: def is_marker_exact(line: str) -> bool:
"""Given a list of offset markers, return a list with distinct return markerExactRegex.match(line) is not None
module names. If module names (case-insensitive) are repeated,
choose the offset that appears first."""
if len(offsets) < 2:
return offsets
# Dict maintains insertion order in python >=3.7 template_class_decl_regex = re.compile(
offsets_dict = {} r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)<([\w]+)\s*(\*+)?\s*>"
for offset in offsets: )
module_upper = offset.module.upper()
if module_upper not in offsets_dict:
offsets_dict[module_upper] = offset
return list(offsets_dict.values())
class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)")
def get_class_name(line: str) -> str | None:
"""For VTABLE markers, extract the class name from the code line or comment
where it appears."""
match = template_class_decl_regex.match(line)
if match is not None:
# For template classes, we should reformat the class name so it matches
# the output from cvdump: one space between the template type and any asterisks
# if it is a pointer type.
(class_name, template_type, asterisks) = match.groups()
if asterisks is not None:
return f"{class_name}<{template_type} {asterisks}>"
return f"{class_name}<{template_type}>"
match = class_decl_regex.match(line)
if match is not None:
return match.group(1)
return None
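
The two regexes deliberately differ in strictness: match_marker is lenient so we can still flag a sloppy comment, while is_marker_exact enforces the canonical style. A small demonstration (the sloppy marker below is contrived):

    from isledecomp.parser.util import match_marker, is_marker_exact, get_class_name

    sloppy = "// function: lego1   0x100db814"
    marker = match_marker(sloppy)
    assert marker is not None and marker.offset == 0x100db814  # parsed anyway
    assert is_marker_exact(sloppy) is False                    # ...but flagged as inexact

    # Template class names are normalized to cvdump's spelling
    assert get_class_name("// class MxList<MxDSAction *>") == "MxList<MxDSAction *>"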

View File

@ -3,6 +3,7 @@
// A very simple class // A very simple class
// VTABLE: TEST 0x1001002
class TestClass { class TestClass {
public: public:
TestClass(); TestClass();
@ -10,14 +11,14 @@ class TestClass {
virtual MxResult Tickle() override; // vtable+08 virtual MxResult Tickle() override; // vtable+08
// OFFSET: TEST 0x12345678 // FUNCTION: TEST 0x12345678
inline const char* ClassName() const // vtable+0c inline const char* ClassName() const // vtable+0c
{ {
// 0xabcd1234 // 0xabcd1234
return "TestClass"; return "TestClass";
} }
// OFFSET: TEST 0xdeadbeef // FUNCTION: TEST 0xdeadbeef
inline MxBool IsA(const char* name) const override // vtable+10 inline MxBool IsA(const char* name) const override // vtable+10
{ {
return !strcmp(name, TestClass::ClassName()); return !strcmp(name, TestClass::ClassName());

View File

@ -3,19 +3,19 @@
// A very simple well-formed code file // A very simple well-formed code file
// OFFSET: TEST 0x1234 // FUNCTION: TEST 0x1234
void function01() void function01()
{ {
// TODO // TODO
} }
// OFFSET: TEST 0x2345 // FUNCTION: TEST 0x2345
void function02() void function02()
{ {
// TODO // TODO
} }
// OFFSET: TEST 0x3456 // FUNCTION: TEST 0x3456
void function03() void function03()
{ {
// TODO // TODO

View File

@ -0,0 +1,14 @@
// Sample for python unit tests
// Not part of the decomp
// Global variables inside and outside of functions
// GLOBAL: TEST 0x1000
const char *g_message = "test";
// FUNCTION: TEST 0x1234
void function01()
{
// GLOBAL: TEST 0x5555
static int g_hello = 123;
}
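
Running this sample through the parser (as test_variables below does) should yield one function and two variables, since the static inside function01 is tracked via the IN_FUNC_GLOBAL state; the path is assumed to be resolved against the samples directory:

    from isledecomp.parser import DecompParser

    parser = DecompParser()
    with open("global_variables.cpp", "r", encoding="utf-8") as f:
        parser.read_lines(f)
    assert len(parser.functions) == 1   # function01
    assert len(parser.variables) == 2   # g_message and the static g_hello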

View File

@ -1,8 +1,8 @@
// Sample for python unit tests // Sample for python unit tests
// Not part of the decomp // Not part of the decomp
// OFFSET: TEST 0x10000001 // FUNCTION: TEST 0x10000001
inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there
// OFFSET: TEST 0x10000002 // FUNCTION: TEST 0x10000002
inline const char* OneLine() const { return "MxDSObject"; }; inline const char* OneLine() const { return "MxDSObject"; };

View File

@ -9,7 +9,7 @@ int no_offset_comment()
return -1; return -1;
} }
// OFFSET: TEST 0xdeadbeef // FUNCTION: TEST 0xdeadbeef
void regular_ole_function() void regular_ole_function()
{ {
printf("hi there"); printf("hi there");

View File

@ -3,22 +3,22 @@
// Handling multiple offset markers // Handling multiple offset markers
// OFFSET: TEST 0x1234 // FUNCTION: TEST 0x1234
// OFFSET: HELLO 0x5555 // FUNCTION: HELLO 0x5555
void different_modules() void different_modules()
{ {
// TODO // TODO
} }
// OFFSET: TEST 0x2345 // FUNCTION: TEST 0x2345
// OFFSET: TEST 0x1234 // FUNCTION: TEST 0x1234
void same_module() void same_module()
{ {
// TODO // TODO
} }
// OFFSET: TEST 0x2002 // FUNCTION: TEST 0x2002
// OFFSET: test 0x1001 // FUNCTION: test 0x1001
void same_case_insensitive() void same_case_insensitive()
{ {
// TODO // TODO

View File

@ -1,10 +1,10 @@
// Sample for python unit tests // Sample for python unit tests
// Not part of the decomp // Not part of the decomp
// OFFSET: TEST 0x1234 // FUNCTION: TEST 0x1234
void short_function() { static char* msg = "oneliner"; } void short_function() { static char* msg = "oneliner"; }
// OFFSET: TEST 0x5555 // FUNCTION: TEST 0x5555
void function_after_one_liner() void function_after_one_liner()
{ {
// This function comes after the previous that is on a single line. // This function comes after the previous that is on a single line.

View File

@ -1,19 +1,19 @@
// Sample for python unit tests // Sample for python unit tests
// Not part of the decomp // Not part of the decomp
// OFFSET: TEST 0x1001 // FUNCTION: TEST 0x1001
void function_order01() void function_order01()
{ {
// TODO // TODO
} }
// OFFSET: TEST 0x1003 // FUNCTION: TEST 0x1003
void function_order03() void function_order03()
{ {
// TODO // TODO
} }
// OFFSET: TEST 0x1002 // FUNCTION: TEST 0x1002
void function_order02() void function_order02()
{ {
// TODO // TODO

View File

@ -4,18 +4,18 @@
// While it's reasonable to expect a well-formed file (and clang-format // While it's reasonable to expect a well-formed file (and clang-format
// will make sure we get one), this will put the parser through its paces. // will make sure we get one), this will put the parser through its paces.
// OFFSET: TEST 0x1234 // FUNCTION: TEST 0x1234
void curly_with_spaces() void curly_with_spaces()
{ {
static char* msg = "hello"; static char* msg = "hello";
} }
// OFFSET: TEST 0x5555 // FUNCTION: TEST 0x5555
void weird_closing_curly() void weird_closing_curly()
{ {
int x = 123; } int x = 123; }
// OFFSET: HELLO 0x5656 // FUNCTION: HELLO 0x5656
void bad_indenting() { void bad_indenting() {
if (0) if (0)
{ {

View File

@ -1,127 +1,360 @@
import os import pytest
from typing import List, TextIO from isledecomp.parser.parser import (
from isledecomp.parser import find_code_blocks ReaderState,
from isledecomp.parser.util import CodeBlock DecompParser,
)
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples") from isledecomp.parser.error import ParserError
def sample_file(filename: str) -> TextIO: @pytest.fixture(name="parser")
"""Wrapper for opening the samples from the directory that does not def fixture_parser():
depend on the cwd where we run the test""" return DecompParser()
full_path = os.path.join(SAMPLE_DIR, filename)
return open(full_path, "r", encoding="utf-8")
def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool: def test_missing_sig(parser):
"""Helper to make this more idiomatic""" """In the hopefully rare scenario that the function signature and marker
just_offsets = [block.offset for block in blocks] are swapped, we still have enough to match with reccmp"""
return just_offsets == sorted(just_offsets) parser.read_lines(
[
"void my_function()",
"// FUNCTION: TEST 0x1234",
"{",
"}",
]
)
assert parser.state == ReaderState.SEARCH
assert len(parser.functions) == 1
assert parser.functions[0].line_number == 3
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.MISSED_START_OF_FUNCTION
# Tests are below # def test_not_exact_syntax(parser):
"""Alert to inexact syntax right here in the parser instead of kicking it downstream.
Doing this means we don't have to save the actual text."""
parser.read_line("// function: test 0x1234")
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.BAD_DECOMP_MARKER
def test_sanity(): def test_invalid_marker(parser):
"""Read a very basic file""" """We matched a decomp marker, but it's not one we care about"""
with sample_file("basic_file.cpp") as f: parser.read_line("// BANANA: TEST 0x1234")
blocks = find_code_blocks(f) assert parser.state == ReaderState.SEARCH
assert len(blocks) == 3 assert len(parser.alerts) == 1
assert code_blocks_are_sorted(blocks) is True assert parser.alerts[0].code == ParserError.BOGUS_MARKER
# n.b. The parser returns line numbers as 1-based
# Function starts when we see the opening curly brace
assert blocks[0].start_line == 8
assert blocks[0].end_line == 10
def test_oneline(): def test_incompatible_marker(parser):
"""(Assuming clang-format permits this) This sample has a function """The marker we just read cannot be handled in the current parser state"""
on a single line. This will test the end-of-function detection""" parser.read_lines(
with sample_file("oneline_function.cpp") as f: [
blocks = find_code_blocks(f) "// FUNCTION: TEST 0x1234",
"// GLOBAL: TEST 0x5000",
assert len(blocks) == 2 ]
assert blocks[0].start_line == 5 )
assert blocks[0].end_line == 5 assert parser.state == ReaderState.SEARCH
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER
def test_missing_offset(): def test_variable(parser):
"""What if the function doesn't have an offset comment?""" """Should identify a global variable"""
with sample_file("missing_offset.cpp") as f: parser.read_lines(
blocks = find_code_blocks(f) [
"// GLOBAL: HELLO 0x1234",
# TODO: For now, the function without the offset will just be ignored. "int g_value = 5;",
# Would be the same outcome if the comment was present but mangled and ]
# we failed to match it. We should detect these cases in the future. )
assert len(blocks) == 1 assert len(parser.variables) == 1
def test_jumbled_case(): def test_synthetic_plus_marker(parser):
"""The parser just reports what it sees. It is the responsibility of """Marker tracking preempts synthetic name detection.
the downstream tools to do something about a jumbled file. Should fail with error and not log the synthetic"""
Just verify that we are reading it correctly.""" parser.read_lines(
with sample_file("out_of_order.cpp") as f: [
blocks = find_code_blocks(f) "// SYNTHETIC: HEY 0x555",
"// FUNCTION: HOWDY 0x1234",
assert len(blocks) == 3 ]
assert code_blocks_are_sorted(blocks) is False )
assert len(parser.functions) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER
def test_bad_file(): def test_different_markers_different_module(parser):
with sample_file("poorly_formatted.cpp") as f: """Does it make any sense for a function to be a stub in one module,
blocks = find_code_blocks(f) but not in another? I don't know. But it's no problem for us."""
parser.read_lines(
[
"// FUNCTION: HOWDY 0x1234",
"// STUB: SUP 0x5555",
"void interesting_function() {",
"}",
]
)
assert len(blocks) == 3 assert len(parser.alerts) == 0
assert len(parser.functions) == 2
def test_indented(): def test_different_markers_same_module(parser):
"""Offsets for functions inside of a class will probably be indented.""" """Now, if something is a regular function but then a stub,
with sample_file("basic_class.cpp") as f: what do we say about that?"""
blocks = find_code_blocks(f) parser.read_lines(
[
"// FUNCTION: HOWDY 0x1234",
"// STUB: HOWDY 0x5555",
"void interesting_function() {",
"}",
]
)
# TODO: We don't properly detect the end of these functions # Use first marker declaration, don't replace
# because the closing brace is indented. However... knowing where each assert len(parser.functions) == 1
# function ends is less important (for now) than capturing assert parser.functions[0].is_stub is False
# all the functions that are there.
assert len(blocks) == 2 # Should alert to this
assert blocks[0].offset == int("0x12345678", 16) assert len(parser.alerts) == 1
assert blocks[0].start_line == 15 assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
# assert blocks[0].end_line == 18
assert blocks[1].offset == int("0xdeadbeef", 16)
assert blocks[1].start_line == 22
# assert blocks[1].end_line == 24
def test_inline(): def test_unexpected_synthetic(parser):
with sample_file("inline.cpp") as f: """FUNCTION then SYNTHETIC should fail to report either one"""
blocks = find_code_blocks(f) parser.read_lines(
[
"// FUNCTION: HOWDY 0x1234",
"// SYNTHETIC: HOWDY 0x5555",
"void interesting_function() {",
"}",
]
)
assert len(blocks) == 2 assert parser.state == ReaderState.SEARCH
for block in blocks: assert len(parser.functions) == 0
assert block.start_line is not None assert len(parser.alerts) == 1
assert block.start_line == block.end_line assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER
def test_multiple_offsets(): @pytest.mark.skip(reason="not implemented yet")
"""If multiple offset marks appear before for a code block, take them def test_duplicate_offset(parser):
all but ensure module name (case-insensitive) is distinct. """Repeating the same module/offset in the same file is probably a typo"""
Use first module occurrence in case of duplicates.""" parser.read_lines(
with sample_file("multiple_offsets.cpp") as f: [
blocks = find_code_blocks(f) "// GLOBAL: HELLO 0x1234",
"int x = 1;",
"// GLOBAL: HELLO 0x1234",
"int y = 2;",
]
)
assert len(blocks) == 4 assert len(parser.alerts) == 1
assert blocks[0].module == "TEST" assert parser.alerts[0].code == ParserError.DUPLICATE_OFFSET
assert blocks[0].start_line == 9
assert blocks[1].module == "HELLO"
assert blocks[1].start_line == 9
# Duplicate modules are ignored def test_multiple_variables(parser):
assert blocks[2].start_line == 16 """Theoretically the same global variable can appear in multiple modules"""
assert blocks[2].offset == 0x2345 parser.read_lines(
[
"// GLOBAL: HELLO 0x1234",
"// GLOBAL: WUZZUP 0x555",
"const char *g_greeting;",
]
)
assert len(parser.alerts) == 0
assert len(parser.variables) == 2
assert blocks[3].module == "TEST"
assert blocks[3].offset == 0x2002 def test_multiple_variables_same_module(parser):
"""Should not overwrite offset"""
parser.read_lines(
[
"// GLOBAL: HELLO 0x1234",
"// GLOBAL: HELLO 0x555",
"const char *g_greeting;",
]
)
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
assert len(parser.variables) == 1
assert parser.variables[0].offset == 0x1234
def test_multiple_vtables(parser):
parser.read_lines(
[
"// VTABLE: HELLO 0x1234",
"// VTABLE: TEST 0x5432",
"class MxString : public MxCore {",
]
)
assert len(parser.alerts) == 0
assert len(parser.vtables) == 2
assert parser.vtables[0].class_name == "MxString"
def test_multiple_vtables_same_module(parser):
"""Should not overwrite offset"""
parser.read_lines(
[
"// VTABLE: HELLO 0x1234",
"// VTABLE: HELLO 0x5432",
"class MxString : public MxCore {",
]
)
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
assert len(parser.vtables) == 1
assert parser.vtables[0].offset == 0x1234
def test_synthetic(parser):
parser.read_lines(
[
"// SYNTHETIC: TEST 0x1234",
"// TestClass::TestMethod",
]
)
assert len(parser.functions) == 1
assert parser.functions[0].lookup_by_name is True
assert parser.functions[0].name == "TestClass::TestMethod"
def test_synthetic_same_module(parser):
parser.read_lines(
[
"// SYNTHETIC: TEST 0x1234",
"// SYNTHETIC: TEST 0x555",
"// TestClass::TestMethod",
]
)
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
assert len(parser.functions) == 1
assert parser.functions[0].offset == 0x1234
def test_synthetic_no_comment(parser):
"""Synthetic marker followed by a code line (i.e. non-comment)"""
parser.read_lines(
[
"// SYNTHETIC: TEST 0x1234",
"int x = 123;",
]
)
assert len(parser.functions) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.BAD_SYNTHETIC
assert parser.state == ReaderState.SEARCH
def test_single_line_function(parser):
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"int hello() { return 1234; }",
]
)
assert len(parser.functions) == 1
assert parser.functions[0].line_number == 2
assert parser.functions[0].end_line == 2
def test_indented_function(parser):
"""Track the number of whitespace characters when we begin the function
and check that against each closing curly brace we read.
Should not report a syntax warning if the function is indented"""
parser.read_lines(
[
" // FUNCTION: TEST 0x1234",
" void indented()",
" {",
" // TODO",
" }",
" // FUNCTION: NEXT 0x555",
]
)
assert len(parser.alerts) == 0
@pytest.mark.xfail(reason="todo")
def test_indented_no_curly_hint(parser):
"""Same as above, but opening curly brace is on the same line.
Without the hint of how many whitespace characters to check, can we
still identify the end of the function?"""
parser.read_lines(
[
" // FUNCTION: TEST 0x1234",
" void indented() {",
" }",
" // FUNCTION: NEXT 0x555",
]
)
assert len(parser.alerts) == 0
def test_implicit_lookup_by_name(parser):
"""FUNCTION (or STUB) offsets must directly precede the function signature.
If we detect a comment instead, we assume that this is a lookup-by-name
function and end here."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// TestClass::TestMethod()",
]
)
assert parser.state == ReaderState.SEARCH
assert len(parser.functions) == 1
assert parser.functions[0].lookup_by_name is True
assert parser.functions[0].name == "TestClass::TestMethod()"
def test_function_with_spaces(parser):
"""There should not be any spaces between the end of FUNCTION markers
and the start or name of the function. If it's a blank line, we can safely
ignore but should alert to this."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
" ",
"inline void test_function() { };",
]
)
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE
def test_function_with_spaces_implicit(parser):
"""Same as above, but for implicit lookup-by-name"""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
" ",
"// Implicit::Method",
]
)
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE
@pytest.mark.xfail(reason="will assume implicit lookup-by-name function")
def test_function_is_commented(parser):
"""In an ideal world, we would recognize that there is no code here.
Some editors (or users) might comment the function on each line like this
but hopefully it is rare."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// int my_function()",
"// {",
"// return 5;",
"// }",
]
)
assert len(parser.functions) == 0

View File

@ -0,0 +1,141 @@
import os
from typing import List, TextIO
import pytest
from isledecomp.parser import DecompParser
from isledecomp.parser.node import ParserSymbol
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")
def sample_file(filename: str) -> TextIO:
"""Wrapper for opening the samples from the directory that does not
depend on the cwd where we run the test"""
full_path = os.path.join(SAMPLE_DIR, filename)
return open(full_path, "r", encoding="utf-8")
def code_blocks_are_sorted(blocks: List[ParserSymbol]) -> bool:
"""Helper to make this more idiomatic"""
just_offsets = [block.offset for block in blocks]
return just_offsets == sorted(just_offsets)
@pytest.fixture(name="parser")
def fixture_parser():
return DecompParser()
# Tests are below #
def test_sanity(parser):
"""Read a very basic file"""
with sample_file("basic_file.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 3
assert code_blocks_are_sorted(parser.functions) is True
# n.b. The parser returns line numbers as 1-based
# Function starts when we see the opening curly brace
assert parser.functions[0].line_number == 8
assert parser.functions[0].end_line == 10
def test_oneline(parser):
"""(Assuming clang-format permits this) This sample has a function
on a single line. This will test the end-of-function detection"""
with sample_file("oneline_function.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 2
assert parser.functions[0].line_number == 5
assert parser.functions[0].end_line == 5
def test_missing_offset(parser):
"""What if the function doesn't have an offset comment?"""
with sample_file("missing_offset.cpp") as f:
parser.read_lines(f)
# TODO: For now, the function without the offset will just be ignored.
# Would be the same outcome if the comment was present but mangled and
# we failed to match it. We should detect these cases in the future.
assert len(parser.functions) == 1
def test_jumbled_case(parser):
"""The parser just reports what it sees. It is the responsibility of
the downstream tools to do something about a jumbled file.
Just verify that we are reading it correctly."""
with sample_file("out_of_order.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 3
assert code_blocks_are_sorted(parser.functions) is False
def test_bad_file(parser):
with sample_file("poorly_formatted.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 3
def test_indented(parser):
"""Offsets for functions inside of a class will probably be indented."""
with sample_file("basic_class.cpp") as f:
parser.read_lines(f)
# TODO: We don't properly detect the end of these functions
# because the closing brace is indented. However... knowing where each
# function ends is less important (for now) than capturing
# all the functions that are there.
assert len(parser.functions) == 2
assert parser.functions[0].offset == int("0x12345678", 16)
assert parser.functions[0].line_number == 16
# assert parser.functions[0].end_line == 19
assert parser.functions[1].offset == int("0xdeadbeef", 16)
assert parser.functions[1].line_number == 23
# assert parser.functions[1].end_line == 25
def test_inline(parser):
with sample_file("inline.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 2
for fun in parser.functions:
assert fun.line_number is not None
assert fun.line_number == fun.end_line
def test_multiple_offsets(parser):
"""If multiple offset marks appear before for a code block, take them
all but ensure module name (case-insensitive) is distinct.
Use first module occurrence in case of duplicates."""
with sample_file("multiple_offsets.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 4
assert parser.functions[0].module == "TEST"
assert parser.functions[0].line_number == 9
assert parser.functions[1].module == "HELLO"
assert parser.functions[1].line_number == 9
# Duplicate modules are ignored
assert parser.functions[2].line_number == 16
assert parser.functions[2].offset == 0x2345
assert parser.functions[3].module == "TEST"
assert parser.functions[3].offset == 0x2002
def test_variables(parser):
with sample_file("global_variables.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 1
assert len(parser.variables) == 2

View File

@ -0,0 +1,150 @@
import pytest
from isledecomp.parser.parser import (
ReaderState as _rs,
DecompParser,
)
from isledecomp.parser.error import ParserError as _pe
# fmt: off
state_change_marker_cases = [
(_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None),
(_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None),
(_rs.SEARCH, "STUB", _rs.WANT_SIG, None),
(_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "STUB", _rs.WANT_SIG, None),
(_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None),
(_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "STUB", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None),
(_rs.IN_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_FUNC_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None),
]
# fmt: on
@pytest.mark.parametrize(
"state, marker_type, new_state, expected_error", state_change_marker_cases
)
def test_state_change_by_marker(
state: _rs, marker_type: str, new_state: _rs, expected_error: None | _pe
):
p = DecompParser()
p.state = state
mock_line = f"// {marker_type}: TEST 0x1234"
p.read_line(mock_line)
assert p.state == new_state
if expected_error is not None:
assert len(p.alerts) > 0
assert p.alerts[0].code == expected_error
# Reading any of these lines should have no effect in ReaderState.SEARCH
search_lines_no_effect = [
"",
"\t",
" ",
"int x = 0;",
"// Comment",
"/*",
"*/",
"/* Block comment */",
"{",
"}",
]
@pytest.mark.parametrize("line", search_lines_no_effect)
def test_state_search_line(line: str):
p = DecompParser()
p.read_line(line)
assert p.state == _rs.SEARCH
assert len(p.alerts) == 0
global_lines = [
("// A comment", _rs.IN_GLOBAL),
("", _rs.IN_GLOBAL),
("\t", _rs.IN_GLOBAL),
(" ", _rs.IN_GLOBAL),
# TODO: we don't check for a "likely" variable declaration, so any code line counts as one
("void function()", _rs.SEARCH),
("int x = 123;", _rs.SEARCH),
("just some text", _rs.SEARCH),
]
@pytest.mark.parametrize("line, new_state", global_lines)
def test_state_global_line(line: str, new_state: _rs):
p = DecompParser()
p.read_line("// GLOBAL: TEST 0x1234")
assert p.state == _rs.IN_GLOBAL
p.read_line(line)
assert p.state == new_state
# mostly same as above
in_func_global_lines = [
("// A comment", _rs.IN_FUNC_GLOBAL),
("", _rs.IN_FUNC_GLOBAL),
("\t", _rs.IN_FUNC_GLOBAL),
(" ", _rs.IN_FUNC_GLOBAL),
# TODO: we don't check for a "likely" variable declaration, so any code line counts as one
("void function()", _rs.IN_FUNC),
("int x = 123;", _rs.IN_FUNC),
("just some text", _rs.IN_FUNC),
]
@pytest.mark.parametrize("line, new_state", in_func_global_lines)
def test_state_in_func_global_line(line: str, new_state: _rs):
p = DecompParser()
p.state = _rs.IN_FUNC
p.read_line("// GLOBAL: TEST 0x1234")
assert p.state == _rs.IN_FUNC_GLOBAL
p.read_line(line)
assert p.state == new_state
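# End-to-end sketch of the flow these state tests walk through: a FUNCTION
# marker, a signature, and a braced body should produce one parsed function.
# (Sketch only; read_lines() accepting any iterable of lines and the
# module/offset attributes are assumptions drawn from how the parser is used
# elsewhere in this change.)
def _sketch_parse_one_function():
    p = DecompParser()
    p.read_lines(
        [
            "// FUNCTION: TEST 0x1234",
            "void function()",
            "{",
            "}",
        ]
    )
    fun = p.functions[0]
    assert fun.module == "TEST"
    assert fun.offset == 0x1234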

View File

@ -1,11 +1,11 @@
from collections import namedtuple
from typing import List
import pytest import pytest
from isledecomp.parser.parser import MarkerDict
from isledecomp.parser.util import ( from isledecomp.parser.util import (
DecompMarker,
is_blank_or_comment, is_blank_or_comment,
match_offset_comment, match_marker,
is_exact_offset_comment, is_marker_exact,
distinct_by_module, get_class_name,
) )
@ -28,76 +28,106 @@ def test_is_blank_or_comment(line: str, expected: bool):
assert is_blank_or_comment(line) is expected assert is_blank_or_comment(line) is expected
offset_comment_samples = [ marker_samples = [
# (can_parse: bool, exact_match: bool, line: str) # (can_parse: bool, exact_match: bool, line: str)
# Should match both expected modules with optional STUB marker (True, True, "// FUNCTION: LEGO1 0xdeadbeef"),
(True, True, "// OFFSET: LEGO1 0xdeadbeef"), (True, True, "// FUNCTION: ISLE 0x12345678"),
(True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"),
(True, True, "// OFFSET: ISLE 0x12345678"),
(True, True, "// OFFSET: ISLE 0x12345678 STUB"),
# No trailing spaces allowed # No trailing spaces allowed
(True, False, "// OFFSET: LEGO1 0xdeadbeef "), (True, False, "// FUNCTION: LEGO1 0xdeadbeef "),
(True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "),
# Must have exactly one space between elements # Must have exactly one space between elements
(True, False, "//OFFSET: ISLE 0xdeadbeef"), (True, False, "//FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// OFFSET:ISLE 0xdeadbeef"), (True, False, "// FUNCTION:ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"), (True, False, "// FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"), (True, False, "// FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"), (True, False, "// FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef STUB"), # Must have 0x prefix for hex number to match at all
# Must have 0x prefix for hex number (False, False, "// FUNCTION: ISLE deadbeef"),
(True, False, "// OFFSET: ISLE deadbeef"),
# Offset, module name, and STUB must be uppercase # Offset, module name, and STUB must be uppercase
(True, False, "// offset: ISLE 0xdeadbeef"), (True, False, "// function: ISLE 0xdeadbeef"),
(True, False, "// offset: isle 0xdeadbeef"), (True, False, "// function: isle 0xdeadbeef"),
(True, False, "// OFFSET: LEGO1 0xdeadbeef stub"),
# Hex string must be lowercase # Hex string must be lowercase
(True, False, "// OFFSET: ISLE 0xDEADBEEF"), (True, False, "// FUNCTION: ISLE 0xDEADBEEF"),
# TODO: How flexible should we be with matching the module name? # TODO: How flexible should we be with matching the module name?
(True, True, "// OFFSET: OMNI 0x12345678"), (True, True, "// FUNCTION: OMNI 0x12345678"),
(True, True, "// OFFSET: LEG01 0x12345678"), (True, True, "// FUNCTION: LEG01 0x12345678"),
(True, False, "// OFFSET: hello 0x12345678"), (True, False, "// FUNCTION: hello 0x12345678"),
# Not close enough to match # Not close enough to match
(False, False, "// OFFSET: ISLE0x12345678"), (False, False, "// FUNCTION: ISLE0x12345678"),
(False, False, "// OFFSET: 0x12345678"), (False, False, "// FUNCTION: 0x12345678"),
(False, False, "// LEGO1: 0x12345678"), (False, False, "// LEGO1: 0x12345678"),
# Hex string shorter than 8 characters # Hex string shorter than 8 characters
(True, True, "// OFFSET: LEGO1 0x1234"), (True, True, "// FUNCTION: LEGO1 0x1234"),
# TODO: These match but shouldn't. # TODO: These match but shouldn't.
# (False, False, '// OFFSET: LEGO1 0'), # (False, False, '// FUNCTION: LEGO1 0'),
# (False, False, '// OFFSET: LEGO1 0x'), # (False, False, '// FUNCTION: LEGO1 0x'),
] ]
@pytest.mark.parametrize("match, _, line", offset_comment_samples) @pytest.mark.parametrize("match, _, line", marker_samples)
def test_offset_match(line: str, match: bool, _): def test_marker_match(line: str, match: bool, _):
did_match = match_offset_comment(line) is not None did_match = match_marker(line) is not None
assert did_match is match assert did_match is match
@pytest.mark.parametrize("_, exact, line", offset_comment_samples) @pytest.mark.parametrize("_, exact, line", marker_samples)
def test_exact_offset_comment(line: str, exact: bool, _): def test_marker_exact(line: str, exact: bool, _):
assert is_exact_offset_comment(line) is exact assert is_marker_exact(line) is exact
# Helper for the next test: cut down version of OffsetMatch def test_marker_dict_simple():
MiniOfs = namedtuple("MiniOfs", ["module", "value"]) d = MarkerDict()
d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
markers = list(d.iter())
assert len(markers) == 1
distinct_by_module_samples = [
# empty set def test_marker_dict_ofs_replace():
([], []), d = MarkerDict()
# same module name d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]), d.insert(DecompMarker("FUNCTION", "TEST", 0x555))
# same module name, case-insensitive markers = list(d.iter())
([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]), assert len(markers) == 1
# duplicates, non-consecutive assert markers[0].offset == 0x1234
(
[MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)],
[MiniOfs("test", 123), MiniOfs("abc", 111)], def test_marker_dict_type_replace():
), d = MarkerDict()
d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
d.insert(DecompMarker("STUB", "TEST", 0x1234))
markers = list(d.iter())
assert len(markers) == 1
assert markers[0].type == "FUNCTION"
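# The two replace tests above imply that the first marker wins and that
# FUNCTION and STUB compete for the same slot. A sketch of a dict with that
# behavior (illustrative only; not the real MarkerDict internals):
class _SketchMarkerDict:
    def __init__(self):
        self._markers = {}

    def insert(self, marker: DecompMarker):
        # Treat STUB as a FUNCTION-type marker so the two share one slot.
        key = "FUNCTION" if marker.type in ("FUNCTION", "STUB") else marker.type
        self._markers.setdefault(key, marker)  # keep the first marker seen

    def iter(self):
        return iter(self._markers.values())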
class_name_match_cases = [
("struct MxString {", "MxString"),
("class MxString {", "MxString"),
("// class MxString", "MxString"),
("class MxString : public MxCore {", "MxString"),
("class MxPtrList<MxPresenter>", "MxPtrList<MxPresenter>"),
# If we can match the symbol MxList<LegoPathController *>::`vftable'
# we should derive the correct class name. If the template parameter is a
# pointer type, the asterisk and the class name are separated by one space.
("// class MxList<LegoPathController *>", "MxList<LegoPathController *>"),
("// class MxList<LegoPathController*>", "MxList<LegoPathController *>"),
("// class MxList<LegoPathController* >", "MxList<LegoPathController *>"),
# I don't know if this would ever come up, but sure, why not?
("// class MxList<LegoPathController**>", "MxList<LegoPathController **>"),
] ]
@pytest.mark.parametrize("sample, expected", distinct_by_module_samples) @pytest.mark.parametrize("line, class_name", class_name_match_cases)
def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]): def test_get_class_name(line: str, class_name: str):
assert distinct_by_module(sample) == expected assert get_class_name(line) == class_name
class_name_no_match_cases = [
"MxString { ",
"clas MxString",
"// MxPtrList<MxPresenter>::`scalar deleting destructor'",
]
@pytest.mark.parametrize("line", class_name_no_match_cases)
def test_get_class_name_none(line: str):
assert get_class_name(line) is None
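# A sketch of the kind of pattern get_class_name could use to satisfy the
# cases above (illustrative only; the real implementation lives in
# isledecomp.parser.util and may differ):
import re

_SKETCH_CLASS_RE = re.compile(r"\b(?:class|struct)\s+(\w+(?:<[^>]*>)?)")


def _sketch_get_class_name(line: str):
    match = _SKETCH_CLASS_RE.search(line)
    if match is None:
        return None
    # Normalize "T*>", "T* >", and "T**>" to the symbol spelling "T *>" / "T **>".
    return re.sub(r"\s*(\*+)\s*>", r" \1>", match.group(1))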

View File

@ -10,7 +10,7 @@
from isledecomp import ( from isledecomp import (
Bin, Bin,
find_code_blocks, DecompParser,
get_file_in_script_dir, get_file_in_script_dir,
OffsetPlaceholderGenerator, OffsetPlaceholderGenerator,
print_diff, print_diff,
@ -313,18 +313,20 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac
# Generate basename of original file, used in locating OFFSET lines # Generate basename of original file, used in locating OFFSET lines
basename = os.path.basename(os.path.splitext(original)[0]) basename = os.path.basename(os.path.splitext(original)[0])
parser = DecompParser()
for srcfilename in walk_source_dir(source): for srcfilename in walk_source_dir(source):
parser.reset()
with open(srcfilename, "r", encoding="utf-8") as srcfile: with open(srcfilename, "r", encoding="utf-8") as srcfile:
blocks = find_code_blocks(srcfile) parser.read_lines(srcfile)
for block in blocks: for fun in parser.functions:
if block.is_stub: if fun.is_stub:
continue continue
if block.module != basename: if fun.module != basename:
continue continue
addr = block.offset addr = fun.offset
# Verbose flag handling # Verbose flag handling
if verbose: if verbose:
if addr == verbose: if addr == verbose:
@ -332,13 +334,13 @@ def gen_svg(svg_file, name_svg, icon, svg_implemented_funcs, total_funcs, raw_ac
else: else:
continue continue
if block.is_template: if fun.lookup_by_name:
recinfo = syminfo.get_recompiled_address_from_name(block.signature) recinfo = syminfo.get_recompiled_address_from_name(fun.name)
if not recinfo: if not recinfo:
continue continue
else: else:
recinfo = syminfo.get_recompiled_address( recinfo = syminfo.get_recompiled_address(
srcfilename, block.start_line srcfilename, fun.line_number
) )
if not recinfo: if not recinfo:
continue continue
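# (Sketch) The two branches above reduce to a name-based lookup for functions
# the parser flags with lookup_by_name (the role the old is_template check
# played) and a file/line lookup for everything else. The wrapper name is
# assumed; the syminfo calls are the ones used above:
def _sketch_resolve(fun, srcfilename, syminfo):
    if fun.lookup_by_name:
        return syminfo.get_recompiled_address_from_name(fun.name)
    return syminfo.get_recompiled_address(srcfilename, fun.line_number)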