Parser refactor:

- Handling LIRBARY and STRING markers
- Extracting global variable name for future comparison
- Marking function static variables
- More fluent error messages
This commit is contained in:
disinvite 2023-12-27 14:18:31 -05:00
parent d5854a46ae
commit f0377ed839
9 changed files with 435 additions and 174 deletions

View File

@ -39,6 +39,14 @@ class ParserError(Enum):
# WARN: We found a marker to be referenced by name outside of a header file. # WARN: We found a marker to be referenced by name outside of a header file.
BYNAME_FUNCTION_IN_CPP = 109 BYNAME_FUNCTION_IN_CPP = 109
# WARN: A GLOBAL marker appeared over a variable without the g_ prefix
GLOBAL_MISSING_PREFIX = 110
# WARN: GLOBAL marker points at something other than variable declaration.
# We can't match global variables based on position, but the goal here is
# to ignore things like string literal that are not variables.
GLOBAL_NOT_VARIABLE = 111
# This code or higher is an error, not a warning # This code or higher is an error, not a warning
DECOMP_ERROR_START = 200 DECOMP_ERROR_START = 200
@ -50,13 +58,18 @@ class ParserError(Enum):
# For example, a GLOBAL cannot follow FUNCTION/STUB # For example, a GLOBAL cannot follow FUNCTION/STUB
INCOMPATIBLE_MARKER = 201 INCOMPATIBLE_MARKER = 201
# ERROR: The line following a synthetic marker was not a comment # ERROR: The line following an explicit by-name marker was not a comment
BAD_SYNTHETIC = 202 # We assume a syntax error here rather than try to use the next line
BAD_NAMEREF = 202
# ERROR: This function offset comes before the previous offset from the same module # ERROR: This function offset comes before the previous offset from the same module
# This hopefully gives some hint about which functions need to be rearranged. # This hopefully gives some hint about which functions need to be rearranged.
FUNCTION_OUT_OF_ORDER = 203 FUNCTION_OUT_OF_ORDER = 203
# ERROR: The line following an explicit by-name marker that does _not_ expect
# a comment -- i.e. VTABLE or GLOBAL -- could not extract the name
NO_SUITABLE_NAME = 204
@dataclass @dataclass
class ParserAlert: class ParserAlert:

View File

@ -0,0 +1,103 @@
import re
from typing import Optional
from enum import Enum
class MarkerType(Enum):
UNKNOWN = -100
FUNCTION = 1
STUB = 2
SYNTHETIC = 3
TEMPLATE = 4
GLOBAL = 5
VTABLE = 6
STRING = 7
LIBRARY = 8
markerRegex = re.compile(
r"\s*//\s*(?P<type>\w+):\s*(?P<module>\w+)\s+(?P<offset>0x[a-f0-9]+)",
flags=re.I,
)
markerExactRegex = re.compile(
r"\s*// (?P<type>[A-Z]+): (?P<module>[A-Z0-9]+) (?P<offset>0x[a-f0-9]+)$"
)
class DecompMarker:
def __init__(self, marker_type: str, module: str, offset: int) -> None:
try:
self._type = MarkerType[marker_type.upper()]
except KeyError:
self._type = MarkerType.UNKNOWN
# Convert to upper here. A lot of other analysis depends on this name
# being consistent and predictable. If the name is _not_ capitalized
# we will emit a syntax error.
self._module: str = module.upper()
self._offset: int = offset
@property
def type(self) -> MarkerType:
return self._type
@property
def module(self) -> str:
return self._module
@property
def offset(self) -> int:
return self._offset
def is_regular_function(self) -> bool:
"""Regular function, meaning: not an explicit byname lookup. FUNCTION
markers can be _implicit_ byname.
FUNCTION and STUB markers are (currently) the only heterogenous marker types that
can be lumped together, although the reasons for doing so are a little vague."""
return self._type in (MarkerType.FUNCTION, MarkerType.STUB)
def is_explicit_byname(self) -> bool:
return self._type in (
MarkerType.SYNTHETIC,
MarkerType.TEMPLATE,
MarkerType.LIBRARY,
)
def is_variable(self) -> bool:
return self._type == MarkerType.GLOBAL
def is_synthetic(self) -> bool:
return self._type == MarkerType.SYNTHETIC
def is_template(self) -> bool:
return self._type == MarkerType.TEMPLATE
def is_vtable(self) -> bool:
return self._type == MarkerType.VTABLE
def is_library(self) -> bool:
return self._type == MarkerType.LIBRARY
def is_string(self) -> bool:
return self._type == MarkerType.STRING
def allowed_in_func(self) -> bool:
return self._type in (MarkerType.GLOBAL, MarkerType.STRING)
def match_marker(line: str) -> Optional[DecompMarker]:
match = markerRegex.match(line)
if match is None:
return None
return DecompMarker(
marker_type=match.group("type"),
module=match.group("module"),
offset=int(match.group("offset"), 16),
)
def is_marker_exact(line: str) -> bool:
return markerExactRegex.match(line) is not None

View File

@ -1,35 +1,58 @@
from typing import Optional
from dataclasses import dataclass from dataclasses import dataclass
from .marker import MarkerType
@dataclass @dataclass
class ParserNode: class ParserSymbol:
"""Exported decomp marker with all information (except the code filename) required to
cross-reference with cvdump data."""
type: MarkerType
line_number: int line_number: int
@dataclass
class ParserSymbol(ParserNode):
module: str module: str
offset: int offset: int
name: str
# The parser doesn't (currently) know about the code filename, but if you
# wanted to set it here after the fact, here's the spot.
filename: Optional[str] = None
def should_skip(self) -> bool:
"""The default is to compare any symbols we have"""
return False
def is_nameref(self) -> bool:
"""All symbols default to name lookup"""
return True
@dataclass @dataclass
class ParserFunction(ParserSymbol): class ParserFunction(ParserSymbol):
name: str # We are able to detect the closing line of a function with some reliability.
# This isn't used for anything right now, but perhaps later it will be.
end_line: Optional[int] = None
# All marker types are referenced by name except FUNCTION/STUB. These can also be
# referenced by name, but only if this flag is true.
lookup_by_name: bool = False lookup_by_name: bool = False
is_stub: bool = False
is_synthetic: bool = False def should_skip(self) -> bool:
is_template: bool = False """Temporary helper function because reccmp expects this to be here"""
end_line: int = -1 return self.type in (MarkerType.STUB, MarkerType.LIBRARY)
def is_nameref(self) -> bool:
return (
self.type in (MarkerType.SYNTHETIC, MarkerType.TEMPLATE, MarkerType.LIBRARY)
or self.lookup_by_name
)
@dataclass @dataclass
class ParserVariable(ParserSymbol): class ParserVariable(ParserSymbol):
name: str
size: int = -1
is_static: bool = False is_static: bool = False
@dataclass @dataclass
class ParserVtable(ParserSymbol): class ParserVtable(ParserSymbol):
class_name: str pass
num_entries: int = -1

View File

@ -3,15 +3,19 @@
from typing import List, Iterable, Iterator from typing import List, Iterable, Iterator
from enum import Enum from enum import Enum
from .util import ( from .util import (
DecompMarker,
is_blank_or_comment, is_blank_or_comment,
match_marker,
is_marker_exact,
get_class_name, get_class_name,
get_variable_name,
get_synthetic_name, get_synthetic_name,
remove_trailing_comment, remove_trailing_comment,
) )
from .marker import (
DecompMarker,
match_marker,
is_marker_exact,
)
from .node import ( from .node import (
ParserSymbol,
ParserFunction, ParserFunction,
ParserVariable, ParserVariable,
ParserVtable, ParserVtable,
@ -28,44 +32,23 @@ class ReaderState(Enum):
IN_GLOBAL = 5 IN_GLOBAL = 5
IN_FUNC_GLOBAL = 6 IN_FUNC_GLOBAL = 6
IN_VTABLE = 7 IN_VTABLE = 7
IN_SYNTHETIC = 8
IN_LIBRARY = 9
DONE = 100 DONE = 100
def marker_is_stub(marker: DecompMarker) -> bool:
return marker.type.upper() == "STUB"
def marker_is_variable(marker: DecompMarker) -> bool:
return marker.type.upper() == "GLOBAL"
def marker_is_synthetic(marker: DecompMarker) -> bool:
return marker.type.upper() in ("SYNTHETIC", "TEMPLATE")
def marker_is_template(marker: DecompMarker) -> bool:
return marker.type.upper() == "TEMPLATE"
def marker_is_function(marker: DecompMarker) -> bool:
return marker.type.upper() in ("FUNCTION", "STUB")
def marker_is_vtable(marker: DecompMarker) -> bool:
return marker.type.upper() == "VTABLE"
class MarkerDict: class MarkerDict:
def __init__(self) -> None: def __init__(self) -> None:
self.markers: dict = {} self.markers: dict = {}
def insert(self, marker: DecompMarker) -> bool: def insert(self, marker: DecompMarker) -> bool:
"""Return True if this insert would overwrite""" """Return True if this insert would overwrite"""
module = marker.module.upper() module = marker.module
if module in self.markers: if module in self.markers:
return True return True
self.markers[module] = (marker.type, marker.offset) # TODO: type converted back to string version here instead of using enum
self.markers[module] = (marker.type.name, marker.offset)
return False return False
def iter(self) -> Iterator[DecompMarker]: def iter(self) -> Iterator[DecompMarker]:
@ -82,9 +65,7 @@ class DecompParser:
# but not right now # but not right now
def __init__(self) -> None: def __init__(self) -> None:
# The lists to be populated as we parse # The lists to be populated as we parse
self.functions: List[ParserFunction] = [] self._symbols: List[ParserSymbol] = []
self.vtables: List[ParserVtable] = []
self.variables: List[ParserVariable] = []
self.alerts: List[ParserAlert] = [] self.alerts: List[ParserAlert] = []
self.line_number: int = 0 self.line_number: int = 0
@ -113,9 +94,7 @@ def __init__(self) -> None:
self.function_sig: str = "" self.function_sig: str = ""
def reset(self): def reset(self):
self.functions = [] self._symbols = []
self.vtables = []
self.variables = []
self.alerts = [] self.alerts = []
self.line_number = 0 self.line_number = 0
@ -131,6 +110,18 @@ def reset(self):
self.function_start = 0 self.function_start = 0
self.function_sig = "" self.function_sig = ""
@property
def functions(self) -> List[ParserSymbol]:
return [s for s in self._symbols if isinstance(s, ParserFunction)]
@property
def vtables(self) -> List[ParserSymbol]:
return [s for s in self._symbols if isinstance(s, ParserVtable)]
@property
def variables(self) -> List[ParserSymbol]:
return [s for s in self._symbols if isinstance(s, ParserVariable)]
def _recover(self): def _recover(self):
"""We hit a syntax error and need to reset temp structures""" """We hit a syntax error and need to reset temp structures"""
self.state = ReaderState.SEARCH self.state = ReaderState.SEARCH
@ -159,10 +150,17 @@ def _function_marker(self, marker: DecompMarker):
self._syntax_warning(ParserError.DUPLICATE_MODULE) self._syntax_warning(ParserError.DUPLICATE_MODULE)
self.state = ReaderState.WANT_SIG self.state = ReaderState.WANT_SIG
def _synthetic_marker(self, marker: DecompMarker): def _nameref_marker(self, marker: DecompMarker):
"""Functions explicitly referenced by name are set here"""
if self.fun_markers.insert(marker): if self.fun_markers.insert(marker):
self._syntax_warning(ParserError.DUPLICATE_MODULE) self._syntax_warning(ParserError.DUPLICATE_MODULE)
self.state = ReaderState.IN_TEMPLATE
if marker.is_template():
self.state = ReaderState.IN_TEMPLATE
elif marker.is_synthetic():
self.state = ReaderState.IN_SYNTHETIC
else:
self.state = ReaderState.IN_LIBRARY
def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False): def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False):
end_line = self.line_number end_line = self.line_number
@ -173,16 +171,14 @@ def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False)
end_line -= 1 end_line -= 1
for marker in self.fun_markers.iter(): for marker in self.fun_markers.iter():
self.functions.append( self._symbols.append(
ParserFunction( ParserFunction(
type=marker.type,
line_number=self.function_start, line_number=self.function_start,
module=marker.module, module=marker.module,
offset=marker.offset, offset=marker.offset,
lookup_by_name=lookup_by_name,
is_stub=marker_is_stub(marker),
is_synthetic=marker_is_synthetic(marker),
is_template=marker_is_template(marker),
name=self.function_sig, name=self.function_sig,
lookup_by_name=lookup_by_name,
end_line=end_line, end_line=end_line,
) )
) )
@ -202,12 +198,13 @@ def _vtable_done(self, class_name: str = None):
class_name = self.last_line.strip() class_name = self.last_line.strip()
for marker in self.tbl_markers.iter(): for marker in self.tbl_markers.iter():
self.vtables.append( self._symbols.append(
ParserVtable( ParserVtable(
type=marker.type,
line_number=self.line_number, line_number=self.line_number,
module=marker.module, module=marker.module,
offset=marker.offset, offset=marker.offset,
class_name=class_name, name=class_name,
) )
) )
@ -223,14 +220,19 @@ def _variable_marker(self, marker: DecompMarker):
else: else:
self.state = ReaderState.IN_GLOBAL self.state = ReaderState.IN_GLOBAL
def _variable_done(self): def _variable_done(self, name: str):
if not name.startswith("g_"):
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
for marker in self.var_markers.iter(): for marker in self.var_markers.iter():
self.variables.append( self._symbols.append(
ParserVariable( ParserVariable(
type=marker.type,
line_number=self.line_number, line_number=self.line_number,
module=marker.module, module=marker.module,
offset=marker.offset, offset=marker.offset,
name=self.last_line.strip(), name=name,
is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
) )
) )
@ -246,12 +248,23 @@ def _handle_marker(self, marker: DecompMarker):
self._syntax_error(ParserError.UNEXPECTED_MARKER) self._syntax_error(ParserError.UNEXPECTED_MARKER)
return return
# If we are inside a function, the only markers we accept are:
# GLOBAL, indicating a static variable
# STRING, indicating a literal string.
# Otherwise we assume that the parser missed the end of the function
# and we have moved on to something else.
# This is unlikely to occur with well-formed code, but
# we can recover easily by just ending the function here.
if self.state == ReaderState.IN_FUNC and not marker.allowed_in_func():
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(unexpected=True)
# TODO: How uncertain are we of detecting the end of a function # TODO: How uncertain are we of detecting the end of a function
# in a clang-formatted file? For now we assume we have missed the # in a clang-formatted file? For now we assume we have missed the
# end if we detect a non-GLOBAL marker while state is IN_FUNC. # end if we detect a non-GLOBAL marker while state is IN_FUNC.
# Maybe these cases should be syntax errors instead # Maybe these cases should be syntax errors instead
if marker_is_function(marker): if marker.is_regular_function():
if self.state in ( if self.state in (
ReaderState.SEARCH, ReaderState.SEARCH,
ReaderState.WANT_SIG, ReaderState.WANT_SIG,
@ -259,29 +272,41 @@ def _handle_marker(self, marker: DecompMarker):
# We will allow multiple offsets if we have just begun # We will allow multiple offsets if we have just begun
# the code block, but not after we hit the curly brace. # the code block, but not after we hit the curly brace.
self._function_marker(marker) self._function_marker(marker)
elif self.state == ReaderState.IN_FUNC:
# We hit another offset unexpectedly.
# We can recover easily by just ending the function here.
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(unexpected=True)
# Start the next function right after so we can
# read the next line.
self._function_marker(marker)
else: else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER) self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker_is_synthetic(marker): elif marker.is_template():
if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE): if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE):
self._synthetic_marker(marker) self._nameref_marker(marker)
elif self.state == ReaderState.IN_FUNC:
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(lookup_by_name=True, unexpected=True)
self._synthetic_marker(marker)
else: else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER) self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker_is_variable(marker): elif marker.is_synthetic():
if self.state in (ReaderState.SEARCH, ReaderState.IN_SYNTHETIC):
self._nameref_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker.is_library():
if self.state in (ReaderState.SEARCH, ReaderState.IN_LIBRARY):
self._nameref_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker.is_string():
# TODO: We are ignoring string markers for the moment.
# We already have a lot of them in the codebase, though, so we'll
# hang onto them for now in case we can use them later.
# To match up string constants, the strategy will be:
# 1. Use cvdump to find all string constants in the recomp
# 2. In the original binary, look at relocated vaddrs from .rdata
# 3. Try to match up string data from #1 with locations in #2
# Throw the syntax error we would throw if we were parsing these
if self.state not in (ReaderState.SEARCH, ReaderState.IN_FUNC):
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker.is_variable():
if self.state in ( if self.state in (
ReaderState.SEARCH, ReaderState.SEARCH,
ReaderState.IN_GLOBAL, ReaderState.IN_GLOBAL,
@ -292,13 +317,9 @@ def _handle_marker(self, marker: DecompMarker):
else: else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER) self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker_is_vtable(marker): elif marker.is_vtable():
if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE): if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE):
self._vtable_marker(marker) self._vtable_marker(marker)
elif self.state == ReaderState.IN_FUNC:
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(unexpected=True)
self._vtable_marker(marker)
else: else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER) self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
@ -322,12 +343,16 @@ def read_line(self, line: str):
return return
line_strip = line.strip() line_strip = line.strip()
if self.state == ReaderState.IN_TEMPLATE: if self.state in (
# TEMPLATE functions are a special case. The signature is ReaderState.IN_SYNTHETIC,
# given on the next line (in a // comment) ReaderState.IN_TEMPLATE,
ReaderState.IN_LIBRARY,
):
# Explicit nameref functions provide the function name
# on the next line (in a // comment)
name = get_synthetic_name(line) name = get_synthetic_name(line)
if name is None: if name is None:
self._syntax_error(ParserError.BAD_SYNTHETIC) self._syntax_error(ParserError.BAD_NAMEREF)
else: else:
self.function_sig = name self.function_sig = name
self._function_starts_here() self._function_starts_here()
@ -384,8 +409,28 @@ def read_line(self, line: str):
self._function_done() self._function_done()
elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL): elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL):
if not is_blank_or_comment(line): # TODO: Known problem that an error here will cause us to abandon a
self._variable_done() # function we have already parsed if state == IN_FUNC_GLOBAL.
# However, we are not tolerant of _any_ syntax problems in our
# CI actions, so the solution is to just fix the invalid marker.
if is_blank_or_comment(line):
self._syntax_error(ParserError.NO_SUITABLE_NAME)
return
# We don't have a foolproof mechanism to tell what is and is not a variable.
# If the GLOBAL is being declared on a `return` statement, though, this is
# not correct. It is either a string literal (which will be handled differently)
# or it is not the variable declaration, which is incorrect decomp syntax.
if line.strip().startswith("return"):
self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
return
name = get_variable_name(line)
if name is None:
self._syntax_error(ParserError.NO_SUITABLE_NAME)
return
self._variable_done(name)
elif self.state == ReaderState.IN_VTABLE: elif self.state == ReaderState.IN_VTABLE:
vtable_class = get_class_name(line) vtable_class = get_class_name(line)

View File

@ -1,17 +1,6 @@
# C++ Parser utility functions and data structures # C++ Parser utility functions and data structures
from __future__ import annotations # python <3.10 compatibility
import re import re
from collections import namedtuple from typing import Optional
DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"])
markerRegex = re.compile(
r"\s*//\s*(\w+):\s*(\w+)\s+(0x[a-f0-9]+)",
flags=re.I,
)
markerExactRegex = re.compile(r"\s*// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$")
# The goal here is to just read whatever is on the next line, so some # The goal here is to just read whatever is on the next line, so some
# flexibility in the formatting seems OK # flexibility in the formatting seems OK
@ -23,7 +12,7 @@
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$") trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
def get_synthetic_name(line: str) -> str | None: def get_synthetic_name(line: str) -> Optional[str]:
"""Synthetic names appear on a single line comment on the line after the marker. """Synthetic names appear on a single line comment on the line after the marker.
If that's not what we have, return None""" If that's not what we have, return None"""
template_match = templateCommentRegex.match(line) template_match = templateCommentRegex.match(line)
@ -51,20 +40,6 @@ def is_blank_or_comment(line: str) -> bool:
) )
def match_marker(line: str) -> DecompMarker | None:
match = markerRegex.match(line)
if match is None:
return None
return DecompMarker(
type=match.group(1), module=match.group(2), offset=int(match.group(3), 16)
)
def is_marker_exact(line: str) -> bool:
return markerExactRegex.match(line) is not None
template_class_decl_regex = re.compile( template_class_decl_regex = re.compile(
r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)<([\w]+)\s*(\*+)?\s*>" r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)<([\w]+)\s*(\*+)?\s*>"
) )
@ -73,7 +48,7 @@ def is_marker_exact(line: str) -> bool:
class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)") class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)")
def get_class_name(line: str) -> str | None: def get_class_name(line: str) -> Optional[str]:
"""For VTABLE markers, extract the class name from the code line or comment """For VTABLE markers, extract the class name from the code line or comment
where it appears.""" where it appears."""
@ -93,3 +68,21 @@ def get_class_name(line: str) -> str | None:
return match.group(1) return match.group(1)
return None return None
global_regex = re.compile(r"(?P<name>g_\w+)")
less_strict_global_regex = re.compile(r"(?P<name>\w+)(?:\)\(|\[.*|\s*=.*|;)")
def get_variable_name(line: str) -> Optional[str]:
"""Grab the name of the variable annotated with the GLOBAL marker.
Correct syntax would have the variable start with the prefix "g_"
but we will try to match regardless."""
if (match := global_regex.search(line)) is not None:
return match.group("name")
if (match := less_strict_global_regex.search(line)) is not None:
return match.group("name")
return None

View File

@ -115,7 +115,7 @@ def test_different_markers_same_module(parser):
# Use first marker declaration, don't replace # Use first marker declaration, don't replace
assert len(parser.functions) == 1 assert len(parser.functions) == 1
assert parser.functions[0].is_stub is False assert parser.functions[0].should_skip() is False
# Should alert to this # Should alert to this
assert len(parser.alerts) == 1 assert len(parser.alerts) == 1
@ -193,7 +193,7 @@ def test_multiple_vtables(parser):
) )
assert len(parser.alerts) == 0 assert len(parser.alerts) == 0
assert len(parser.vtables) == 2 assert len(parser.vtables) == 2
assert parser.vtables[0].class_name == "MxString" assert parser.vtables[0].name == "MxString"
def test_multiple_vtables_same_module(parser): def test_multiple_vtables_same_module(parser):
@ -247,7 +247,7 @@ def test_synthetic_no_comment(parser):
) )
assert len(parser.functions) == 0 assert len(parser.functions) == 0
assert len(parser.alerts) == 1 assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.BAD_SYNTHETIC assert parser.alerts[0].code == ParserError.BAD_NAMEREF
assert parser.state == ReaderState.SEARCH assert parser.state == ReaderState.SEARCH
@ -375,3 +375,70 @@ def test_unexpected_eof(parser):
assert len(parser.functions) == 1 assert len(parser.functions) == 1
assert len(parser.alerts) == 1 assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE
def test_global_variable_prefix(parser):
"""Global and static variables should have the g_ prefix."""
parser.read_lines(
[
"// GLOBAL: TEST 0x1234",
'const char* g_msg = "hello";',
]
)
assert len(parser.variables) == 1
assert len(parser.alerts) == 0
parser.read_lines(
[
"// GLOBAL: TEXT 0x5555",
"int test = 5;",
]
)
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.GLOBAL_MISSING_PREFIX
# In spite of that, we should still grab the variable name.
assert parser.variables[1].name == "test"
def test_global_nomatch(parser):
"""We do our best to grab the variable name, even without the g_ prefix
but this (by design) will not match everything."""
parser.read_lines(
[
"// GLOBAL: TEST 0x1234",
"FunctionCall();",
]
)
assert len(parser.variables) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.NO_SUITABLE_NAME
def test_static_variable(parser):
"""We can detect whether a variable is a static function variable
based on the parser's state when we detect it.
Checking for the word `static` alone is not a good test.
Static class variables are filed as S_GDATA32, same as regular globals.
Only function statics are filed as S_LDATA32."""
parser.read_lines(
[
"// GLOBAL: TEST 0x1234",
"int g_test = 1234;",
]
)
assert len(parser.variables) == 1
assert parser.variables[0].is_static is False
parser.read_lines(
[
"// FUNCTION: TEST 0x5555",
"void test_function() {",
"// GLOBAL: TEST 0x8888",
"int g_internal = 0;",
"}",
]
)
assert len(parser.variables) == 2
assert parser.variables[1].is_static is True

View File

@ -11,9 +11,11 @@
(_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None), (_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None),
(_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None), (_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None),
(_rs.SEARCH, "STUB", _rs.WANT_SIG, None), (_rs.SEARCH, "STUB", _rs.WANT_SIG, None),
(_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None), (_rs.SEARCH, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None), (_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None), (_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
(_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None),
(_rs.SEARCH, "STRING", _rs.SEARCH, None),
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None), (_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@ -21,20 +23,26 @@
(_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), (_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "SYNTHETIC", _rs.IN_SYNTHETIC, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "STRING", _rs.IN_FUNC, None),
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None), (_rs.IN_TEMPLATE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None), (_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
@ -42,6 +50,8 @@
(_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "LIBRARY", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "STRING", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None), (_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None),
@ -49,6 +59,8 @@
(_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
@ -56,6 +68,8 @@
(_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@ -63,6 +77,26 @@
(_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None), (_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None),
(_rs.IN_VTABLE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
(_rs.IN_SYNTHETIC, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "LIBRARY", _rs.IN_LIBRARY, None),
(_rs.IN_LIBRARY, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
] ]
# fmt: on # fmt: on
@ -105,47 +139,3 @@ def test_state_search_line(line: str):
p.read_line(line) p.read_line(line)
assert p.state == _rs.SEARCH assert p.state == _rs.SEARCH
assert len(p.alerts) == 0 assert len(p.alerts) == 0
global_lines = [
("// A comment", _rs.IN_GLOBAL),
("", _rs.IN_GLOBAL),
("\t", _rs.IN_GLOBAL),
(" ", _rs.IN_GLOBAL),
# TODO: no check for "likely" variable declaration so these all count
("void function()", _rs.SEARCH),
("int x = 123;", _rs.SEARCH),
("just some text", _rs.SEARCH),
]
@pytest.mark.parametrize("line, new_state", global_lines)
def test_state_global_line(line: str, new_state: _rs):
p = DecompParser()
p.read_line("// GLOBAL: TEST 0x1234")
assert p.state == _rs.IN_GLOBAL
p.read_line(line)
assert p.state == new_state
# mostly same as above
in_func_global_lines = [
("// A comment", _rs.IN_FUNC_GLOBAL),
("", _rs.IN_FUNC_GLOBAL),
("\t", _rs.IN_FUNC_GLOBAL),
(" ", _rs.IN_FUNC_GLOBAL),
# TODO: no check for "likely" variable declaration so these all count
("void function()", _rs.IN_FUNC),
("int x = 123;", _rs.IN_FUNC),
("just some text", _rs.IN_FUNC),
]
@pytest.mark.parametrize("line, new_state", in_func_global_lines)
def test_state_in_func_global_line(line: str, new_state: _rs):
p = DecompParser()
p.state = _rs.IN_FUNC
p.read_line("// GLOBAL: TEST 0x1234")
assert p.state == _rs.IN_FUNC_GLOBAL
p.read_line(line)
assert p.state == new_state

View File

@ -1,11 +1,15 @@
import pytest import pytest
from isledecomp.parser.parser import MarkerDict from isledecomp.parser.parser import MarkerDict
from isledecomp.parser.util import ( from isledecomp.parser.marker import (
DecompMarker, DecompMarker,
is_blank_or_comment, MarkerType,
match_marker, match_marker,
is_marker_exact, is_marker_exact,
)
from isledecomp.parser.util import (
is_blank_or_comment,
get_class_name, get_class_name,
get_variable_name,
) )
@ -96,7 +100,7 @@ def test_marker_dict_type_replace():
d.insert(DecompMarker("STUB", "TEST", 0x1234)) d.insert(DecompMarker("STUB", "TEST", 0x1234))
markers = list(d.iter()) markers = list(d.iter())
assert len(markers) == 1 assert len(markers) == 1
assert markers[0].type == "FUNCTION" assert markers[0].type == MarkerType.FUNCTION
class_name_match_cases = [ class_name_match_cases = [
@ -131,3 +135,26 @@ def test_get_class_name(line: str, class_name: str):
@pytest.mark.parametrize("line", class_name_no_match_cases) @pytest.mark.parametrize("line", class_name_no_match_cases)
def test_get_class_name_none(line: str): def test_get_class_name_none(line: str):
assert get_class_name(line) is None assert get_class_name(line) is None
variable_name_cases = [
# with prefix for easy access
("char* g_test;", "g_test"),
("g_test;", "g_test"),
("void (*g_test)(int);", "g_test"),
("char g_test[50];", "g_test"),
("char g_test[50] = {1234,", "g_test"),
("int g_test = 500;", "g_test"),
# no prefix
("char* hello;", "hello"),
("hello;", "hello"),
("void (*hello)(int);", "hello"),
("char hello[50];", "hello"),
("char hello[50] = {1234,", "hello"),
("int hello = 500;", "hello"),
]
@pytest.mark.parametrize("line,name", variable_name_cases)
def test_get_variable_name(line: str, name: str):
assert get_variable_name(line) == name

View File

@ -316,7 +316,7 @@ def main():
parser.read_lines(srcfile) parser.read_lines(srcfile)
for fun in parser.functions: for fun in parser.functions:
if fun.is_stub: if fun.should_skip():
continue continue
if fun.module != basename: if fun.module != basename:
@ -330,7 +330,7 @@ def main():
else: else:
continue continue
if fun.lookup_by_name: if fun.is_nameref():
recinfo = syminfo.get_recompiled_address_from_name(fun.name) recinfo = syminfo.get_recompiled_address_from_name(fun.name)
if not recinfo: if not recinfo:
continue continue