mirror of
https://github.com/isledecomp/isle.git
synced 2026-01-30 11:41:16 +00:00
Parser refactor:
- Handling LIBRARY and STRING markers - Extracting global variable name for future comparison - Marking function static variables - More fluent error messages
This commit is contained in:
parent
d5854a46ae
commit
f0377ed839
@ -39,6 +39,14 @@ class ParserError(Enum):
|
|||||||
# WARN: We found a marker to be referenced by name outside of a header file.
|
# WARN: We found a marker to be referenced by name outside of a header file.
|
||||||
BYNAME_FUNCTION_IN_CPP = 109
|
BYNAME_FUNCTION_IN_CPP = 109
|
||||||
|
|
||||||
|
# WARN: A GLOBAL marker appeared over a variable without the g_ prefix
|
||||||
|
GLOBAL_MISSING_PREFIX = 110
|
||||||
|
|
||||||
|
# WARN: GLOBAL marker points at something other than variable declaration.
|
||||||
|
# We can't match global variables based on position, but the goal here is
|
||||||
|
# to ignore things like string literal that are not variables.
|
||||||
|
GLOBAL_NOT_VARIABLE = 111
|
||||||
|
|
||||||
# This code or higher is an error, not a warning
|
# This code or higher is an error, not a warning
|
||||||
DECOMP_ERROR_START = 200
|
DECOMP_ERROR_START = 200
|
||||||
|
|
||||||
@ -50,13 +58,18 @@ class ParserError(Enum):
|
|||||||
# For example, a GLOBAL cannot follow FUNCTION/STUB
|
# For example, a GLOBAL cannot follow FUNCTION/STUB
|
||||||
INCOMPATIBLE_MARKER = 201
|
INCOMPATIBLE_MARKER = 201
|
||||||
|
|
||||||
# ERROR: The line following a synthetic marker was not a comment
|
# ERROR: The line following an explicit by-name marker was not a comment
|
||||||
BAD_SYNTHETIC = 202
|
# We assume a syntax error here rather than try to use the next line
|
||||||
|
BAD_NAMEREF = 202
|
||||||
|
|
||||||
# ERROR: This function offset comes before the previous offset from the same module
|
# ERROR: This function offset comes before the previous offset from the same module
|
||||||
# This hopefully gives some hint about which functions need to be rearranged.
|
# This hopefully gives some hint about which functions need to be rearranged.
|
||||||
FUNCTION_OUT_OF_ORDER = 203
|
FUNCTION_OUT_OF_ORDER = 203
|
||||||
|
|
||||||
|
# ERROR: The line following an explicit by-name marker that does _not_ expect
|
||||||
|
# a comment -- i.e. VTABLE or GLOBAL -- could not extract the name
|
||||||
|
NO_SUITABLE_NAME = 204
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ParserAlert:
|
class ParserAlert:
|
||||||
|
|||||||
103
tools/isledecomp/isledecomp/parser/marker.py
Normal file
103
tools/isledecomp/isledecomp/parser/marker.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
import re
|
||||||
|
from typing import Optional
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class MarkerType(Enum):
    """Kinds of decomp annotation markers the parser recognizes.

    Member names are the upper-case keywords that appear in the marker
    comment itself, which lets a marker's text be looked up by name.
    """

    # Fallback for a keyword that is not one of the names below.
    UNKNOWN = -100

    # Function-like markers.
    FUNCTION = 1
    STUB = 2
    SYNTHETIC = 3
    TEMPLATE = 4

    # Data-like markers.
    GLOBAL = 5
    VTABLE = 6
    STRING = 7

    # Library function marker.
    LIBRARY = 8
|
||||||
|
|
||||||
|
|
||||||
|
# Lenient marker pattern: case-insensitive and tolerant of extra whitespace.
# Captures the marker keyword, the module name and the hexadecimal offset.
markerRegex = re.compile(
    r"\s*//\s*(?P<type>\w+):\s*(?P<module>\w+)\s+(?P<offset>0x[a-f0-9]+)",
    re.IGNORECASE,
)


# Strict marker pattern: single spaces between the parts, upper-case keyword
# and module, lower-case hex digits, and nothing after the offset.
markerExactRegex = re.compile(
    r"\s*// (?P<type>[A-Z]+): (?P<module>[A-Z0-9]+) (?P<offset>0x[a-f0-9]+)$"
)
|
||||||
|
|
||||||
|
|
||||||
|
class DecompMarker:
    """A single parsed annotation marker: its kind, module name and offset.

    The three values are exposed through read-only properties. The remaining
    methods are simple predicates on the marker kind.
    """

    def __init__(self, marker_type: str, module: str, offset: int) -> None:
        # Look the keyword up by (upper-cased) member name; anything that is
        # not a known member becomes UNKNOWN.
        self._type = MarkerType.__members__.get(
            marker_type.upper(), MarkerType.UNKNOWN
        )

        # The module name is normalized to upper case because later analysis
        # relies on it being consistent and predictable. A name that was not
        # capitalized in the source is reported as a syntax error.
        self._module: str = module.upper()
        self._offset: int = offset

    @property
    def type(self) -> MarkerType:
        """The marker kind as a MarkerType member."""
        return self._type

    @property
    def module(self) -> str:
        """The module name, always upper case."""
        return self._module

    @property
    def offset(self) -> int:
        """The offset given in the marker, as an integer."""
        return self._offset

    def is_regular_function(self) -> bool:
        """True for FUNCTION and STUB markers.

        "Regular" means the marker is not an explicit by-name lookup, although
        a FUNCTION marker can still be an _implicit_ by-name reference.
        FUNCTION and STUB are (currently) the only differing marker kinds that
        are grouped together like this."""
        kind = self._type
        return kind == MarkerType.FUNCTION or kind == MarkerType.STUB

    def is_explicit_byname(self) -> bool:
        """True for SYNTHETIC, TEMPLATE and LIBRARY markers."""
        kind = self._type
        return (
            kind == MarkerType.SYNTHETIC
            or kind == MarkerType.TEMPLATE
            or kind == MarkerType.LIBRARY
        )

    def is_variable(self) -> bool:
        """True for GLOBAL markers."""
        return self._type == MarkerType.GLOBAL

    def is_synthetic(self) -> bool:
        """True for SYNTHETIC markers."""
        return self._type == MarkerType.SYNTHETIC

    def is_template(self) -> bool:
        """True for TEMPLATE markers."""
        return self._type == MarkerType.TEMPLATE

    def is_vtable(self) -> bool:
        """True for VTABLE markers."""
        return self._type == MarkerType.VTABLE

    def is_library(self) -> bool:
        """True for LIBRARY markers."""
        return self._type == MarkerType.LIBRARY

    def is_string(self) -> bool:
        """True for STRING markers."""
        return self._type == MarkerType.STRING

    def allowed_in_func(self) -> bool:
        """True for GLOBAL and STRING markers."""
        kind = self._type
        return kind == MarkerType.GLOBAL or kind == MarkerType.STRING
|
||||||
|
|
||||||
|
|
||||||
|
def match_marker(line: str) -> Optional[DecompMarker]:
    """Parse a marker from the start of `line` using the lenient pattern.

    Returns a DecompMarker built from the captured keyword, module and
    hexadecimal offset, or None if the line does not match."""
    found = markerRegex.match(line)
    if found is None:
        return None

    fields = found.groupdict()
    return DecompMarker(
        marker_type=fields["type"],
        module=fields["module"],
        # The offset text carries a 0x prefix, which int() accepts in base 16.
        offset=int(fields["offset"], 16),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def is_marker_exact(line: str) -> bool:
    """Report whether `line` matches the strict marker pattern."""
    strict_match = markerExactRegex.match(line)
    return strict_match is not None
|
||||||
@ -1,35 +1,58 @@
|
|||||||
|
from typing import Optional
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from .marker import MarkerType
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ParserNode:
|
class ParserSymbol:
|
||||||
|
"""Exported decomp marker with all information (except the code filename) required to
|
||||||
|
cross-reference with cvdump data."""
|
||||||
|
|
||||||
|
type: MarkerType
|
||||||
line_number: int
|
line_number: int
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class ParserSymbol(ParserNode):
|
|
||||||
module: str
|
module: str
|
||||||
offset: int
|
offset: int
|
||||||
|
name: str
|
||||||
|
|
||||||
|
# The parser doesn't (currently) know about the code filename, but if you
|
||||||
|
# wanted to set it here after the fact, here's the spot.
|
||||||
|
filename: Optional[str] = None
|
||||||
|
|
||||||
|
def should_skip(self) -> bool:
|
||||||
|
"""The default is to compare any symbols we have"""
|
||||||
|
return False
|
||||||
|
|
||||||
|
def is_nameref(self) -> bool:
|
||||||
|
"""All symbols default to name lookup"""
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ParserFunction(ParserSymbol):
|
class ParserFunction(ParserSymbol):
|
||||||
name: str
|
# We are able to detect the closing line of a function with some reliability.
|
||||||
|
# This isn't used for anything right now, but perhaps later it will be.
|
||||||
|
end_line: Optional[int] = None
|
||||||
|
|
||||||
|
# All marker types are referenced by name except FUNCTION/STUB. These can also be
|
||||||
|
# referenced by name, but only if this flag is true.
|
||||||
lookup_by_name: bool = False
|
lookup_by_name: bool = False
|
||||||
is_stub: bool = False
|
|
||||||
is_synthetic: bool = False
|
def should_skip(self) -> bool:
|
||||||
is_template: bool = False
|
"""Temporary helper function because reccmp expects this to be here"""
|
||||||
end_line: int = -1
|
return self.type in (MarkerType.STUB, MarkerType.LIBRARY)
|
||||||
|
|
||||||
|
def is_nameref(self) -> bool:
|
||||||
|
return (
|
||||||
|
self.type in (MarkerType.SYNTHETIC, MarkerType.TEMPLATE, MarkerType.LIBRARY)
|
||||||
|
or self.lookup_by_name
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ParserVariable(ParserSymbol):
|
class ParserVariable(ParserSymbol):
|
||||||
name: str
|
|
||||||
size: int = -1
|
|
||||||
is_static: bool = False
|
is_static: bool = False
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ParserVtable(ParserSymbol):
|
class ParserVtable(ParserSymbol):
|
||||||
class_name: str
|
pass
|
||||||
num_entries: int = -1
|
|
||||||
|
|||||||
@ -3,15 +3,19 @@
|
|||||||
from typing import List, Iterable, Iterator
|
from typing import List, Iterable, Iterator
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from .util import (
|
from .util import (
|
||||||
DecompMarker,
|
|
||||||
is_blank_or_comment,
|
is_blank_or_comment,
|
||||||
match_marker,
|
|
||||||
is_marker_exact,
|
|
||||||
get_class_name,
|
get_class_name,
|
||||||
|
get_variable_name,
|
||||||
get_synthetic_name,
|
get_synthetic_name,
|
||||||
remove_trailing_comment,
|
remove_trailing_comment,
|
||||||
)
|
)
|
||||||
|
from .marker import (
|
||||||
|
DecompMarker,
|
||||||
|
match_marker,
|
||||||
|
is_marker_exact,
|
||||||
|
)
|
||||||
from .node import (
|
from .node import (
|
||||||
|
ParserSymbol,
|
||||||
ParserFunction,
|
ParserFunction,
|
||||||
ParserVariable,
|
ParserVariable,
|
||||||
ParserVtable,
|
ParserVtable,
|
||||||
@ -28,44 +32,23 @@ class ReaderState(Enum):
|
|||||||
IN_GLOBAL = 5
|
IN_GLOBAL = 5
|
||||||
IN_FUNC_GLOBAL = 6
|
IN_FUNC_GLOBAL = 6
|
||||||
IN_VTABLE = 7
|
IN_VTABLE = 7
|
||||||
|
IN_SYNTHETIC = 8
|
||||||
|
IN_LIBRARY = 9
|
||||||
DONE = 100
|
DONE = 100
|
||||||
|
|
||||||
|
|
||||||
def marker_is_stub(marker: DecompMarker) -> bool:
|
|
||||||
return marker.type.upper() == "STUB"
|
|
||||||
|
|
||||||
|
|
||||||
def marker_is_variable(marker: DecompMarker) -> bool:
|
|
||||||
return marker.type.upper() == "GLOBAL"
|
|
||||||
|
|
||||||
|
|
||||||
def marker_is_synthetic(marker: DecompMarker) -> bool:
|
|
||||||
return marker.type.upper() in ("SYNTHETIC", "TEMPLATE")
|
|
||||||
|
|
||||||
|
|
||||||
def marker_is_template(marker: DecompMarker) -> bool:
|
|
||||||
return marker.type.upper() == "TEMPLATE"
|
|
||||||
|
|
||||||
|
|
||||||
def marker_is_function(marker: DecompMarker) -> bool:
|
|
||||||
return marker.type.upper() in ("FUNCTION", "STUB")
|
|
||||||
|
|
||||||
|
|
||||||
def marker_is_vtable(marker: DecompMarker) -> bool:
|
|
||||||
return marker.type.upper() == "VTABLE"
|
|
||||||
|
|
||||||
|
|
||||||
class MarkerDict:
|
class MarkerDict:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.markers: dict = {}
|
self.markers: dict = {}
|
||||||
|
|
||||||
def insert(self, marker: DecompMarker) -> bool:
|
def insert(self, marker: DecompMarker) -> bool:
|
||||||
"""Return True if this insert would overwrite"""
|
"""Return True if this insert would overwrite"""
|
||||||
module = marker.module.upper()
|
module = marker.module
|
||||||
if module in self.markers:
|
if module in self.markers:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
self.markers[module] = (marker.type, marker.offset)
|
# TODO: type converted back to string version here instead of using enum
|
||||||
|
self.markers[module] = (marker.type.name, marker.offset)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def iter(self) -> Iterator[DecompMarker]:
|
def iter(self) -> Iterator[DecompMarker]:
|
||||||
@ -82,9 +65,7 @@ class DecompParser:
|
|||||||
# but not right now
|
# but not right now
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
# The lists to be populated as we parse
|
# The lists to be populated as we parse
|
||||||
self.functions: List[ParserFunction] = []
|
self._symbols: List[ParserSymbol] = []
|
||||||
self.vtables: List[ParserVtable] = []
|
|
||||||
self.variables: List[ParserVariable] = []
|
|
||||||
self.alerts: List[ParserAlert] = []
|
self.alerts: List[ParserAlert] = []
|
||||||
|
|
||||||
self.line_number: int = 0
|
self.line_number: int = 0
|
||||||
@ -113,9 +94,7 @@ def __init__(self) -> None:
|
|||||||
self.function_sig: str = ""
|
self.function_sig: str = ""
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self.functions = []
|
self._symbols = []
|
||||||
self.vtables = []
|
|
||||||
self.variables = []
|
|
||||||
self.alerts = []
|
self.alerts = []
|
||||||
|
|
||||||
self.line_number = 0
|
self.line_number = 0
|
||||||
@ -131,6 +110,18 @@ def reset(self):
|
|||||||
self.function_start = 0
|
self.function_start = 0
|
||||||
self.function_sig = ""
|
self.function_sig = ""
|
||||||
|
|
||||||
|
@property
|
||||||
|
def functions(self) -> List[ParserSymbol]:
|
||||||
|
return [s for s in self._symbols if isinstance(s, ParserFunction)]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def vtables(self) -> List[ParserSymbol]:
|
||||||
|
return [s for s in self._symbols if isinstance(s, ParserVtable)]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def variables(self) -> List[ParserSymbol]:
|
||||||
|
return [s for s in self._symbols if isinstance(s, ParserVariable)]
|
||||||
|
|
||||||
def _recover(self):
|
def _recover(self):
|
||||||
"""We hit a syntax error and need to reset temp structures"""
|
"""We hit a syntax error and need to reset temp structures"""
|
||||||
self.state = ReaderState.SEARCH
|
self.state = ReaderState.SEARCH
|
||||||
@ -159,10 +150,17 @@ def _function_marker(self, marker: DecompMarker):
|
|||||||
self._syntax_warning(ParserError.DUPLICATE_MODULE)
|
self._syntax_warning(ParserError.DUPLICATE_MODULE)
|
||||||
self.state = ReaderState.WANT_SIG
|
self.state = ReaderState.WANT_SIG
|
||||||
|
|
||||||
def _synthetic_marker(self, marker: DecompMarker):
|
def _nameref_marker(self, marker: DecompMarker):
|
||||||
|
"""Functions explicitly referenced by name are set here"""
|
||||||
if self.fun_markers.insert(marker):
|
if self.fun_markers.insert(marker):
|
||||||
self._syntax_warning(ParserError.DUPLICATE_MODULE)
|
self._syntax_warning(ParserError.DUPLICATE_MODULE)
|
||||||
self.state = ReaderState.IN_TEMPLATE
|
|
||||||
|
if marker.is_template():
|
||||||
|
self.state = ReaderState.IN_TEMPLATE
|
||||||
|
elif marker.is_synthetic():
|
||||||
|
self.state = ReaderState.IN_SYNTHETIC
|
||||||
|
else:
|
||||||
|
self.state = ReaderState.IN_LIBRARY
|
||||||
|
|
||||||
def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False):
|
def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False):
|
||||||
end_line = self.line_number
|
end_line = self.line_number
|
||||||
@ -173,16 +171,14 @@ def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False)
|
|||||||
end_line -= 1
|
end_line -= 1
|
||||||
|
|
||||||
for marker in self.fun_markers.iter():
|
for marker in self.fun_markers.iter():
|
||||||
self.functions.append(
|
self._symbols.append(
|
||||||
ParserFunction(
|
ParserFunction(
|
||||||
|
type=marker.type,
|
||||||
line_number=self.function_start,
|
line_number=self.function_start,
|
||||||
module=marker.module,
|
module=marker.module,
|
||||||
offset=marker.offset,
|
offset=marker.offset,
|
||||||
lookup_by_name=lookup_by_name,
|
|
||||||
is_stub=marker_is_stub(marker),
|
|
||||||
is_synthetic=marker_is_synthetic(marker),
|
|
||||||
is_template=marker_is_template(marker),
|
|
||||||
name=self.function_sig,
|
name=self.function_sig,
|
||||||
|
lookup_by_name=lookup_by_name,
|
||||||
end_line=end_line,
|
end_line=end_line,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -202,12 +198,13 @@ def _vtable_done(self, class_name: str = None):
|
|||||||
class_name = self.last_line.strip()
|
class_name = self.last_line.strip()
|
||||||
|
|
||||||
for marker in self.tbl_markers.iter():
|
for marker in self.tbl_markers.iter():
|
||||||
self.vtables.append(
|
self._symbols.append(
|
||||||
ParserVtable(
|
ParserVtable(
|
||||||
|
type=marker.type,
|
||||||
line_number=self.line_number,
|
line_number=self.line_number,
|
||||||
module=marker.module,
|
module=marker.module,
|
||||||
offset=marker.offset,
|
offset=marker.offset,
|
||||||
class_name=class_name,
|
name=class_name,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -223,14 +220,19 @@ def _variable_marker(self, marker: DecompMarker):
|
|||||||
else:
|
else:
|
||||||
self.state = ReaderState.IN_GLOBAL
|
self.state = ReaderState.IN_GLOBAL
|
||||||
|
|
||||||
def _variable_done(self):
|
def _variable_done(self, name: str):
|
||||||
|
if not name.startswith("g_"):
|
||||||
|
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
|
||||||
|
|
||||||
for marker in self.var_markers.iter():
|
for marker in self.var_markers.iter():
|
||||||
self.variables.append(
|
self._symbols.append(
|
||||||
ParserVariable(
|
ParserVariable(
|
||||||
|
type=marker.type,
|
||||||
line_number=self.line_number,
|
line_number=self.line_number,
|
||||||
module=marker.module,
|
module=marker.module,
|
||||||
offset=marker.offset,
|
offset=marker.offset,
|
||||||
name=self.last_line.strip(),
|
name=name,
|
||||||
|
is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -246,12 +248,23 @@ def _handle_marker(self, marker: DecompMarker):
|
|||||||
self._syntax_error(ParserError.UNEXPECTED_MARKER)
|
self._syntax_error(ParserError.UNEXPECTED_MARKER)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# If we are inside a function, the only markers we accept are:
|
||||||
|
# GLOBAL, indicating a static variable
|
||||||
|
# STRING, indicating a literal string.
|
||||||
|
# Otherwise we assume that the parser missed the end of the function
|
||||||
|
# and we have moved on to something else.
|
||||||
|
# This is unlikely to occur with well-formed code, but
|
||||||
|
# we can recover easily by just ending the function here.
|
||||||
|
if self.state == ReaderState.IN_FUNC and not marker.allowed_in_func():
|
||||||
|
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
||||||
|
self._function_done(unexpected=True)
|
||||||
|
|
||||||
# TODO: How uncertain are we of detecting the end of a function
|
# TODO: How uncertain are we of detecting the end of a function
|
||||||
# in a clang-formatted file? For now we assume we have missed the
|
# in a clang-formatted file? For now we assume we have missed the
|
||||||
# end if we detect a non-GLOBAL marker while state is IN_FUNC.
|
# end if we detect a non-GLOBAL marker while state is IN_FUNC.
|
||||||
# Maybe these cases should be syntax errors instead
|
# Maybe these cases should be syntax errors instead
|
||||||
|
|
||||||
if marker_is_function(marker):
|
if marker.is_regular_function():
|
||||||
if self.state in (
|
if self.state in (
|
||||||
ReaderState.SEARCH,
|
ReaderState.SEARCH,
|
||||||
ReaderState.WANT_SIG,
|
ReaderState.WANT_SIG,
|
||||||
@ -259,29 +272,41 @@ def _handle_marker(self, marker: DecompMarker):
|
|||||||
# We will allow multiple offsets if we have just begun
|
# We will allow multiple offsets if we have just begun
|
||||||
# the code block, but not after we hit the curly brace.
|
# the code block, but not after we hit the curly brace.
|
||||||
self._function_marker(marker)
|
self._function_marker(marker)
|
||||||
elif self.state == ReaderState.IN_FUNC:
|
|
||||||
# We hit another offset unexpectedly.
|
|
||||||
# We can recover easily by just ending the function here.
|
|
||||||
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
|
||||||
self._function_done(unexpected=True)
|
|
||||||
|
|
||||||
# Start the next function right after so we can
|
|
||||||
# read the next line.
|
|
||||||
self._function_marker(marker)
|
|
||||||
else:
|
else:
|
||||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||||
|
|
||||||
elif marker_is_synthetic(marker):
|
elif marker.is_template():
|
||||||
if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE):
|
if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE):
|
||||||
self._synthetic_marker(marker)
|
self._nameref_marker(marker)
|
||||||
elif self.state == ReaderState.IN_FUNC:
|
|
||||||
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
|
||||||
self._function_done(lookup_by_name=True, unexpected=True)
|
|
||||||
self._synthetic_marker(marker)
|
|
||||||
else:
|
else:
|
||||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||||
|
|
||||||
elif marker_is_variable(marker):
|
elif marker.is_synthetic():
|
||||||
|
if self.state in (ReaderState.SEARCH, ReaderState.IN_SYNTHETIC):
|
||||||
|
self._nameref_marker(marker)
|
||||||
|
else:
|
||||||
|
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||||
|
|
||||||
|
elif marker.is_library():
|
||||||
|
if self.state in (ReaderState.SEARCH, ReaderState.IN_LIBRARY):
|
||||||
|
self._nameref_marker(marker)
|
||||||
|
else:
|
||||||
|
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||||
|
|
||||||
|
elif marker.is_string():
|
||||||
|
# TODO: We are ignoring string markers for the moment.
|
||||||
|
# We already have a lot of them in the codebase, though, so we'll
|
||||||
|
# hang onto them for now in case we can use them later.
|
||||||
|
# To match up string constants, the strategy will be:
|
||||||
|
# 1. Use cvdump to find all string constants in the recomp
|
||||||
|
# 2. In the original binary, look at relocated vaddrs from .rdata
|
||||||
|
# 3. Try to match up string data from #1 with locations in #2
|
||||||
|
|
||||||
|
# Throw the syntax error we would throw if we were parsing these
|
||||||
|
if self.state not in (ReaderState.SEARCH, ReaderState.IN_FUNC):
|
||||||
|
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||||
|
|
||||||
|
elif marker.is_variable():
|
||||||
if self.state in (
|
if self.state in (
|
||||||
ReaderState.SEARCH,
|
ReaderState.SEARCH,
|
||||||
ReaderState.IN_GLOBAL,
|
ReaderState.IN_GLOBAL,
|
||||||
@ -292,13 +317,9 @@ def _handle_marker(self, marker: DecompMarker):
|
|||||||
else:
|
else:
|
||||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||||
|
|
||||||
elif marker_is_vtable(marker):
|
elif marker.is_vtable():
|
||||||
if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE):
|
if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE):
|
||||||
self._vtable_marker(marker)
|
self._vtable_marker(marker)
|
||||||
elif self.state == ReaderState.IN_FUNC:
|
|
||||||
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
|
||||||
self._function_done(unexpected=True)
|
|
||||||
self._vtable_marker(marker)
|
|
||||||
else:
|
else:
|
||||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||||
|
|
||||||
@ -322,12 +343,16 @@ def read_line(self, line: str):
|
|||||||
return
|
return
|
||||||
|
|
||||||
line_strip = line.strip()
|
line_strip = line.strip()
|
||||||
if self.state == ReaderState.IN_TEMPLATE:
|
if self.state in (
|
||||||
# TEMPLATE functions are a special case. The signature is
|
ReaderState.IN_SYNTHETIC,
|
||||||
# given on the next line (in a // comment)
|
ReaderState.IN_TEMPLATE,
|
||||||
|
ReaderState.IN_LIBRARY,
|
||||||
|
):
|
||||||
|
# Explicit nameref functions provide the function name
|
||||||
|
# on the next line (in a // comment)
|
||||||
name = get_synthetic_name(line)
|
name = get_synthetic_name(line)
|
||||||
if name is None:
|
if name is None:
|
||||||
self._syntax_error(ParserError.BAD_SYNTHETIC)
|
self._syntax_error(ParserError.BAD_NAMEREF)
|
||||||
else:
|
else:
|
||||||
self.function_sig = name
|
self.function_sig = name
|
||||||
self._function_starts_here()
|
self._function_starts_here()
|
||||||
@ -384,8 +409,28 @@ def read_line(self, line: str):
|
|||||||
self._function_done()
|
self._function_done()
|
||||||
|
|
||||||
elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL):
|
elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL):
|
||||||
if not is_blank_or_comment(line):
|
# TODO: Known problem that an error here will cause us to abandon a
|
||||||
self._variable_done()
|
# function we have already parsed if state == IN_FUNC_GLOBAL.
|
||||||
|
# However, we are not tolerant of _any_ syntax problems in our
|
||||||
|
# CI actions, so the solution is to just fix the invalid marker.
|
||||||
|
if is_blank_or_comment(line):
|
||||||
|
self._syntax_error(ParserError.NO_SUITABLE_NAME)
|
||||||
|
return
|
||||||
|
|
||||||
|
# We don't have a foolproof mechanism to tell what is and is not a variable.
|
||||||
|
# If the GLOBAL is being declared on a `return` statement, though, this is
|
||||||
|
# not correct. It is either a string literal (which will be handled differently)
|
||||||
|
# or it is not the variable declaration, which is incorrect decomp syntax.
|
||||||
|
if line.strip().startswith("return"):
|
||||||
|
self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
|
||||||
|
return
|
||||||
|
|
||||||
|
name = get_variable_name(line)
|
||||||
|
if name is None:
|
||||||
|
self._syntax_error(ParserError.NO_SUITABLE_NAME)
|
||||||
|
return
|
||||||
|
|
||||||
|
self._variable_done(name)
|
||||||
|
|
||||||
elif self.state == ReaderState.IN_VTABLE:
|
elif self.state == ReaderState.IN_VTABLE:
|
||||||
vtable_class = get_class_name(line)
|
vtable_class = get_class_name(line)
|
||||||
|
|||||||
@ -1,17 +1,6 @@
|
|||||||
# C++ Parser utility functions and data structures
|
# C++ Parser utility functions and data structures
|
||||||
from __future__ import annotations # python <3.10 compatibility
|
|
||||||
import re
|
import re
|
||||||
from collections import namedtuple
|
from typing import Optional
|
||||||
|
|
||||||
DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"])
|
|
||||||
|
|
||||||
|
|
||||||
markerRegex = re.compile(
|
|
||||||
r"\s*//\s*(\w+):\s*(\w+)\s+(0x[a-f0-9]+)",
|
|
||||||
flags=re.I,
|
|
||||||
)
|
|
||||||
|
|
||||||
markerExactRegex = re.compile(r"\s*// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$")
|
|
||||||
|
|
||||||
# The goal here is to just read whatever is on the next line, so some
|
# The goal here is to just read whatever is on the next line, so some
|
||||||
# flexibility in the formatting seems OK
|
# flexibility in the formatting seems OK
|
||||||
@ -23,7 +12,7 @@
|
|||||||
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
|
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
|
||||||
|
|
||||||
|
|
||||||
def get_synthetic_name(line: str) -> str | None:
|
def get_synthetic_name(line: str) -> Optional[str]:
|
||||||
"""Synthetic names appear on a single line comment on the line after the marker.
|
"""Synthetic names appear on a single line comment on the line after the marker.
|
||||||
If that's not what we have, return None"""
|
If that's not what we have, return None"""
|
||||||
template_match = templateCommentRegex.match(line)
|
template_match = templateCommentRegex.match(line)
|
||||||
@ -51,20 +40,6 @@ def is_blank_or_comment(line: str) -> bool:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def match_marker(line: str) -> DecompMarker | None:
|
|
||||||
match = markerRegex.match(line)
|
|
||||||
if match is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return DecompMarker(
|
|
||||||
type=match.group(1), module=match.group(2), offset=int(match.group(3), 16)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def is_marker_exact(line: str) -> bool:
|
|
||||||
return markerExactRegex.match(line) is not None
|
|
||||||
|
|
||||||
|
|
||||||
template_class_decl_regex = re.compile(
|
template_class_decl_regex = re.compile(
|
||||||
r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)<([\w]+)\s*(\*+)?\s*>"
|
r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)<([\w]+)\s*(\*+)?\s*>"
|
||||||
)
|
)
|
||||||
@ -73,7 +48,7 @@ def is_marker_exact(line: str) -> bool:
|
|||||||
class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)")
|
class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)")
|
||||||
|
|
||||||
|
|
||||||
def get_class_name(line: str) -> str | None:
|
def get_class_name(line: str) -> Optional[str]:
|
||||||
"""For VTABLE markers, extract the class name from the code line or comment
|
"""For VTABLE markers, extract the class name from the code line or comment
|
||||||
where it appears."""
|
where it appears."""
|
||||||
|
|
||||||
@ -93,3 +68,21 @@ def get_class_name(line: str) -> str | None:
|
|||||||
return match.group(1)
|
return match.group(1)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Preferred form: an identifier carrying the "g_" prefix. The leading \b stops
# the pattern from matching the tail of a longer identifier (for example the
# "g_count" inside "msg_count"), which would return the wrong name and hide
# the fact that the prefix is missing.
global_regex = re.compile(r"\b(?P<name>g_\w+)")

# Fallback form: any identifier directly followed by something that looks
# like the end of a declarator: ")(" , "[", "=" or ";".
less_strict_global_regex = re.compile(r"(?P<name>\w+)(?:\)\(|\[.*|\s*=.*|;)")


def get_variable_name(line: str) -> Optional[str]:
    """Grab the name of the variable annotated with the GLOBAL marker.

    Correct syntax would have the variable start with the prefix "g_",
    but we will try to match regardless.

    Args:
        line: The source line that follows the GLOBAL marker.

    Returns:
        The first "g_"-prefixed identifier on the line if there is one,
        otherwise the first identifier that the fallback pattern accepts,
        otherwise None."""

    # Try the strict pattern first so a properly named variable always wins.
    for pattern in (global_regex, less_strict_global_regex):
        if (match := pattern.search(line)) is not None:
            return match.group("name")

    return None
|
||||||
|
|||||||
@ -115,7 +115,7 @@ def test_different_markers_same_module(parser):
|
|||||||
|
|
||||||
# Use first marker declaration, don't replace
|
# Use first marker declaration, don't replace
|
||||||
assert len(parser.functions) == 1
|
assert len(parser.functions) == 1
|
||||||
assert parser.functions[0].is_stub is False
|
assert parser.functions[0].should_skip() is False
|
||||||
|
|
||||||
# Should alert to this
|
# Should alert to this
|
||||||
assert len(parser.alerts) == 1
|
assert len(parser.alerts) == 1
|
||||||
@ -193,7 +193,7 @@ def test_multiple_vtables(parser):
|
|||||||
)
|
)
|
||||||
assert len(parser.alerts) == 0
|
assert len(parser.alerts) == 0
|
||||||
assert len(parser.vtables) == 2
|
assert len(parser.vtables) == 2
|
||||||
assert parser.vtables[0].class_name == "MxString"
|
assert parser.vtables[0].name == "MxString"
|
||||||
|
|
||||||
|
|
||||||
def test_multiple_vtables_same_module(parser):
|
def test_multiple_vtables_same_module(parser):
|
||||||
@ -247,7 +247,7 @@ def test_synthetic_no_comment(parser):
|
|||||||
)
|
)
|
||||||
assert len(parser.functions) == 0
|
assert len(parser.functions) == 0
|
||||||
assert len(parser.alerts) == 1
|
assert len(parser.alerts) == 1
|
||||||
assert parser.alerts[0].code == ParserError.BAD_SYNTHETIC
|
assert parser.alerts[0].code == ParserError.BAD_NAMEREF
|
||||||
assert parser.state == ReaderState.SEARCH
|
assert parser.state == ReaderState.SEARCH
|
||||||
|
|
||||||
|
|
||||||
@ -375,3 +375,70 @@ def test_unexpected_eof(parser):
|
|||||||
assert len(parser.functions) == 1
|
assert len(parser.functions) == 1
|
||||||
assert len(parser.alerts) == 1
|
assert len(parser.alerts) == 1
|
||||||
assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE
|
assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE
|
||||||
|
|
||||||
|
|
||||||
|
def test_global_variable_prefix(parser):
    """A GLOBAL variable without the g_ prefix raises a warning but is kept."""
    prefixed = [
        "// GLOBAL: TEST 0x1234",
        'const char* g_msg = "hello";',
    ]
    parser.read_lines(prefixed)
    assert len(parser.variables) == 1
    assert len(parser.alerts) == 0

    unprefixed = [
        "// GLOBAL: TEXT 0x5555",
        "int test = 5;",
    ]
    parser.read_lines(unprefixed)
    assert len(parser.alerts) == 1
    first_alert = parser.alerts[0]
    assert first_alert.code == ParserError.GLOBAL_MISSING_PREFIX
    # The warning does not stop us from recording the variable's name.
    second_variable = parser.variables[1]
    assert second_variable.name == "test"
|
||||||
|
|
||||||
|
|
||||||
|
def test_global_nomatch(parser):
|
||||||
|
"""We do our best to grab the variable name, even without the g_ prefix
|
||||||
|
but this (by design) will not match everything."""
|
||||||
|
|
||||||
|
parser.read_lines(
|
||||||
|
[
|
||||||
|
"// GLOBAL: TEST 0x1234",
|
||||||
|
"FunctionCall();",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
assert len(parser.variables) == 0
|
||||||
|
assert len(parser.alerts) == 1
|
||||||
|
assert parser.alerts[0].code == ParserError.NO_SUITABLE_NAME
|
||||||
|
|
||||||
|
|
||||||
|
def test_static_variable(parser):
|
||||||
|
"""We can detect whether a variable is a static function variable
|
||||||
|
based on the parser's state when we detect it.
|
||||||
|
Checking for the word `static` alone is not a good test.
|
||||||
|
Static class variables are filed as S_GDATA32, same as regular globals.
|
||||||
|
Only function statics are filed as S_LDATA32."""
|
||||||
|
|
||||||
|
parser.read_lines(
|
||||||
|
[
|
||||||
|
"// GLOBAL: TEST 0x1234",
|
||||||
|
"int g_test = 1234;",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
assert len(parser.variables) == 1
|
||||||
|
assert parser.variables[0].is_static is False
|
||||||
|
|
||||||
|
parser.read_lines(
|
||||||
|
[
|
||||||
|
"// FUNCTION: TEST 0x5555",
|
||||||
|
"void test_function() {",
|
||||||
|
"// GLOBAL: TEST 0x8888",
|
||||||
|
"int g_internal = 0;",
|
||||||
|
"}",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
assert len(parser.variables) == 2
|
||||||
|
assert parser.variables[1].is_static is True
|
||||||
|
|||||||
@ -11,9 +11,11 @@
|
|||||||
(_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None),
|
(_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None),
|
||||||
(_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None),
|
(_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None),
|
||||||
(_rs.SEARCH, "STUB", _rs.WANT_SIG, None),
|
(_rs.SEARCH, "STUB", _rs.WANT_SIG, None),
|
||||||
(_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None),
|
(_rs.SEARCH, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
|
||||||
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
|
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
|
||||||
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
|
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
|
||||||
|
(_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None),
|
||||||
|
(_rs.SEARCH, "STRING", _rs.SEARCH, None),
|
||||||
|
|
||||||
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
|
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
|
||||||
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
@ -21,20 +23,26 @@
|
|||||||
(_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.WANT_SIG, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.WANT_SIG, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
|
||||||
(_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
|
(_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
|
||||||
(_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
|
(_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
|
||||||
(_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
|
(_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
|
||||||
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
|
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_SYNTHETIC, _pe.MISSED_END_OF_FUNCTION),
|
||||||
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
|
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
|
||||||
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
|
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
|
||||||
|
(_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION),
|
||||||
|
(_rs.IN_FUNC, "STRING", _rs.IN_FUNC, None),
|
||||||
|
|
||||||
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None),
|
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None),
|
(_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None),
|
||||||
(_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_TEMPLATE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_TEMPLATE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
|
||||||
(_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
(_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||||
(_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
(_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||||
@ -42,6 +50,8 @@
|
|||||||
(_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
(_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||||
(_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
(_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||||
(_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
(_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||||
|
(_rs.WANT_CURLY, "LIBRARY", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||||
|
(_rs.WANT_CURLY, "STRING", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||||
|
|
||||||
(_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None),
|
(_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None),
|
||||||
@ -49,6 +59,8 @@
|
|||||||
(_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
|
||||||
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
|
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
|
||||||
@ -56,6 +68,8 @@
|
|||||||
(_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_FUNC_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
|
||||||
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
@ -63,6 +77,26 @@
|
|||||||
(_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None),
|
(_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None),
|
||||||
|
(_rs.IN_VTABLE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_VTABLE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
|
||||||
|
(_rs.IN_SYNTHETIC, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_SYNTHETIC, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_SYNTHETIC, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_SYNTHETIC, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
|
||||||
|
(_rs.IN_SYNTHETIC, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_SYNTHETIC, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_SYNTHETIC, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_SYNTHETIC, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
|
||||||
|
(_rs.IN_LIBRARY, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_LIBRARY, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_LIBRARY, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_LIBRARY, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_LIBRARY, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_LIBRARY, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
(_rs.IN_LIBRARY, "LIBRARY", _rs.IN_LIBRARY, None),
|
||||||
|
(_rs.IN_LIBRARY, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
]
|
]
|
||||||
# fmt: on
|
# fmt: on
|
||||||
|
|
||||||
@ -105,47 +139,3 @@ def test_state_search_line(line: str):
|
|||||||
p.read_line(line)
|
p.read_line(line)
|
||||||
assert p.state == _rs.SEARCH
|
assert p.state == _rs.SEARCH
|
||||||
assert len(p.alerts) == 0
|
assert len(p.alerts) == 0
|
||||||
|
|
||||||
|
|
||||||
global_lines = [
|
|
||||||
("// A comment", _rs.IN_GLOBAL),
|
|
||||||
("", _rs.IN_GLOBAL),
|
|
||||||
("\t", _rs.IN_GLOBAL),
|
|
||||||
(" ", _rs.IN_GLOBAL),
|
|
||||||
# TODO: no check for "likely" variable declaration so these all count
|
|
||||||
("void function()", _rs.SEARCH),
|
|
||||||
("int x = 123;", _rs.SEARCH),
|
|
||||||
("just some text", _rs.SEARCH),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("line, new_state", global_lines)
|
|
||||||
def test_state_global_line(line: str, new_state: _rs):
|
|
||||||
p = DecompParser()
|
|
||||||
p.read_line("// GLOBAL: TEST 0x1234")
|
|
||||||
assert p.state == _rs.IN_GLOBAL
|
|
||||||
p.read_line(line)
|
|
||||||
assert p.state == new_state
|
|
||||||
|
|
||||||
|
|
||||||
# mostly same as above
|
|
||||||
in_func_global_lines = [
|
|
||||||
("// A comment", _rs.IN_FUNC_GLOBAL),
|
|
||||||
("", _rs.IN_FUNC_GLOBAL),
|
|
||||||
("\t", _rs.IN_FUNC_GLOBAL),
|
|
||||||
(" ", _rs.IN_FUNC_GLOBAL),
|
|
||||||
# TODO: no check for "likely" variable declaration so these all count
|
|
||||||
("void function()", _rs.IN_FUNC),
|
|
||||||
("int x = 123;", _rs.IN_FUNC),
|
|
||||||
("just some text", _rs.IN_FUNC),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("line, new_state", in_func_global_lines)
|
|
||||||
def test_state_in_func_global_line(line: str, new_state: _rs):
|
|
||||||
p = DecompParser()
|
|
||||||
p.state = _rs.IN_FUNC
|
|
||||||
p.read_line("// GLOBAL: TEST 0x1234")
|
|
||||||
assert p.state == _rs.IN_FUNC_GLOBAL
|
|
||||||
p.read_line(line)
|
|
||||||
assert p.state == new_state
|
|
||||||
|
|||||||
@ -1,11 +1,15 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from isledecomp.parser.parser import MarkerDict
|
from isledecomp.parser.parser import MarkerDict
|
||||||
from isledecomp.parser.util import (
|
from isledecomp.parser.marker import (
|
||||||
DecompMarker,
|
DecompMarker,
|
||||||
is_blank_or_comment,
|
MarkerType,
|
||||||
match_marker,
|
match_marker,
|
||||||
is_marker_exact,
|
is_marker_exact,
|
||||||
|
)
|
||||||
|
from isledecomp.parser.util import (
|
||||||
|
is_blank_or_comment,
|
||||||
get_class_name,
|
get_class_name,
|
||||||
|
get_variable_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -96,7 +100,7 @@ def test_marker_dict_type_replace():
|
|||||||
d.insert(DecompMarker("STUB", "TEST", 0x1234))
|
d.insert(DecompMarker("STUB", "TEST", 0x1234))
|
||||||
markers = list(d.iter())
|
markers = list(d.iter())
|
||||||
assert len(markers) == 1
|
assert len(markers) == 1
|
||||||
assert markers[0].type == "FUNCTION"
|
assert markers[0].type == MarkerType.FUNCTION
|
||||||
|
|
||||||
|
|
||||||
class_name_match_cases = [
|
class_name_match_cases = [
|
||||||
@ -131,3 +135,26 @@ def test_get_class_name(line: str, class_name: str):
|
|||||||
@pytest.mark.parametrize("line", class_name_no_match_cases)
|
@pytest.mark.parametrize("line", class_name_no_match_cases)
|
||||||
def test_get_class_name_none(line: str):
|
def test_get_class_name_none(line: str):
|
||||||
assert get_class_name(line) is None
|
assert get_class_name(line) is None
|
||||||
|
|
||||||
|
|
||||||
|
variable_name_cases = [
|
||||||
|
# with prefix for easy access
|
||||||
|
("char* g_test;", "g_test"),
|
||||||
|
("g_test;", "g_test"),
|
||||||
|
("void (*g_test)(int);", "g_test"),
|
||||||
|
("char g_test[50];", "g_test"),
|
||||||
|
("char g_test[50] = {1234,", "g_test"),
|
||||||
|
("int g_test = 500;", "g_test"),
|
||||||
|
# no prefix
|
||||||
|
("char* hello;", "hello"),
|
||||||
|
("hello;", "hello"),
|
||||||
|
("void (*hello)(int);", "hello"),
|
||||||
|
("char hello[50];", "hello"),
|
||||||
|
("char hello[50] = {1234,", "hello"),
|
||||||
|
("int hello = 500;", "hello"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("line,name", variable_name_cases)
|
||||||
|
def test_get_variable_name(line: str, name: str):
|
||||||
|
assert get_variable_name(line) == name
|
||||||
|
|||||||
@ -316,7 +316,7 @@ def main():
|
|||||||
parser.read_lines(srcfile)
|
parser.read_lines(srcfile)
|
||||||
|
|
||||||
for fun in parser.functions:
|
for fun in parser.functions:
|
||||||
if fun.is_stub:
|
if fun.should_skip():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if fun.module != basename:
|
if fun.module != basename:
|
||||||
@ -330,7 +330,7 @@ def main():
|
|||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if fun.lookup_by_name:
|
if fun.is_nameref():
|
||||||
recinfo = syminfo.get_recompiled_address_from_name(fun.name)
|
recinfo = syminfo.get_recompiled_address_from_name(fun.name)
|
||||||
if not recinfo:
|
if not recinfo:
|
||||||
continue
|
continue
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user