mirror of
https://github.com/isledecomp/isle.git
synced 2026-01-28 18:51:16 +00:00
Enable string annotations and indirect globals
This commit is contained in:
parent
6af0c6cb1a
commit
abefacf9b2
@ -85,13 +85,19 @@ def _load_cvdump(self):
|
|||||||
|
|
||||||
if sym.node_type == SymbolType.STRING:
|
if sym.node_type == SymbolType.STRING:
|
||||||
string_info = demangle_string_const(sym.decorated_name)
|
string_info = demangle_string_const(sym.decorated_name)
|
||||||
|
if string_info is None:
|
||||||
|
logger.debug(
|
||||||
|
"Could not demangle string symbol: %s", sym.decorated_name
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
# TODO: skip unicode for now. will need to handle these differently.
|
# TODO: skip unicode for now. will need to handle these differently.
|
||||||
if string_info.is_utf16:
|
if string_info.is_utf16:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
raw = self.recomp_bin.read(addr, sym.size())
|
raw = self.recomp_bin.read(addr, sym.size())
|
||||||
try:
|
try:
|
||||||
sym.friendly_name = raw.decode("latin1")
|
sym.friendly_name = raw.decode("latin1").rstrip("\x00")
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -134,6 +140,26 @@ def _load_markers(self):
|
|||||||
for tbl in codebase.iter_vtables():
|
for tbl in codebase.iter_vtables():
|
||||||
self._db.match_vtable(tbl.offset, tbl.name)
|
self._db.match_vtable(tbl.offset, tbl.name)
|
||||||
|
|
||||||
|
for string in codebase.iter_strings():
|
||||||
|
# Not that we don't trust you, but we're checking the string
|
||||||
|
# annotation to make sure it is accurate.
|
||||||
|
try:
|
||||||
|
# TODO: would presumably fail for wchar_t strings
|
||||||
|
orig = self.orig_bin.read_string(string.offset).decode("latin1")
|
||||||
|
string_correct = string.name == orig
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
string_correct = False
|
||||||
|
|
||||||
|
if not string_correct:
|
||||||
|
logger.error(
|
||||||
|
"Data at 0x%x does not match string %s",
|
||||||
|
string.offset,
|
||||||
|
repr(string.name),
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
self._db.match_string(string.offset, string.name)
|
||||||
|
|
||||||
def _find_original_strings(self):
|
def _find_original_strings(self):
|
||||||
"""Go to the original binary and look for the specified string constants
|
"""Go to the original binary and look for the specified string constants
|
||||||
to find a match. This is a (relatively) expensive operation so we only
|
to find a match. This is a (relatively) expensive operation so we only
|
||||||
|
|||||||
@ -43,7 +43,8 @@ def match_name(self) -> str:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
ctype = self.compare_type.name if self.compare_type is not None else "UNK"
|
ctype = self.compare_type.name if self.compare_type is not None else "UNK"
|
||||||
return f"{self.name} ({ctype})"
|
name = repr(self.name) if ctype == "STRING" else self.name
|
||||||
|
return f"{name} ({ctype})"
|
||||||
|
|
||||||
|
|
||||||
def matchinfo_factory(_, row):
|
def matchinfo_factory(_, row):
|
||||||
@ -197,3 +198,5 @@ def match_string(self, addr: int, value: str) -> bool:
|
|||||||
if not did_match:
|
if not did_match:
|
||||||
escaped = repr(value)
|
escaped = repr(value)
|
||||||
logger.error("Failed to find string: %s", escaped)
|
logger.error("Failed to find string: %s", escaped)
|
||||||
|
|
||||||
|
return did_match
|
||||||
|
|||||||
@ -94,7 +94,11 @@ def set_decorated(self, name: str):
|
|||||||
def name(self) -> Optional[str]:
|
def name(self) -> Optional[str]:
|
||||||
"""Prefer "friendly" name if we have it.
|
"""Prefer "friendly" name if we have it.
|
||||||
This is what we have been using to match functions."""
|
This is what we have been using to match functions."""
|
||||||
return self.friendly_name or self.decorated_name
|
return (
|
||||||
|
self.friendly_name
|
||||||
|
if self.friendly_name is not None
|
||||||
|
else self.decorated_name
|
||||||
|
)
|
||||||
|
|
||||||
def size(self) -> Optional[int]:
|
def size(self) -> Optional[int]:
|
||||||
if self.confirmed_size is not None:
|
if self.confirmed_size is not None:
|
||||||
|
|||||||
@ -4,6 +4,7 @@
|
|||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
class InvalidEncodedNumberError(Exception):
|
class InvalidEncodedNumberError(Exception):
|
||||||
@ -30,13 +31,12 @@ def parse_encoded_number(string: str) -> int:
|
|||||||
StringConstInfo = namedtuple("StringConstInfo", "len is_utf16")
|
StringConstInfo = namedtuple("StringConstInfo", "len is_utf16")
|
||||||
|
|
||||||
|
|
||||||
def demangle_string_const(symbol: str) -> StringConstInfo:
|
def demangle_string_const(symbol: str) -> Optional[StringConstInfo]:
|
||||||
"""Don't bother to decode the string text from the symbol.
|
"""Don't bother to decode the string text from the symbol.
|
||||||
We can just read it from the binary once we have the length."""
|
We can just read it from the binary once we have the length."""
|
||||||
match = string_const_regex.match(symbol)
|
match = string_const_regex.match(symbol)
|
||||||
if match is None:
|
if match is None:
|
||||||
# See below
|
return None
|
||||||
return StringConstInfo(0, False)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
strlen = (
|
strlen = (
|
||||||
@ -45,10 +45,7 @@ def demangle_string_const(symbol: str) -> StringConstInfo:
|
|||||||
else int(match.group("len"))
|
else int(match.group("len"))
|
||||||
)
|
)
|
||||||
except (ValueError, InvalidEncodedNumberError):
|
except (ValueError, InvalidEncodedNumberError):
|
||||||
# This would be an annoying error to fail on if we get a bad symbol.
|
return None
|
||||||
# For now, just assume a zero length string because this will probably
|
|
||||||
# raise some eyebrows during the comparison.
|
|
||||||
strlen = 0
|
|
||||||
|
|
||||||
is_utf16 = match.group("is_utf16") == "1"
|
is_utf16 = match.group("is_utf16") == "1"
|
||||||
return StringConstInfo(len=strlen, is_utf16=is_utf16)
|
return StringConstInfo(len=strlen, is_utf16=is_utf16)
|
||||||
|
|||||||
@ -6,6 +6,7 @@
|
|||||||
ParserFunction,
|
ParserFunction,
|
||||||
ParserVtable,
|
ParserVtable,
|
||||||
ParserVariable,
|
ParserVariable,
|
||||||
|
ParserString,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -42,3 +43,6 @@ def iter_vtables(self) -> Iterator[ParserVtable]:
|
|||||||
|
|
||||||
def iter_variables(self) -> Iterator[ParserVariable]:
|
def iter_variables(self) -> Iterator[ParserVariable]:
|
||||||
return filter(lambda s: isinstance(s, ParserVariable), self._symbols)
|
return filter(lambda s: isinstance(s, ParserVariable), self._symbols)
|
||||||
|
|
||||||
|
def iter_strings(self) -> Iterator[ParserString]:
|
||||||
|
return filter(lambda s: isinstance(s, ParserString), self._symbols)
|
||||||
|
|||||||
@ -70,6 +70,10 @@ class ParserError(Enum):
|
|||||||
# a comment -- i.e. VTABLE or GLOBAL -- could not extract the name
|
# a comment -- i.e. VTABLE or GLOBAL -- could not extract the name
|
||||||
NO_SUITABLE_NAME = 204
|
NO_SUITABLE_NAME = 204
|
||||||
|
|
||||||
|
# ERROR: Two STRING markers have the same module and offset, but the strings
|
||||||
|
# they annotate are different.
|
||||||
|
WRONG_STRING = 205
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ParserAlert:
|
class ParserAlert:
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
from .parser import DecompParser
|
from .parser import DecompParser
|
||||||
from .error import ParserAlert, ParserError
|
from .error import ParserAlert, ParserError
|
||||||
from .node import ParserSymbol
|
from .node import ParserSymbol, ParserString
|
||||||
|
|
||||||
|
|
||||||
def get_checkorder_filter(module):
|
def get_checkorder_filter(module):
|
||||||
@ -19,6 +19,9 @@ def __init__(self) -> None:
|
|||||||
# This is _not_ reset between files and is intended to report offset reuse
|
# This is _not_ reset between files and is intended to report offset reuse
|
||||||
# when scanning the entire directory.
|
# when scanning the entire directory.
|
||||||
self._offsets_used = set()
|
self._offsets_used = set()
|
||||||
|
# Keep track of strings we have seen. Persists across files.
|
||||||
|
# Module/offset can be repeated for string markers but the strings must match.
|
||||||
|
self._strings = {}
|
||||||
|
|
||||||
def reset(self, full_reset: bool = False):
|
def reset(self, full_reset: bool = False):
|
||||||
self.alerts = []
|
self.alerts = []
|
||||||
@ -28,6 +31,7 @@ def reset(self, full_reset: bool = False):
|
|||||||
|
|
||||||
if full_reset:
|
if full_reset:
|
||||||
self._offsets_used.clear()
|
self._offsets_used.clear()
|
||||||
|
self._strings = {}
|
||||||
|
|
||||||
def file_is_header(self):
|
def file_is_header(self):
|
||||||
return self._filename.lower().endswith(".h")
|
return self._filename.lower().endswith(".h")
|
||||||
@ -36,8 +40,20 @@ def _load_offsets_from_list(self, marker_list: List[ParserSymbol]):
|
|||||||
"""Helper for loading (module, offset) tuples while the DecompParser
|
"""Helper for loading (module, offset) tuples while the DecompParser
|
||||||
has them broken up into three different lists."""
|
has them broken up into three different lists."""
|
||||||
for marker in marker_list:
|
for marker in marker_list:
|
||||||
|
is_string = isinstance(marker, ParserString)
|
||||||
|
|
||||||
value = (marker.module, marker.offset)
|
value = (marker.module, marker.offset)
|
||||||
if value in self._offsets_used:
|
if value in self._offsets_used:
|
||||||
|
if is_string:
|
||||||
|
if self._strings[value] != marker.name:
|
||||||
|
self.alerts.append(
|
||||||
|
ParserAlert(
|
||||||
|
code=ParserError.WRONG_STRING,
|
||||||
|
line_number=marker.line_number,
|
||||||
|
line=f"0x{marker.offset:08x}, {repr(self._strings[value])} vs. {repr(marker.name)}",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
self.alerts.append(
|
self.alerts.append(
|
||||||
ParserAlert(
|
ParserAlert(
|
||||||
code=ParserError.DUPLICATE_OFFSET,
|
code=ParserError.DUPLICATE_OFFSET,
|
||||||
@ -47,6 +63,8 @@ def _load_offsets_from_list(self, marker_list: List[ParserSymbol]):
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self._offsets_used.add(value)
|
self._offsets_used.add(value)
|
||||||
|
if is_string:
|
||||||
|
self._strings[value] = marker.name
|
||||||
|
|
||||||
def _check_function_order(self):
|
def _check_function_order(self):
|
||||||
"""Rules:
|
"""Rules:
|
||||||
@ -82,6 +100,7 @@ def _check_offset_uniqueness(self):
|
|||||||
self._load_offsets_from_list(self._parser.functions)
|
self._load_offsets_from_list(self._parser.functions)
|
||||||
self._load_offsets_from_list(self._parser.vtables)
|
self._load_offsets_from_list(self._parser.vtables)
|
||||||
self._load_offsets_from_list(self._parser.variables)
|
self._load_offsets_from_list(self._parser.variables)
|
||||||
|
self._load_offsets_from_list(self._parser.strings)
|
||||||
|
|
||||||
def _check_byname_allowed(self):
|
def _check_byname_allowed(self):
|
||||||
if self.file_is_header():
|
if self.file_is_header():
|
||||||
|
|||||||
@ -3,6 +3,19 @@
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class MarkerCategory(Enum):
|
||||||
|
"""For the purposes of grouping multiple different DecompMarkers together,
|
||||||
|
assign a rough "category" for the MarkerType values below.
|
||||||
|
It's really only the function types that have to get folded down, but
|
||||||
|
we'll do that in a structured way to permit future expansion."""
|
||||||
|
|
||||||
|
FUNCTION = 1
|
||||||
|
VARIABLE = 2
|
||||||
|
STRING = 3
|
||||||
|
VTABLE = 4
|
||||||
|
ADDRESS = 100 # i.e. no comparison required or possible
|
||||||
|
|
||||||
|
|
||||||
class MarkerType(Enum):
|
class MarkerType(Enum):
|
||||||
UNKNOWN = -100
|
UNKNOWN = -100
|
||||||
FUNCTION = 1
|
FUNCTION = 1
|
||||||
@ -51,6 +64,23 @@ def module(self) -> str:
|
|||||||
def offset(self) -> int:
|
def offset(self) -> int:
|
||||||
return self._offset
|
return self._offset
|
||||||
|
|
||||||
|
@property
|
||||||
|
def category(self) -> MarkerCategory:
|
||||||
|
if self.is_vtable():
|
||||||
|
return MarkerCategory.VTABLE
|
||||||
|
|
||||||
|
if self.is_variable():
|
||||||
|
return MarkerCategory.VARIABLE
|
||||||
|
|
||||||
|
if self.is_string():
|
||||||
|
return MarkerCategory.STRING
|
||||||
|
|
||||||
|
# TODO: worth another look if we add more types, but this covers it
|
||||||
|
if self.is_regular_function() or self.is_explicit_byname():
|
||||||
|
return MarkerCategory.FUNCTION
|
||||||
|
|
||||||
|
return MarkerCategory.ADDRESS
|
||||||
|
|
||||||
def is_regular_function(self) -> bool:
|
def is_regular_function(self) -> bool:
|
||||||
"""Regular function, meaning: not an explicit byname lookup. FUNCTION
|
"""Regular function, meaning: not an explicit byname lookup. FUNCTION
|
||||||
markers can be _implicit_ byname.
|
markers can be _implicit_ byname.
|
||||||
|
|||||||
@ -55,3 +55,8 @@ class ParserVariable(ParserSymbol):
|
|||||||
@dataclass
|
@dataclass
|
||||||
class ParserVtable(ParserSymbol):
|
class ParserVtable(ParserSymbol):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class ParserString(ParserSymbol):
|
||||||
|
pass
|
||||||
|
|||||||
@ -3,11 +3,11 @@
|
|||||||
from typing import List, Iterable, Iterator, Optional
|
from typing import List, Iterable, Iterator, Optional
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from .util import (
|
from .util import (
|
||||||
is_blank_or_comment,
|
|
||||||
get_class_name,
|
get_class_name,
|
||||||
get_variable_name,
|
get_variable_name,
|
||||||
get_synthetic_name,
|
get_synthetic_name,
|
||||||
remove_trailing_comment,
|
remove_trailing_comment,
|
||||||
|
get_string_contents,
|
||||||
)
|
)
|
||||||
from .marker import (
|
from .marker import (
|
||||||
DecompMarker,
|
DecompMarker,
|
||||||
@ -19,6 +19,7 @@
|
|||||||
ParserFunction,
|
ParserFunction,
|
||||||
ParserVariable,
|
ParserVariable,
|
||||||
ParserVtable,
|
ParserVtable,
|
||||||
|
ParserString,
|
||||||
)
|
)
|
||||||
from .error import ParserAlert, ParserError
|
from .error import ParserAlert, ParserError
|
||||||
|
|
||||||
@ -43,17 +44,16 @@ def __init__(self) -> None:
|
|||||||
|
|
||||||
def insert(self, marker: DecompMarker) -> bool:
|
def insert(self, marker: DecompMarker) -> bool:
|
||||||
"""Return True if this insert would overwrite"""
|
"""Return True if this insert would overwrite"""
|
||||||
module = marker.module
|
key = (marker.category, marker.module)
|
||||||
if module in self.markers:
|
if key in self.markers:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# TODO: type converted back to string version here instead of using enum
|
self.markers[key] = marker
|
||||||
self.markers[module] = (marker.type.name, marker.offset)
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def iter(self) -> Iterator[DecompMarker]:
|
def iter(self) -> Iterator[DecompMarker]:
|
||||||
for module, (marker_type, offset) in self.markers.items():
|
for _, marker in self.markers.items():
|
||||||
yield DecompMarker(marker_type, module, offset)
|
yield marker
|
||||||
|
|
||||||
def empty(self):
|
def empty(self):
|
||||||
self.markers = {}
|
self.markers = {}
|
||||||
@ -111,17 +111,21 @@ def reset(self):
|
|||||||
self.function_sig = ""
|
self.function_sig = ""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def functions(self) -> List[ParserSymbol]:
|
def functions(self) -> List[ParserFunction]:
|
||||||
return [s for s in self._symbols if isinstance(s, ParserFunction)]
|
return [s for s in self._symbols if isinstance(s, ParserFunction)]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def vtables(self) -> List[ParserSymbol]:
|
def vtables(self) -> List[ParserVtable]:
|
||||||
return [s for s in self._symbols if isinstance(s, ParserVtable)]
|
return [s for s in self._symbols if isinstance(s, ParserVtable)]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def variables(self) -> List[ParserSymbol]:
|
def variables(self) -> List[ParserVariable]:
|
||||||
return [s for s in self._symbols if isinstance(s, ParserVariable)]
|
return [s for s in self._symbols if isinstance(s, ParserVariable)]
|
||||||
|
|
||||||
|
@property
|
||||||
|
def strings(self) -> List[ParserString]:
|
||||||
|
return [s for s in self._symbols if isinstance(s, ParserString)]
|
||||||
|
|
||||||
def iter_symbols(self, module: Optional[str] = None) -> Iterator[ParserSymbol]:
|
def iter_symbols(self, module: Optional[str] = None) -> Iterator[ParserSymbol]:
|
||||||
for s in self._symbols:
|
for s in self._symbols:
|
||||||
if module is None or s.module == module:
|
if module is None or s.module == module:
|
||||||
@ -225,18 +229,32 @@ def _variable_marker(self, marker: DecompMarker):
|
|||||||
else:
|
else:
|
||||||
self.state = ReaderState.IN_GLOBAL
|
self.state = ReaderState.IN_GLOBAL
|
||||||
|
|
||||||
def _variable_done(self, name: str):
|
def _variable_done(
|
||||||
if not name.startswith("g_"):
|
self, variable_name: Optional[str] = None, string_value: Optional[str] = None
|
||||||
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
|
):
|
||||||
|
if variable_name is None and string_value is None:
|
||||||
|
self._syntax_error(ParserError.NO_SUITABLE_NAME)
|
||||||
|
return
|
||||||
|
|
||||||
for marker in self.var_markers.iter():
|
for marker in self.var_markers.iter():
|
||||||
|
if marker.is_string():
|
||||||
|
self._symbols.append(
|
||||||
|
ParserString(
|
||||||
|
type=marker.type,
|
||||||
|
line_number=self.line_number,
|
||||||
|
module=marker.module,
|
||||||
|
offset=marker.offset,
|
||||||
|
name=string_value,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
self._symbols.append(
|
self._symbols.append(
|
||||||
ParserVariable(
|
ParserVariable(
|
||||||
type=marker.type,
|
type=marker.type,
|
||||||
line_number=self.line_number,
|
line_number=self.line_number,
|
||||||
module=marker.module,
|
module=marker.module,
|
||||||
offset=marker.offset,
|
offset=marker.offset,
|
||||||
name=name,
|
name=variable_name,
|
||||||
is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
|
is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -298,20 +316,8 @@ def _handle_marker(self, marker: DecompMarker):
|
|||||||
else:
|
else:
|
||||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||||
|
|
||||||
elif marker.is_string():
|
# Strings and variables are almost the same thing
|
||||||
# TODO: We are ignoring string markers for the moment.
|
elif marker.is_string() or marker.is_variable():
|
||||||
# We already have a lot of them in the codebase, though, so we'll
|
|
||||||
# hang onto them for now in case we can use them later.
|
|
||||||
# To match up string constants, the strategy will be:
|
|
||||||
# 1. Use cvdump to find all string constants in the recomp
|
|
||||||
# 2. In the original binary, look at relocated vaddrs from .rdata
|
|
||||||
# 3. Try to match up string data from #1 with locations in #2
|
|
||||||
|
|
||||||
# Throw the syntax error we would throw if we were parsing these
|
|
||||||
if self.state not in (ReaderState.SEARCH, ReaderState.IN_FUNC):
|
|
||||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
|
||||||
|
|
||||||
elif marker.is_variable():
|
|
||||||
if self.state in (
|
if self.state in (
|
||||||
ReaderState.SEARCH,
|
ReaderState.SEARCH,
|
||||||
ReaderState.IN_GLOBAL,
|
ReaderState.IN_GLOBAL,
|
||||||
@ -418,24 +424,39 @@ def read_line(self, line: str):
|
|||||||
# function we have already parsed if state == IN_FUNC_GLOBAL.
|
# function we have already parsed if state == IN_FUNC_GLOBAL.
|
||||||
# However, we are not tolerant of _any_ syntax problems in our
|
# However, we are not tolerant of _any_ syntax problems in our
|
||||||
# CI actions, so the solution is to just fix the invalid marker.
|
# CI actions, so the solution is to just fix the invalid marker.
|
||||||
if is_blank_or_comment(line):
|
variable_name = None
|
||||||
self._syntax_error(ParserError.NO_SUITABLE_NAME)
|
|
||||||
|
global_markers_queued = any(
|
||||||
|
m.is_variable() for m in self.var_markers.iter()
|
||||||
|
)
|
||||||
|
|
||||||
|
if len(line_strip) == 0:
|
||||||
|
self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE)
|
||||||
return
|
return
|
||||||
|
|
||||||
# We don't have a foolproof mechanism to tell what is and is not a variable.
|
if global_markers_queued:
|
||||||
# If the GLOBAL is being declared on a `return` statement, though, this is
|
# Not the greatest solution, but a consequence of combining GLOBAL and
|
||||||
# not correct. It is either a string literal (which will be handled differently)
|
# STRING markers together. If the marker precedes a return statement, it is
|
||||||
# or it is not the variable declaration, which is incorrect decomp syntax.
|
# valid for a STRING marker to be here, but not a GLOBAL. We need to look
|
||||||
if line.strip().startswith("return"):
|
# ahead and tell whether this *would* fail.
|
||||||
|
if line_strip.startswith("return"):
|
||||||
self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
|
self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
|
||||||
return
|
return
|
||||||
|
if line_strip.startswith("//"):
|
||||||
|
# If we found a comment, assume implicit lookup-by-name
|
||||||
|
# function and end here. We know this is not a decomp marker
|
||||||
|
# because it would have been handled already.
|
||||||
|
variable_name = get_synthetic_name(line)
|
||||||
|
else:
|
||||||
|
variable_name = get_variable_name(line)
|
||||||
|
# This is out of our control for library variables, but all of our
|
||||||
|
# variables should start with "g_".
|
||||||
|
if variable_name is not None and not variable_name.startswith("g_"):
|
||||||
|
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
|
||||||
|
|
||||||
name = get_variable_name(line)
|
string_name = get_string_contents(line)
|
||||||
if name is None:
|
|
||||||
self._syntax_error(ParserError.NO_SUITABLE_NAME)
|
|
||||||
return
|
|
||||||
|
|
||||||
self._variable_done(name)
|
self._variable_done(variable_name, string_name)
|
||||||
|
|
||||||
elif self.state == ReaderState.IN_VTABLE:
|
elif self.state == ReaderState.IN_VTABLE:
|
||||||
vtable_class = get_class_name(line)
|
vtable_class = get_class_name(line)
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
# C++ Parser utility functions and data structures
|
# C++ Parser utility functions and data structures
|
||||||
import re
|
import re
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from ast import literal_eval
|
||||||
|
|
||||||
# The goal here is to just read whatever is on the next line, so some
|
# The goal here is to just read whatever is on the next line, so some
|
||||||
# flexibility in the formatting seems OK
|
# flexibility in the formatting seems OK
|
||||||
@ -12,6 +13,10 @@
|
|||||||
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
|
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
|
||||||
|
|
||||||
|
|
||||||
|
# Get string contents, ignore escape characters that might interfere
|
||||||
|
doubleQuoteRegex = re.compile(r"(\"(?:[^\"\\]|\\.)*\")")
|
||||||
|
|
||||||
|
|
||||||
def get_synthetic_name(line: str) -> Optional[str]:
|
def get_synthetic_name(line: str) -> Optional[str]:
|
||||||
"""Synthetic names appear on a single line comment on the line after the marker.
|
"""Synthetic names appear on a single line comment on the line after the marker.
|
||||||
If that's not what we have, return None"""
|
If that's not what we have, return None"""
|
||||||
@ -86,3 +91,20 @@ def get_variable_name(line: str) -> Optional[str]:
|
|||||||
return match.group("name")
|
return match.group("name")
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_string_contents(line: str) -> Optional[str]:
|
||||||
|
"""Return the first C string seen on this line.
|
||||||
|
We have to unescape the string, and a simple way to do that is to use
|
||||||
|
python's ast.literal_eval. I'm sure there are many pitfalls to doing
|
||||||
|
it this way, but hopefully the regex will ensure reasonably sane input."""
|
||||||
|
|
||||||
|
try:
|
||||||
|
if (match := doubleQuoteRegex.search(line)) is not None:
|
||||||
|
return literal_eval(match.group(1))
|
||||||
|
# pylint: disable=broad-exception-caught
|
||||||
|
# No way to predict what kind of exception could occur.
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return None
|
||||||
|
|||||||
@ -14,6 +14,7 @@
|
|||||||
14,
|
14,
|
||||||
True,
|
True,
|
||||||
),
|
),
|
||||||
|
("??_C@_00A@?$AA@", 0, False),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -112,3 +112,33 @@ def test_duplicate_offsets(linter):
|
|||||||
# Full reset will forget seen offsets.
|
# Full reset will forget seen offsets.
|
||||||
linter.reset(True)
|
linter.reset(True)
|
||||||
assert linter.check_lines(lines, "test.h", "TEST") is True
|
assert linter.check_lines(lines, "test.h", "TEST") is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_duplicate_strings(linter):
|
||||||
|
"""Duplicate string markers are okay if the string value is the same."""
|
||||||
|
string_lines = [
|
||||||
|
"// STRING: TEST 0x1000",
|
||||||
|
'return "hello world";',
|
||||||
|
]
|
||||||
|
|
||||||
|
# No problem to use this marker twice.
|
||||||
|
assert linter.check_lines(string_lines, "test.h", "TEST") is True
|
||||||
|
assert linter.check_lines(string_lines, "test.h", "TEST") is True
|
||||||
|
|
||||||
|
different_string = [
|
||||||
|
"// STRING: TEST 0x1000",
|
||||||
|
'return "hi there";',
|
||||||
|
]
|
||||||
|
|
||||||
|
# Same address but the string is different
|
||||||
|
assert linter.check_lines(different_string, "greeting.h", "TEST") is False
|
||||||
|
assert len(linter.alerts) == 1
|
||||||
|
assert linter.alerts[0].code == ParserError.WRONG_STRING
|
||||||
|
|
||||||
|
same_addr_reused = [
|
||||||
|
"// GLOBAL:TEXT 0x1000",
|
||||||
|
"int g_test = 123;",
|
||||||
|
]
|
||||||
|
|
||||||
|
# This will fail like any other offset reuse.
|
||||||
|
assert linter.check_lines(same_addr_reused, "other.h", "TEST") is False
|
||||||
|
|||||||
@ -442,3 +442,82 @@ def test_static_variable(parser):
|
|||||||
)
|
)
|
||||||
assert len(parser.variables) == 2
|
assert len(parser.variables) == 2
|
||||||
assert parser.variables[1].is_static is True
|
assert parser.variables[1].is_static is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_reject_global_return(parser):
|
||||||
|
"""Previously we had annotated strings with the GLOBAL marker.
|
||||||
|
For example: if a function returned a string. We now want these to be
|
||||||
|
annotated with the STRING marker."""
|
||||||
|
|
||||||
|
parser.read_lines(
|
||||||
|
[
|
||||||
|
"// FUNCTION: TEST 0x5555",
|
||||||
|
"void test_function() {",
|
||||||
|
" // GLOBAL: TEST 0x8888",
|
||||||
|
' return "test";',
|
||||||
|
"}",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
assert len(parser.variables) == 0
|
||||||
|
assert len(parser.alerts) == 1
|
||||||
|
assert parser.alerts[0].code == ParserError.GLOBAL_NOT_VARIABLE
|
||||||
|
|
||||||
|
|
||||||
|
def test_global_string(parser):
|
||||||
|
"""We now allow GLOBAL and STRING markers for the same item."""
|
||||||
|
|
||||||
|
parser.read_lines(
|
||||||
|
[
|
||||||
|
"// GLOBAL: TEST 0x1234",
|
||||||
|
"// STRING: TEXT 0x5555",
|
||||||
|
'char* g_test = "hello";',
|
||||||
|
]
|
||||||
|
)
|
||||||
|
assert len(parser.variables) == 1
|
||||||
|
assert len(parser.strings) == 1
|
||||||
|
assert len(parser.alerts) == 0
|
||||||
|
|
||||||
|
assert parser.variables[0].name == "g_test"
|
||||||
|
assert parser.strings[0].name == "hello"
|
||||||
|
|
||||||
|
|
||||||
|
def test_comment_variables(parser):
|
||||||
|
"""Match on hidden variables from libraries."""
|
||||||
|
|
||||||
|
parser.read_lines(
|
||||||
|
[
|
||||||
|
"// GLOBAL: TEST 0x1234",
|
||||||
|
"// g_test",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
assert len(parser.variables) == 1
|
||||||
|
assert parser.variables[0].name == "g_test"
|
||||||
|
|
||||||
|
|
||||||
|
def test_flexible_variable_prefix(parser):
|
||||||
|
"""Don't alert to library variables that lack the g_ prefix.
|
||||||
|
This is out of our control."""
|
||||||
|
|
||||||
|
parser.read_lines(
|
||||||
|
[
|
||||||
|
"// GLOBAL: TEST 0x1234",
|
||||||
|
"// some_other_variable",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
assert len(parser.variables) == 1
|
||||||
|
assert len(parser.alerts) == 0
|
||||||
|
assert parser.variables[0].name == "some_other_variable"
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_ignore_g_prefix(parser):
|
||||||
|
"""String annotations above a regular variable should not alert to
|
||||||
|
the missing g_ prefix. This is only required for GLOBAL markers."""
|
||||||
|
|
||||||
|
parser.read_lines(
|
||||||
|
[
|
||||||
|
"// STRING: TEST 0x1234",
|
||||||
|
'const char* value = "";',
|
||||||
|
]
|
||||||
|
)
|
||||||
|
assert len(parser.strings) == 1
|
||||||
|
assert len(parser.alerts) == 0
|
||||||
|
|||||||
@ -15,7 +15,7 @@
|
|||||||
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
|
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
|
||||||
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
|
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
|
||||||
(_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None),
|
(_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None),
|
||||||
(_rs.SEARCH, "STRING", _rs.SEARCH, None),
|
(_rs.SEARCH, "STRING", _rs.IN_GLOBAL, None),
|
||||||
|
|
||||||
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
|
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
|
||||||
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
@ -33,7 +33,7 @@
|
|||||||
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
|
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
|
||||||
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
|
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
|
||||||
(_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION),
|
(_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION),
|
||||||
(_rs.IN_FUNC, "STRING", _rs.IN_FUNC, None),
|
(_rs.IN_FUNC, "STRING", _rs.IN_FUNC_GLOBAL, None),
|
||||||
|
|
||||||
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
@ -60,7 +60,7 @@
|
|||||||
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_GLOBAL, "STRING", _rs.IN_GLOBAL, None),
|
||||||
|
|
||||||
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
|
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
|
||||||
@ -69,7 +69,7 @@
|
|||||||
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_FUNC_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_FUNC_GLOBAL, "STRING", _rs.IN_FUNC_GLOBAL, None),
|
||||||
|
|
||||||
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||||
|
|||||||
@ -10,6 +10,7 @@
|
|||||||
is_blank_or_comment,
|
is_blank_or_comment,
|
||||||
get_class_name,
|
get_class_name,
|
||||||
get_variable_name,
|
get_variable_name,
|
||||||
|
get_string_contents,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -158,3 +159,18 @@ def test_get_class_name_none(line: str):
|
|||||||
@pytest.mark.parametrize("line,name", variable_name_cases)
|
@pytest.mark.parametrize("line,name", variable_name_cases)
|
||||||
def test_get_variable_name(line: str, name: str):
|
def test_get_variable_name(line: str, name: str):
|
||||||
assert get_variable_name(line) == name
|
assert get_variable_name(line) == name
|
||||||
|
|
||||||
|
|
||||||
|
string_match_cases = [
|
||||||
|
('return "hello world";', "hello world"),
|
||||||
|
('"hello\\\\"', "hello\\"),
|
||||||
|
('"hello \\"world\\""', 'hello "world"'),
|
||||||
|
('"hello\\nworld"', "hello\nworld"),
|
||||||
|
# Only match first string if there are multiple options
|
||||||
|
('Method("hello", "world");', "hello"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("line, string", string_match_cases)
|
||||||
|
def test_get_string_contents(line: str, string: str):
|
||||||
|
assert get_string_contents(line) == string
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user