Enable string annotations and indirect globals

This commit is contained in:
disinvite 2024-01-16 19:02:18 -05:00
parent 6af0c6cb1a
commit abefacf9b2
16 changed files with 332 additions and 71 deletions

View File

@@ -85,13 +85,19 @@ def _load_cvdump(self):
if sym.node_type == SymbolType.STRING: if sym.node_type == SymbolType.STRING:
string_info = demangle_string_const(sym.decorated_name) string_info = demangle_string_const(sym.decorated_name)
if string_info is None:
logger.debug(
"Could not demangle string symbol: %s", sym.decorated_name
)
continue
# TODO: skip unicode for now. will need to handle these differently. # TODO: skip unicode for now. will need to handle these differently.
if string_info.is_utf16: if string_info.is_utf16:
continue continue
raw = self.recomp_bin.read(addr, sym.size()) raw = self.recomp_bin.read(addr, sym.size())
try: try:
sym.friendly_name = raw.decode("latin1") sym.friendly_name = raw.decode("latin1").rstrip("\x00")
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
@@ -134,6 +140,26 @@ def _load_markers(self):
for tbl in codebase.iter_vtables(): for tbl in codebase.iter_vtables():
self._db.match_vtable(tbl.offset, tbl.name) self._db.match_vtable(tbl.offset, tbl.name)
for string in codebase.iter_strings():
# Not that we don't trust you, but we're checking the string
# annotation to make sure it is accurate.
try:
# TODO: would presumably fail for wchar_t strings
orig = self.orig_bin.read_string(string.offset).decode("latin1")
string_correct = string.name == orig
except UnicodeDecodeError:
string_correct = False
if not string_correct:
logger.error(
"Data at 0x%x does not match string %s",
string.offset,
repr(string.name),
)
continue
self._db.match_string(string.offset, string.name)
def _find_original_strings(self): def _find_original_strings(self):
"""Go to the original binary and look for the specified string constants """Go to the original binary and look for the specified string constants
to find a match. This is a (relatively) expensive operation so we only to find a match. This is a (relatively) expensive operation so we only

View File

@@ -43,7 +43,8 @@ def match_name(self) -> str:
return None return None
ctype = self.compare_type.name if self.compare_type is not None else "UNK" ctype = self.compare_type.name if self.compare_type is not None else "UNK"
return f"{self.name} ({ctype})" name = repr(self.name) if ctype == "STRING" else self.name
return f"{name} ({ctype})"
def matchinfo_factory(_, row): def matchinfo_factory(_, row):
@@ -197,3 +198,5 @@ def match_string(self, addr: int, value: str) -> bool:
if not did_match: if not did_match:
escaped = repr(value) escaped = repr(value)
logger.error("Failed to find string: %s", escaped) logger.error("Failed to find string: %s", escaped)
return did_match

View File

@@ -94,7 +94,11 @@ def set_decorated(self, name: str):
def name(self) -> Optional[str]: def name(self) -> Optional[str]:
"""Prefer "friendly" name if we have it. """Prefer "friendly" name if we have it.
This is what we have been using to match functions.""" This is what we have been using to match functions."""
return self.friendly_name or self.decorated_name return (
self.friendly_name
if self.friendly_name is not None
else self.decorated_name
)
def size(self) -> Optional[int]: def size(self) -> Optional[int]:
if self.confirmed_size is not None: if self.confirmed_size is not None:

View File

@@ -4,6 +4,7 @@
""" """
import re import re
from collections import namedtuple from collections import namedtuple
from typing import Optional
class InvalidEncodedNumberError(Exception): class InvalidEncodedNumberError(Exception):
@@ -30,13 +31,12 @@ def parse_encoded_number(string: str) -> int:
StringConstInfo = namedtuple("StringConstInfo", "len is_utf16") StringConstInfo = namedtuple("StringConstInfo", "len is_utf16")
def demangle_string_const(symbol: str) -> StringConstInfo: def demangle_string_const(symbol: str) -> Optional[StringConstInfo]:
"""Don't bother to decode the string text from the symbol. """Don't bother to decode the string text from the symbol.
We can just read it from the binary once we have the length.""" We can just read it from the binary once we have the length."""
match = string_const_regex.match(symbol) match = string_const_regex.match(symbol)
if match is None: if match is None:
# See below return None
return StringConstInfo(0, False)
try: try:
strlen = ( strlen = (
@@ -45,10 +45,7 @@ def demangle_string_const(symbol: str) -> StringConstInfo:
else int(match.group("len")) else int(match.group("len"))
) )
except (ValueError, InvalidEncodedNumberError): except (ValueError, InvalidEncodedNumberError):
# This would be an annoying error to fail on if we get a bad symbol. return None
# For now, just assume a zero length string because this will probably
# raise some eyebrows during the comparison.
strlen = 0
is_utf16 = match.group("is_utf16") == "1" is_utf16 = match.group("is_utf16") == "1"
return StringConstInfo(len=strlen, is_utf16=is_utf16) return StringConstInfo(len=strlen, is_utf16=is_utf16)

View File

@@ -6,6 +6,7 @@
ParserFunction, ParserFunction,
ParserVtable, ParserVtable,
ParserVariable, ParserVariable,
ParserString,
) )
@@ -42,3 +43,6 @@ def iter_vtables(self) -> Iterator[ParserVtable]:
def iter_variables(self) -> Iterator[ParserVariable]: def iter_variables(self) -> Iterator[ParserVariable]:
return filter(lambda s: isinstance(s, ParserVariable), self._symbols) return filter(lambda s: isinstance(s, ParserVariable), self._symbols)
def iter_strings(self) -> Iterator[ParserString]:
return filter(lambda s: isinstance(s, ParserString), self._symbols)

View File

@@ -70,6 +70,10 @@ class ParserError(Enum):
# a comment -- i.e. VTABLE or GLOBAL -- could not extract the name # a comment -- i.e. VTABLE or GLOBAL -- could not extract the name
NO_SUITABLE_NAME = 204 NO_SUITABLE_NAME = 204
# ERROR: Two STRING markers have the same module and offset, but the strings
# they annotate are different.
WRONG_STRING = 205
@dataclass @dataclass
class ParserAlert: class ParserAlert:

View File

@@ -1,7 +1,7 @@
from typing import List, Optional from typing import List, Optional
from .parser import DecompParser from .parser import DecompParser
from .error import ParserAlert, ParserError from .error import ParserAlert, ParserError
from .node import ParserSymbol from .node import ParserSymbol, ParserString
def get_checkorder_filter(module): def get_checkorder_filter(module):
@@ -19,6 +19,9 @@ def __init__(self) -> None:
# This is _not_ reset between files and is intended to report offset reuse # This is _not_ reset between files and is intended to report offset reuse
# when scanning the entire directory. # when scanning the entire directory.
self._offsets_used = set() self._offsets_used = set()
# Keep track of strings we have seen. Persists across files.
# Module/offset can be repeated for string markers but the strings must match.
self._strings = {}
def reset(self, full_reset: bool = False): def reset(self, full_reset: bool = False):
self.alerts = [] self.alerts = []
@@ -28,6 +31,7 @@ def reset(self, full_reset: bool = False):
if full_reset: if full_reset:
self._offsets_used.clear() self._offsets_used.clear()
self._strings = {}
def file_is_header(self): def file_is_header(self):
return self._filename.lower().endswith(".h") return self._filename.lower().endswith(".h")
@@ -36,17 +40,31 @@ def _load_offsets_from_list(self, marker_list: List[ParserSymbol]):
"""Helper for loading (module, offset) tuples while the DecompParser """Helper for loading (module, offset) tuples while the DecompParser
has them broken up into three different lists.""" has them broken up into three different lists."""
for marker in marker_list: for marker in marker_list:
is_string = isinstance(marker, ParserString)
value = (marker.module, marker.offset) value = (marker.module, marker.offset)
if value in self._offsets_used: if value in self._offsets_used:
self.alerts.append( if is_string:
ParserAlert( if self._strings[value] != marker.name:
code=ParserError.DUPLICATE_OFFSET, self.alerts.append(
line_number=marker.line_number, ParserAlert(
line=f"0x{marker.offset:08x}", code=ParserError.WRONG_STRING,
line_number=marker.line_number,
line=f"0x{marker.offset:08x}, {repr(self._strings[value])} vs. {repr(marker.name)}",
)
)
else:
self.alerts.append(
ParserAlert(
code=ParserError.DUPLICATE_OFFSET,
line_number=marker.line_number,
line=f"0x{marker.offset:08x}",
)
) )
)
else: else:
self._offsets_used.add(value) self._offsets_used.add(value)
if is_string:
self._strings[value] = marker.name
def _check_function_order(self): def _check_function_order(self):
"""Rules: """Rules:
@@ -82,6 +100,7 @@ def _check_offset_uniqueness(self):
self._load_offsets_from_list(self._parser.functions) self._load_offsets_from_list(self._parser.functions)
self._load_offsets_from_list(self._parser.vtables) self._load_offsets_from_list(self._parser.vtables)
self._load_offsets_from_list(self._parser.variables) self._load_offsets_from_list(self._parser.variables)
self._load_offsets_from_list(self._parser.strings)
def _check_byname_allowed(self): def _check_byname_allowed(self):
if self.file_is_header(): if self.file_is_header():

View File

@@ -3,6 +3,19 @@
from enum import Enum from enum import Enum
class MarkerCategory(Enum):
"""For the purposes of grouping multiple different DecompMarkers together,
assign a rough "category" for the MarkerType values below.
It's really only the function types that have to get folded down, but
we'll do that in a structured way to permit future expansion."""
FUNCTION = 1
VARIABLE = 2
STRING = 3
VTABLE = 4
ADDRESS = 100 # i.e. no comparison required or possible
class MarkerType(Enum): class MarkerType(Enum):
UNKNOWN = -100 UNKNOWN = -100
FUNCTION = 1 FUNCTION = 1
@@ -51,6 +64,23 @@ def module(self) -> str:
def offset(self) -> int: def offset(self) -> int:
return self._offset return self._offset
@property
def category(self) -> MarkerCategory:
if self.is_vtable():
return MarkerCategory.VTABLE
if self.is_variable():
return MarkerCategory.VARIABLE
if self.is_string():
return MarkerCategory.STRING
# TODO: worth another look if we add more types, but this covers it
if self.is_regular_function() or self.is_explicit_byname():
return MarkerCategory.FUNCTION
return MarkerCategory.ADDRESS
def is_regular_function(self) -> bool: def is_regular_function(self) -> bool:
"""Regular function, meaning: not an explicit byname lookup. FUNCTION """Regular function, meaning: not an explicit byname lookup. FUNCTION
markers can be _implicit_ byname. markers can be _implicit_ byname.

View File

@@ -55,3 +55,8 @@ class ParserVariable(ParserSymbol):
@dataclass @dataclass
class ParserVtable(ParserSymbol): class ParserVtable(ParserSymbol):
pass pass
@dataclass
class ParserString(ParserSymbol):
pass

View File

@@ -3,11 +3,11 @@
from typing import List, Iterable, Iterator, Optional from typing import List, Iterable, Iterator, Optional
from enum import Enum from enum import Enum
from .util import ( from .util import (
is_blank_or_comment,
get_class_name, get_class_name,
get_variable_name, get_variable_name,
get_synthetic_name, get_synthetic_name,
remove_trailing_comment, remove_trailing_comment,
get_string_contents,
) )
from .marker import ( from .marker import (
DecompMarker, DecompMarker,
@@ -19,6 +19,7 @@
ParserFunction, ParserFunction,
ParserVariable, ParserVariable,
ParserVtable, ParserVtable,
ParserString,
) )
from .error import ParserAlert, ParserError from .error import ParserAlert, ParserError
@@ -43,17 +44,16 @@ def __init__(self) -> None:
def insert(self, marker: DecompMarker) -> bool: def insert(self, marker: DecompMarker) -> bool:
"""Return True if this insert would overwrite""" """Return True if this insert would overwrite"""
module = marker.module key = (marker.category, marker.module)
if module in self.markers: if key in self.markers:
return True return True
# TODO: type converted back to string version here instead of using enum self.markers[key] = marker
self.markers[module] = (marker.type.name, marker.offset)
return False return False
def iter(self) -> Iterator[DecompMarker]: def iter(self) -> Iterator[DecompMarker]:
for module, (marker_type, offset) in self.markers.items(): for _, marker in self.markers.items():
yield DecompMarker(marker_type, module, offset) yield marker
def empty(self): def empty(self):
self.markers = {} self.markers = {}
@@ -111,17 +111,21 @@ def reset(self):
self.function_sig = "" self.function_sig = ""
@property @property
def functions(self) -> List[ParserSymbol]: def functions(self) -> List[ParserFunction]:
return [s for s in self._symbols if isinstance(s, ParserFunction)] return [s for s in self._symbols if isinstance(s, ParserFunction)]
@property @property
def vtables(self) -> List[ParserSymbol]: def vtables(self) -> List[ParserVtable]:
return [s for s in self._symbols if isinstance(s, ParserVtable)] return [s for s in self._symbols if isinstance(s, ParserVtable)]
@property @property
def variables(self) -> List[ParserSymbol]: def variables(self) -> List[ParserVariable]:
return [s for s in self._symbols if isinstance(s, ParserVariable)] return [s for s in self._symbols if isinstance(s, ParserVariable)]
@property
def strings(self) -> List[ParserString]:
return [s for s in self._symbols if isinstance(s, ParserString)]
def iter_symbols(self, module: Optional[str] = None) -> Iterator[ParserSymbol]: def iter_symbols(self, module: Optional[str] = None) -> Iterator[ParserSymbol]:
for s in self._symbols: for s in self._symbols:
if module is None or s.module == module: if module is None or s.module == module:
@@ -225,21 +229,35 @@ def _variable_marker(self, marker: DecompMarker):
else: else:
self.state = ReaderState.IN_GLOBAL self.state = ReaderState.IN_GLOBAL
def _variable_done(self, name: str): def _variable_done(
if not name.startswith("g_"): self, variable_name: Optional[str] = None, string_value: Optional[str] = None
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX) ):
if variable_name is None and string_value is None:
self._syntax_error(ParserError.NO_SUITABLE_NAME)
return
for marker in self.var_markers.iter(): for marker in self.var_markers.iter():
self._symbols.append( if marker.is_string():
ParserVariable( self._symbols.append(
type=marker.type, ParserString(
line_number=self.line_number, type=marker.type,
module=marker.module, line_number=self.line_number,
offset=marker.offset, module=marker.module,
name=name, offset=marker.offset,
is_static=self.state == ReaderState.IN_FUNC_GLOBAL, name=string_value,
)
)
else:
self._symbols.append(
ParserVariable(
type=marker.type,
line_number=self.line_number,
module=marker.module,
offset=marker.offset,
name=variable_name,
is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
)
) )
)
self.var_markers.empty() self.var_markers.empty()
if self.state == ReaderState.IN_FUNC_GLOBAL: if self.state == ReaderState.IN_FUNC_GLOBAL:
@@ -298,20 +316,8 @@ def _handle_marker(self, marker: DecompMarker):
else: else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER) self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker.is_string(): # Strings and variables are almost the same thing
# TODO: We are ignoring string markers for the moment. elif marker.is_string() or marker.is_variable():
# We already have a lot of them in the codebase, though, so we'll
# hang onto them for now in case we can use them later.
# To match up string constants, the strategy will be:
# 1. Use cvdump to find all string constants in the recomp
# 2. In the original binary, look at relocated vaddrs from .rdata
# 3. Try to match up string data from #1 with locations in #2
# Throw the syntax error we would throw if we were parsing these
if self.state not in (ReaderState.SEARCH, ReaderState.IN_FUNC):
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker.is_variable():
if self.state in ( if self.state in (
ReaderState.SEARCH, ReaderState.SEARCH,
ReaderState.IN_GLOBAL, ReaderState.IN_GLOBAL,
@@ -418,24 +424,39 @@ def read_line(self, line: str):
# function we have already parsed if state == IN_FUNC_GLOBAL. # function we have already parsed if state == IN_FUNC_GLOBAL.
# However, we are not tolerant of _any_ syntax problems in our # However, we are not tolerant of _any_ syntax problems in our
# CI actions, so the solution is to just fix the invalid marker. # CI actions, so the solution is to just fix the invalid marker.
if is_blank_or_comment(line): variable_name = None
self._syntax_error(ParserError.NO_SUITABLE_NAME)
global_markers_queued = any(
m.is_variable() for m in self.var_markers.iter()
)
if len(line_strip) == 0:
self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE)
return return
# We don't have a foolproof mechanism to tell what is and is not a variable. if global_markers_queued:
# If the GLOBAL is being declared on a `return` statement, though, this is # Not the greatest solution, but a consequence of combining GLOBAL and
# not correct. It is either a string literal (which will be handled differently) # STRING markers together. If the marker precedes a return statement, it is
# or it is not the variable declaration, which is incorrect decomp syntax. # valid for a STRING marker to be here, but not a GLOBAL. We need to look
if line.strip().startswith("return"): # ahead and tell whether this *would* fail.
self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE) if line_strip.startswith("return"):
return self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
return
if line_strip.startswith("//"):
# If we found a comment, assume implicit lookup-by-name
# function and end here. We know this is not a decomp marker
# because it would have been handled already.
variable_name = get_synthetic_name(line)
else:
variable_name = get_variable_name(line)
# This is out of our control for library variables, but all of our
# variables should start with "g_".
if variable_name is not None and not variable_name.startswith("g_"):
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
name = get_variable_name(line) string_name = get_string_contents(line)
if name is None:
self._syntax_error(ParserError.NO_SUITABLE_NAME)
return
self._variable_done(name) self._variable_done(variable_name, string_name)
elif self.state == ReaderState.IN_VTABLE: elif self.state == ReaderState.IN_VTABLE:
vtable_class = get_class_name(line) vtable_class = get_class_name(line)

View File

@@ -1,6 +1,7 @@
# C++ Parser utility functions and data structures # C++ Parser utility functions and data structures
import re import re
from typing import Optional from typing import Optional
from ast import literal_eval
# The goal here is to just read whatever is on the next line, so some # The goal here is to just read whatever is on the next line, so some
# flexibility in the formatting seems OK # flexibility in the formatting seems OK
@@ -12,6 +13,10 @@
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$") trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
# Get string contents, ignore escape characters that might interfere
doubleQuoteRegex = re.compile(r"(\"(?:[^\"\\]|\\.)*\")")
def get_synthetic_name(line: str) -> Optional[str]: def get_synthetic_name(line: str) -> Optional[str]:
"""Synthetic names appear on a single line comment on the line after the marker. """Synthetic names appear on a single line comment on the line after the marker.
If that's not what we have, return None""" If that's not what we have, return None"""
@@ -86,3 +91,20 @@ def get_variable_name(line: str) -> Optional[str]:
return match.group("name") return match.group("name")
return None return None
def get_string_contents(line: str) -> Optional[str]:
"""Return the first C string seen on this line.
We have to unescape the string, and a simple way to do that is to use
python's ast.literal_eval. I'm sure there are many pitfalls to doing
it this way, but hopefully the regex will ensure reasonably sane input."""
try:
if (match := doubleQuoteRegex.search(line)) is not None:
return literal_eval(match.group(1))
# pylint: disable=broad-exception-caught
# No way to predict what kind of exception could occur.
except Exception:
pass
return None

View File

@@ -14,6 +14,7 @@
14, 14,
True, True,
), ),
("??_C@_00A@?$AA@", 0, False),
] ]

View File

@@ -112,3 +112,33 @@ def test_duplicate_offsets(linter):
# Full reset will forget seen offsets. # Full reset will forget seen offsets.
linter.reset(True) linter.reset(True)
assert linter.check_lines(lines, "test.h", "TEST") is True assert linter.check_lines(lines, "test.h", "TEST") is True
def test_duplicate_strings(linter):
"""Duplicate string markers are okay if the string value is the same."""
string_lines = [
"// STRING: TEST 0x1000",
'return "hello world";',
]
# No problem to use this marker twice.
assert linter.check_lines(string_lines, "test.h", "TEST") is True
assert linter.check_lines(string_lines, "test.h", "TEST") is True
different_string = [
"// STRING: TEST 0x1000",
'return "hi there";',
]
# Same address but the string is different
assert linter.check_lines(different_string, "greeting.h", "TEST") is False
assert len(linter.alerts) == 1
assert linter.alerts[0].code == ParserError.WRONG_STRING
same_addr_reused = [
"// GLOBAL:TEXT 0x1000",
"int g_test = 123;",
]
# This will fail like any other offset reuse.
assert linter.check_lines(same_addr_reused, "other.h", "TEST") is False

View File

@@ -442,3 +442,82 @@ def test_static_variable(parser):
) )
assert len(parser.variables) == 2 assert len(parser.variables) == 2
assert parser.variables[1].is_static is True assert parser.variables[1].is_static is True
def test_reject_global_return(parser):
"""Previously we had annotated strings with the GLOBAL marker.
For example: if a function returned a string. We now want these to be
annotated with the STRING marker."""
parser.read_lines(
[
"// FUNCTION: TEST 0x5555",
"void test_function() {",
" // GLOBAL: TEST 0x8888",
' return "test";',
"}",
]
)
assert len(parser.variables) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.GLOBAL_NOT_VARIABLE
def test_global_string(parser):
"""We now allow GLOBAL and STRING markers for the same item."""
parser.read_lines(
[
"// GLOBAL: TEST 0x1234",
"// STRING: TEXT 0x5555",
'char* g_test = "hello";',
]
)
assert len(parser.variables) == 1
assert len(parser.strings) == 1
assert len(parser.alerts) == 0
assert parser.variables[0].name == "g_test"
assert parser.strings[0].name == "hello"
def test_comment_variables(parser):
"""Match on hidden variables from libraries."""
parser.read_lines(
[
"// GLOBAL: TEST 0x1234",
"// g_test",
]
)
assert len(parser.variables) == 1
assert parser.variables[0].name == "g_test"
def test_flexible_variable_prefix(parser):
"""Don't alert to library variables that lack the g_ prefix.
This is out of our control."""
parser.read_lines(
[
"// GLOBAL: TEST 0x1234",
"// some_other_variable",
]
)
assert len(parser.variables) == 1
assert len(parser.alerts) == 0
assert parser.variables[0].name == "some_other_variable"
def test_string_ignore_g_prefix(parser):
"""String annotations above a regular variable should not alert to
the missing g_ prefix. This is only required for GLOBAL markers."""
parser.read_lines(
[
"// STRING: TEST 0x1234",
'const char* value = "";',
]
)
assert len(parser.strings) == 1
assert len(parser.alerts) == 0

View File

@@ -15,7 +15,7 @@
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None), (_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None), (_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
(_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None), (_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None),
(_rs.SEARCH, "STRING", _rs.SEARCH, None), (_rs.SEARCH, "STRING", _rs.IN_GLOBAL, None),
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None), (_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@@ -33,7 +33,7 @@
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "STRING", _rs.IN_FUNC, None), (_rs.IN_FUNC, "STRING", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@@ -60,7 +60,7 @@
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "STRING", _rs.IN_GLOBAL, None),
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
@@ -69,7 +69,7 @@
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "STRING", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),

View File

@@ -10,6 +10,7 @@
is_blank_or_comment, is_blank_or_comment,
get_class_name, get_class_name,
get_variable_name, get_variable_name,
get_string_contents,
) )
@@ -158,3 +159,18 @@ def test_get_class_name_none(line: str):
@pytest.mark.parametrize("line,name", variable_name_cases) @pytest.mark.parametrize("line,name", variable_name_cases)
def test_get_variable_name(line: str, name: str): def test_get_variable_name(line: str, name: str):
assert get_variable_name(line) == name assert get_variable_name(line) == name
string_match_cases = [
('return "hello world";', "hello world"),
('"hello\\\\"', "hello\\"),
('"hello \\"world\\""', 'hello "world"'),
('"hello\\nworld"', "hello\nworld"),
# Only match first string if there are multiple options
('Method("hello", "world");', "hello"),
]
@pytest.mark.parametrize("line, string", string_match_cases)
def test_get_string_contents(line: str, string: str):
assert get_string_contents(line) == string