diff --git a/tools/isledecomp/isledecomp/parser/error.py b/tools/isledecomp/isledecomp/parser/error.py index 638a806e..eb1aa7b8 100644 --- a/tools/isledecomp/isledecomp/parser/error.py +++ b/tools/isledecomp/isledecomp/parser/error.py @@ -39,6 +39,14 @@ class ParserError(Enum): # WARN: We found a marker to be referenced by name outside of a header file. BYNAME_FUNCTION_IN_CPP = 109 + # WARN: A GLOBAL marker appeared over a variable without the g_ prefix + GLOBAL_MISSING_PREFIX = 110 + + # WARN: GLOBAL marker points at something other than variable declaration. + # We can't match global variables based on position, but the goal here is + # to ignore things like string literal that are not variables. + GLOBAL_NOT_VARIABLE = 111 + # This code or higher is an error, not a warning DECOMP_ERROR_START = 200 @@ -50,13 +58,18 @@ class ParserError(Enum): # For example, a GLOBAL cannot follow FUNCTION/STUB INCOMPATIBLE_MARKER = 201 - # ERROR: The line following a synthetic marker was not a comment - BAD_SYNTHETIC = 202 + # ERROR: The line following an explicit by-name marker was not a comment + # We assume a syntax error here rather than try to use the next line + BAD_NAMEREF = 202 # ERROR: This function offset comes before the previous offset from the same module # This hopefully gives some hint about which functions need to be rearranged. FUNCTION_OUT_OF_ORDER = 203 + # ERROR: The line following an explicit by-name marker that does _not_ expect + # a comment -- i.e. VTABLE or GLOBAL -- could not extract the name + NO_SUITABLE_NAME = 204 + @dataclass class ParserAlert: diff --git a/tools/isledecomp/isledecomp/parser/marker.py b/tools/isledecomp/isledecomp/parser/marker.py new file mode 100644 index 00000000..d2e181dd --- /dev/null +++ b/tools/isledecomp/isledecomp/parser/marker.py @@ -0,0 +1,103 @@ +import re +from typing import Optional +from enum import Enum + + +class MarkerType(Enum): + UNKNOWN = -100 + FUNCTION = 1 + STUB = 2 + SYNTHETIC = 3 + TEMPLATE = 4 + GLOBAL = 5 + VTABLE = 6 + STRING = 7 + LIBRARY = 8 + + +markerRegex = re.compile( + r"\s*//\s*(?P\w+):\s*(?P\w+)\s+(?P0x[a-f0-9]+)", + flags=re.I, +) + + +markerExactRegex = re.compile( + r"\s*// (?P[A-Z]+): (?P[A-Z0-9]+) (?P0x[a-f0-9]+)$" +) + + +class DecompMarker: + def __init__(self, marker_type: str, module: str, offset: int) -> None: + try: + self._type = MarkerType[marker_type.upper()] + except KeyError: + self._type = MarkerType.UNKNOWN + + # Convert to upper here. A lot of other analysis depends on this name + # being consistent and predictable. If the name is _not_ capitalized + # we will emit a syntax error. + self._module: str = module.upper() + self._offset: int = offset + + @property + def type(self) -> MarkerType: + return self._type + + @property + def module(self) -> str: + return self._module + + @property + def offset(self) -> int: + return self._offset + + def is_regular_function(self) -> bool: + """Regular function, meaning: not an explicit byname lookup. FUNCTION + markers can be _implicit_ byname. + FUNCTION and STUB markers are (currently) the only heterogenous marker types that + can be lumped together, although the reasons for doing so are a little vague.""" + return self._type in (MarkerType.FUNCTION, MarkerType.STUB) + + def is_explicit_byname(self) -> bool: + return self._type in ( + MarkerType.SYNTHETIC, + MarkerType.TEMPLATE, + MarkerType.LIBRARY, + ) + + def is_variable(self) -> bool: + return self._type == MarkerType.GLOBAL + + def is_synthetic(self) -> bool: + return self._type == MarkerType.SYNTHETIC + + def is_template(self) -> bool: + return self._type == MarkerType.TEMPLATE + + def is_vtable(self) -> bool: + return self._type == MarkerType.VTABLE + + def is_library(self) -> bool: + return self._type == MarkerType.LIBRARY + + def is_string(self) -> bool: + return self._type == MarkerType.STRING + + def allowed_in_func(self) -> bool: + return self._type in (MarkerType.GLOBAL, MarkerType.STRING) + + +def match_marker(line: str) -> Optional[DecompMarker]: + match = markerRegex.match(line) + if match is None: + return None + + return DecompMarker( + marker_type=match.group("type"), + module=match.group("module"), + offset=int(match.group("offset"), 16), + ) + + +def is_marker_exact(line: str) -> bool: + return markerExactRegex.match(line) is not None diff --git a/tools/isledecomp/isledecomp/parser/node.py b/tools/isledecomp/isledecomp/parser/node.py index 721a24b9..9776be75 100644 --- a/tools/isledecomp/isledecomp/parser/node.py +++ b/tools/isledecomp/isledecomp/parser/node.py @@ -1,35 +1,58 @@ +from typing import Optional from dataclasses import dataclass +from .marker import MarkerType @dataclass -class ParserNode: +class ParserSymbol: + """Exported decomp marker with all information (except the code filename) required to + cross-reference with cvdump data.""" + + type: MarkerType line_number: int - - -@dataclass -class ParserSymbol(ParserNode): module: str offset: int + name: str + + # The parser doesn't (currently) know about the code filename, but if you + # wanted to set it here after the fact, here's the spot. + filename: Optional[str] = None + + def should_skip(self) -> bool: + """The default is to compare any symbols we have""" + return False + + def is_nameref(self) -> bool: + """All symbols default to name lookup""" + return True @dataclass class ParserFunction(ParserSymbol): - name: str + # We are able to detect the closing line of a function with some reliability. + # This isn't used for anything right now, but perhaps later it will be. + end_line: Optional[int] = None + + # All marker types are referenced by name except FUNCTION/STUB. These can also be + # referenced by name, but only if this flag is true. lookup_by_name: bool = False - is_stub: bool = False - is_synthetic: bool = False - is_template: bool = False - end_line: int = -1 + + def should_skip(self) -> bool: + """Temporary helper function because reccmp expects this to be here""" + return self.type in (MarkerType.STUB, MarkerType.LIBRARY) + + def is_nameref(self) -> bool: + return ( + self.type in (MarkerType.SYNTHETIC, MarkerType.TEMPLATE, MarkerType.LIBRARY) + or self.lookup_by_name + ) @dataclass class ParserVariable(ParserSymbol): - name: str - size: int = -1 is_static: bool = False @dataclass class ParserVtable(ParserSymbol): - class_name: str - num_entries: int = -1 + pass diff --git a/tools/isledecomp/isledecomp/parser/parser.py b/tools/isledecomp/isledecomp/parser/parser.py index 336d4b0c..f9485e65 100644 --- a/tools/isledecomp/isledecomp/parser/parser.py +++ b/tools/isledecomp/isledecomp/parser/parser.py @@ -3,15 +3,19 @@ from typing import List, Iterable, Iterator from enum import Enum from .util import ( - DecompMarker, is_blank_or_comment, - match_marker, - is_marker_exact, get_class_name, + get_variable_name, get_synthetic_name, remove_trailing_comment, ) +from .marker import ( + DecompMarker, + match_marker, + is_marker_exact, +) from .node import ( + ParserSymbol, ParserFunction, ParserVariable, ParserVtable, @@ -28,44 +32,23 @@ class ReaderState(Enum): IN_GLOBAL = 5 IN_FUNC_GLOBAL = 6 IN_VTABLE = 7 + IN_SYNTHETIC = 8 + IN_LIBRARY = 9 DONE = 100 -def marker_is_stub(marker: DecompMarker) -> bool: - return marker.type.upper() == "STUB" - - -def marker_is_variable(marker: DecompMarker) -> bool: - return marker.type.upper() == "GLOBAL" - - -def marker_is_synthetic(marker: DecompMarker) -> bool: - return marker.type.upper() in ("SYNTHETIC", "TEMPLATE") - - -def marker_is_template(marker: DecompMarker) -> bool: - return marker.type.upper() == "TEMPLATE" - - -def marker_is_function(marker: DecompMarker) -> bool: - return marker.type.upper() in ("FUNCTION", "STUB") - - -def marker_is_vtable(marker: DecompMarker) -> bool: - return marker.type.upper() == "VTABLE" - - class MarkerDict: def __init__(self) -> None: self.markers: dict = {} def insert(self, marker: DecompMarker) -> bool: """Return True if this insert would overwrite""" - module = marker.module.upper() + module = marker.module if module in self.markers: return True - self.markers[module] = (marker.type, marker.offset) + # TODO: type converted back to string version here instead of using enum + self.markers[module] = (marker.type.name, marker.offset) return False def iter(self) -> Iterator[DecompMarker]: @@ -82,9 +65,7 @@ class DecompParser: # but not right now def __init__(self) -> None: # The lists to be populated as we parse - self.functions: List[ParserFunction] = [] - self.vtables: List[ParserVtable] = [] - self.variables: List[ParserVariable] = [] + self._symbols: List[ParserSymbol] = [] self.alerts: List[ParserAlert] = [] self.line_number: int = 0 @@ -113,9 +94,7 @@ def __init__(self) -> None: self.function_sig: str = "" def reset(self): - self.functions = [] - self.vtables = [] - self.variables = [] + self._symbols = [] self.alerts = [] self.line_number = 0 @@ -131,6 +110,18 @@ def reset(self): self.function_start = 0 self.function_sig = "" + @property + def functions(self) -> List[ParserSymbol]: + return [s for s in self._symbols if isinstance(s, ParserFunction)] + + @property + def vtables(self) -> List[ParserSymbol]: + return [s for s in self._symbols if isinstance(s, ParserVtable)] + + @property + def variables(self) -> List[ParserSymbol]: + return [s for s in self._symbols if isinstance(s, ParserVariable)] + def _recover(self): """We hit a syntax error and need to reset temp structures""" self.state = ReaderState.SEARCH @@ -159,10 +150,17 @@ def _function_marker(self, marker: DecompMarker): self._syntax_warning(ParserError.DUPLICATE_MODULE) self.state = ReaderState.WANT_SIG - def _synthetic_marker(self, marker: DecompMarker): + def _nameref_marker(self, marker: DecompMarker): + """Functions explicitly referenced by name are set here""" if self.fun_markers.insert(marker): self._syntax_warning(ParserError.DUPLICATE_MODULE) - self.state = ReaderState.IN_TEMPLATE + + if marker.is_template(): + self.state = ReaderState.IN_TEMPLATE + elif marker.is_synthetic(): + self.state = ReaderState.IN_SYNTHETIC + else: + self.state = ReaderState.IN_LIBRARY def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False): end_line = self.line_number @@ -173,16 +171,14 @@ def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False) end_line -= 1 for marker in self.fun_markers.iter(): - self.functions.append( + self._symbols.append( ParserFunction( + type=marker.type, line_number=self.function_start, module=marker.module, offset=marker.offset, - lookup_by_name=lookup_by_name, - is_stub=marker_is_stub(marker), - is_synthetic=marker_is_synthetic(marker), - is_template=marker_is_template(marker), name=self.function_sig, + lookup_by_name=lookup_by_name, end_line=end_line, ) ) @@ -202,12 +198,13 @@ def _vtable_done(self, class_name: str = None): class_name = self.last_line.strip() for marker in self.tbl_markers.iter(): - self.vtables.append( + self._symbols.append( ParserVtable( + type=marker.type, line_number=self.line_number, module=marker.module, offset=marker.offset, - class_name=class_name, + name=class_name, ) ) @@ -223,14 +220,19 @@ def _variable_marker(self, marker: DecompMarker): else: self.state = ReaderState.IN_GLOBAL - def _variable_done(self): + def _variable_done(self, name: str): + if not name.startswith("g_"): + self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX) + for marker in self.var_markers.iter(): - self.variables.append( + self._symbols.append( ParserVariable( + type=marker.type, line_number=self.line_number, module=marker.module, offset=marker.offset, - name=self.last_line.strip(), + name=name, + is_static=self.state == ReaderState.IN_FUNC_GLOBAL, ) ) @@ -246,12 +248,23 @@ def _handle_marker(self, marker: DecompMarker): self._syntax_error(ParserError.UNEXPECTED_MARKER) return + # If we are inside a function, the only markers we accept are: + # GLOBAL, indicating a static variable + # STRING, indicating a literal string. + # Otherwise we assume that the parser missed the end of the function + # and we have moved on to something else. + # This is unlikely to occur with well-formed code, but + # we can recover easily by just ending the function here. + if self.state == ReaderState.IN_FUNC and not marker.allowed_in_func(): + self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) + self._function_done(unexpected=True) + # TODO: How uncertain are we of detecting the end of a function # in a clang-formatted file? For now we assume we have missed the # end if we detect a non-GLOBAL marker while state is IN_FUNC. # Maybe these cases should be syntax errors instead - if marker_is_function(marker): + if marker.is_regular_function(): if self.state in ( ReaderState.SEARCH, ReaderState.WANT_SIG, @@ -259,29 +272,41 @@ def _handle_marker(self, marker: DecompMarker): # We will allow multiple offsets if we have just begun # the code block, but not after we hit the curly brace. self._function_marker(marker) - elif self.state == ReaderState.IN_FUNC: - # We hit another offset unexpectedly. - # We can recover easily by just ending the function here. - self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) - self._function_done(unexpected=True) - - # Start the next function right after so we can - # read the next line. - self._function_marker(marker) else: self._syntax_error(ParserError.INCOMPATIBLE_MARKER) - elif marker_is_synthetic(marker): + elif marker.is_template(): if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE): - self._synthetic_marker(marker) - elif self.state == ReaderState.IN_FUNC: - self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) - self._function_done(lookup_by_name=True, unexpected=True) - self._synthetic_marker(marker) + self._nameref_marker(marker) else: self._syntax_error(ParserError.INCOMPATIBLE_MARKER) - elif marker_is_variable(marker): + elif marker.is_synthetic(): + if self.state in (ReaderState.SEARCH, ReaderState.IN_SYNTHETIC): + self._nameref_marker(marker) + else: + self._syntax_error(ParserError.INCOMPATIBLE_MARKER) + + elif marker.is_library(): + if self.state in (ReaderState.SEARCH, ReaderState.IN_LIBRARY): + self._nameref_marker(marker) + else: + self._syntax_error(ParserError.INCOMPATIBLE_MARKER) + + elif marker.is_string(): + # TODO: We are ignoring string markers for the moment. + # We already have a lot of them in the codebase, though, so we'll + # hang onto them for now in case we can use them later. + # To match up string constants, the strategy will be: + # 1. Use cvdump to find all string constants in the recomp + # 2. In the original binary, look at relocated vaddrs from .rdata + # 3. Try to match up string data from #1 with locations in #2 + + # Throw the syntax error we would throw if we were parsing these + if self.state not in (ReaderState.SEARCH, ReaderState.IN_FUNC): + self._syntax_error(ParserError.INCOMPATIBLE_MARKER) + + elif marker.is_variable(): if self.state in ( ReaderState.SEARCH, ReaderState.IN_GLOBAL, @@ -292,13 +317,9 @@ def _handle_marker(self, marker: DecompMarker): else: self._syntax_error(ParserError.INCOMPATIBLE_MARKER) - elif marker_is_vtable(marker): + elif marker.is_vtable(): if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE): self._vtable_marker(marker) - elif self.state == ReaderState.IN_FUNC: - self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION) - self._function_done(unexpected=True) - self._vtable_marker(marker) else: self._syntax_error(ParserError.INCOMPATIBLE_MARKER) @@ -322,12 +343,16 @@ def read_line(self, line: str): return line_strip = line.strip() - if self.state == ReaderState.IN_TEMPLATE: - # TEMPLATE functions are a special case. The signature is - # given on the next line (in a // comment) + if self.state in ( + ReaderState.IN_SYNTHETIC, + ReaderState.IN_TEMPLATE, + ReaderState.IN_LIBRARY, + ): + # Explicit nameref functions provide the function name + # on the next line (in a // comment) name = get_synthetic_name(line) if name is None: - self._syntax_error(ParserError.BAD_SYNTHETIC) + self._syntax_error(ParserError.BAD_NAMEREF) else: self.function_sig = name self._function_starts_here() @@ -384,8 +409,28 @@ def read_line(self, line: str): self._function_done() elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL): - if not is_blank_or_comment(line): - self._variable_done() + # TODO: Known problem that an error here will cause us to abandon a + # function we have already parsed if state == IN_FUNC_GLOBAL. + # However, we are not tolerant of _any_ syntax problems in our + # CI actions, so the solution is to just fix the invalid marker. + if is_blank_or_comment(line): + self._syntax_error(ParserError.NO_SUITABLE_NAME) + return + + # We don't have a foolproof mechanism to tell what is and is not a variable. + # If the GLOBAL is being declared on a `return` statement, though, this is + # not correct. It is either a string literal (which will be handled differently) + # or it is not the variable declaration, which is incorrect decomp syntax. + if line.strip().startswith("return"): + self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE) + return + + name = get_variable_name(line) + if name is None: + self._syntax_error(ParserError.NO_SUITABLE_NAME) + return + + self._variable_done(name) elif self.state == ReaderState.IN_VTABLE: vtable_class = get_class_name(line) diff --git a/tools/isledecomp/isledecomp/parser/util.py b/tools/isledecomp/isledecomp/parser/util.py index 99ab1c56..a106d841 100644 --- a/tools/isledecomp/isledecomp/parser/util.py +++ b/tools/isledecomp/isledecomp/parser/util.py @@ -1,17 +1,6 @@ # C++ Parser utility functions and data structures -from __future__ import annotations # python <3.10 compatibility import re -from collections import namedtuple - -DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"]) - - -markerRegex = re.compile( - r"\s*//\s*(\w+):\s*(\w+)\s+(0x[a-f0-9]+)", - flags=re.I, -) - -markerExactRegex = re.compile(r"\s*// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$") +from typing import Optional # The goal here is to just read whatever is on the next line, so some # flexibility in the formatting seems OK @@ -23,7 +12,7 @@ trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$") -def get_synthetic_name(line: str) -> str | None: +def get_synthetic_name(line: str) -> Optional[str]: """Synthetic names appear on a single line comment on the line after the marker. If that's not what we have, return None""" template_match = templateCommentRegex.match(line) @@ -51,20 +40,6 @@ def is_blank_or_comment(line: str) -> bool: ) -def match_marker(line: str) -> DecompMarker | None: - match = markerRegex.match(line) - if match is None: - return None - - return DecompMarker( - type=match.group(1), module=match.group(2), offset=int(match.group(3), 16) - ) - - -def is_marker_exact(line: str) -> bool: - return markerExactRegex.match(line) is not None - - template_class_decl_regex = re.compile( r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)<([\w]+)\s*(\*+)?\s*>" ) @@ -73,7 +48,7 @@ def is_marker_exact(line: str) -> bool: class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)") -def get_class_name(line: str) -> str | None: +def get_class_name(line: str) -> Optional[str]: """For VTABLE markers, extract the class name from the code line or comment where it appears.""" @@ -93,3 +68,21 @@ def get_class_name(line: str) -> str | None: return match.group(1) return None + + +global_regex = re.compile(r"(?Pg_\w+)") +less_strict_global_regex = re.compile(r"(?P\w+)(?:\)\(|\[.*|\s*=.*|;)") + + +def get_variable_name(line: str) -> Optional[str]: + """Grab the name of the variable annotated with the GLOBAL marker. + Correct syntax would have the variable start with the prefix "g_" + but we will try to match regardless.""" + + if (match := global_regex.search(line)) is not None: + return match.group("name") + + if (match := less_strict_global_regex.search(line)) is not None: + return match.group("name") + + return None diff --git a/tools/isledecomp/tests/test_parser.py b/tools/isledecomp/tests/test_parser.py index c31602f1..853e773d 100644 --- a/tools/isledecomp/tests/test_parser.py +++ b/tools/isledecomp/tests/test_parser.py @@ -115,7 +115,7 @@ def test_different_markers_same_module(parser): # Use first marker declaration, don't replace assert len(parser.functions) == 1 - assert parser.functions[0].is_stub is False + assert parser.functions[0].should_skip() is False # Should alert to this assert len(parser.alerts) == 1 @@ -193,7 +193,7 @@ def test_multiple_vtables(parser): ) assert len(parser.alerts) == 0 assert len(parser.vtables) == 2 - assert parser.vtables[0].class_name == "MxString" + assert parser.vtables[0].name == "MxString" def test_multiple_vtables_same_module(parser): @@ -247,7 +247,7 @@ def test_synthetic_no_comment(parser): ) assert len(parser.functions) == 0 assert len(parser.alerts) == 1 - assert parser.alerts[0].code == ParserError.BAD_SYNTHETIC + assert parser.alerts[0].code == ParserError.BAD_NAMEREF assert parser.state == ReaderState.SEARCH @@ -375,3 +375,70 @@ def test_unexpected_eof(parser): assert len(parser.functions) == 1 assert len(parser.alerts) == 1 assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE + + +def test_global_variable_prefix(parser): + """Global and static variables should have the g_ prefix.""" + parser.read_lines( + [ + "// GLOBAL: TEST 0x1234", + 'const char* g_msg = "hello";', + ] + ) + assert len(parser.variables) == 1 + assert len(parser.alerts) == 0 + + parser.read_lines( + [ + "// GLOBAL: TEXT 0x5555", + "int test = 5;", + ] + ) + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.GLOBAL_MISSING_PREFIX + # In spite of that, we should still grab the variable name. + assert parser.variables[1].name == "test" + + +def test_global_nomatch(parser): + """We do our best to grab the variable name, even without the g_ prefix + but this (by design) will not match everything.""" + + parser.read_lines( + [ + "// GLOBAL: TEST 0x1234", + "FunctionCall();", + ] + ) + assert len(parser.variables) == 0 + assert len(parser.alerts) == 1 + assert parser.alerts[0].code == ParserError.NO_SUITABLE_NAME + + +def test_static_variable(parser): + """We can detect whether a variable is a static function variable + based on the parser's state when we detect it. + Checking for the word `static` alone is not a good test. + Static class variables are filed as S_GDATA32, same as regular globals. + Only function statics are filed as S_LDATA32.""" + + parser.read_lines( + [ + "// GLOBAL: TEST 0x1234", + "int g_test = 1234;", + ] + ) + assert len(parser.variables) == 1 + assert parser.variables[0].is_static is False + + parser.read_lines( + [ + "// FUNCTION: TEST 0x5555", + "void test_function() {", + "// GLOBAL: TEST 0x8888", + "int g_internal = 0;", + "}", + ] + ) + assert len(parser.variables) == 2 + assert parser.variables[1].is_static is True diff --git a/tools/isledecomp/tests/test_parser_statechange.py b/tools/isledecomp/tests/test_parser_statechange.py index 8d18d547..cb54cf0a 100644 --- a/tools/isledecomp/tests/test_parser_statechange.py +++ b/tools/isledecomp/tests/test_parser_statechange.py @@ -11,9 +11,11 @@ (_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None), (_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None), (_rs.SEARCH, "STUB", _rs.WANT_SIG, None), - (_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None), + (_rs.SEARCH, "SYNTHETIC", _rs.IN_SYNTHETIC, None), (_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None), (_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None), + (_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None), + (_rs.SEARCH, "STRING", _rs.SEARCH, None), (_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None), (_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), @@ -21,20 +23,26 @@ (_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.WANT_SIG, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.WANT_SIG, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), (_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION), - (_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION), + (_rs.IN_FUNC, "SYNTHETIC", _rs.IN_SYNTHETIC, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION), (_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION), + (_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION), + (_rs.IN_FUNC, "STRING", _rs.IN_FUNC, None), (_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), - (_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None), + (_rs.IN_TEMPLATE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None), (_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_TEMPLATE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_TEMPLATE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER), @@ -42,6 +50,8 @@ (_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "LIBRARY", _rs.SEARCH, _pe.UNEXPECTED_MARKER), + (_rs.WANT_CURLY, "STRING", _rs.SEARCH, _pe.UNEXPECTED_MARKER), (_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None), @@ -49,6 +59,8 @@ (_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None), @@ -56,6 +68,8 @@ (_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_FUNC_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), @@ -63,6 +77,26 @@ (_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), (_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None), + (_rs.IN_VTABLE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_VTABLE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + + (_rs.IN_SYNTHETIC, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_SYNTHETIC, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_SYNTHETIC, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_SYNTHETIC, "SYNTHETIC", _rs.IN_SYNTHETIC, None), + (_rs.IN_SYNTHETIC, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_SYNTHETIC, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_SYNTHETIC, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_SYNTHETIC, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + + (_rs.IN_LIBRARY, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_LIBRARY, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_LIBRARY, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_LIBRARY, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_LIBRARY, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_LIBRARY, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), + (_rs.IN_LIBRARY, "LIBRARY", _rs.IN_LIBRARY, None), + (_rs.IN_LIBRARY, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER), ] # fmt: on @@ -105,47 +139,3 @@ def test_state_search_line(line: str): p.read_line(line) assert p.state == _rs.SEARCH assert len(p.alerts) == 0 - - -global_lines = [ - ("// A comment", _rs.IN_GLOBAL), - ("", _rs.IN_GLOBAL), - ("\t", _rs.IN_GLOBAL), - (" ", _rs.IN_GLOBAL), - # TODO: no check for "likely" variable declaration so these all count - ("void function()", _rs.SEARCH), - ("int x = 123;", _rs.SEARCH), - ("just some text", _rs.SEARCH), -] - - -@pytest.mark.parametrize("line, new_state", global_lines) -def test_state_global_line(line: str, new_state: _rs): - p = DecompParser() - p.read_line("// GLOBAL: TEST 0x1234") - assert p.state == _rs.IN_GLOBAL - p.read_line(line) - assert p.state == new_state - - -# mostly same as above -in_func_global_lines = [ - ("// A comment", _rs.IN_FUNC_GLOBAL), - ("", _rs.IN_FUNC_GLOBAL), - ("\t", _rs.IN_FUNC_GLOBAL), - (" ", _rs.IN_FUNC_GLOBAL), - # TODO: no check for "likely" variable declaration so these all count - ("void function()", _rs.IN_FUNC), - ("int x = 123;", _rs.IN_FUNC), - ("just some text", _rs.IN_FUNC), -] - - -@pytest.mark.parametrize("line, new_state", in_func_global_lines) -def test_state_in_func_global_line(line: str, new_state: _rs): - p = DecompParser() - p.state = _rs.IN_FUNC - p.read_line("// GLOBAL: TEST 0x1234") - assert p.state == _rs.IN_FUNC_GLOBAL - p.read_line(line) - assert p.state == new_state diff --git a/tools/isledecomp/tests/test_parser_util.py b/tools/isledecomp/tests/test_parser_util.py index 643abf3e..af102fc2 100644 --- a/tools/isledecomp/tests/test_parser_util.py +++ b/tools/isledecomp/tests/test_parser_util.py @@ -1,11 +1,15 @@ import pytest from isledecomp.parser.parser import MarkerDict -from isledecomp.parser.util import ( +from isledecomp.parser.marker import ( DecompMarker, - is_blank_or_comment, + MarkerType, match_marker, is_marker_exact, +) +from isledecomp.parser.util import ( + is_blank_or_comment, get_class_name, + get_variable_name, ) @@ -96,7 +100,7 @@ def test_marker_dict_type_replace(): d.insert(DecompMarker("STUB", "TEST", 0x1234)) markers = list(d.iter()) assert len(markers) == 1 - assert markers[0].type == "FUNCTION" + assert markers[0].type == MarkerType.FUNCTION class_name_match_cases = [ @@ -131,3 +135,26 @@ def test_get_class_name(line: str, class_name: str): @pytest.mark.parametrize("line", class_name_no_match_cases) def test_get_class_name_none(line: str): assert get_class_name(line) is None + + +variable_name_cases = [ + # with prefix for easy access + ("char* g_test;", "g_test"), + ("g_test;", "g_test"), + ("void (*g_test)(int);", "g_test"), + ("char g_test[50];", "g_test"), + ("char g_test[50] = {1234,", "g_test"), + ("int g_test = 500;", "g_test"), + # no prefix + ("char* hello;", "hello"), + ("hello;", "hello"), + ("void (*hello)(int);", "hello"), + ("char hello[50];", "hello"), + ("char hello[50] = {1234,", "hello"), + ("int hello = 500;", "hello"), +] + + +@pytest.mark.parametrize("line,name", variable_name_cases) +def test_get_variable_name(line: str, name: str): + assert get_variable_name(line) == name diff --git a/tools/reccmp/reccmp.py b/tools/reccmp/reccmp.py index 913154f5..c7db5e2c 100755 --- a/tools/reccmp/reccmp.py +++ b/tools/reccmp/reccmp.py @@ -316,7 +316,7 @@ def main(): parser.read_lines(srcfile) for fun in parser.functions: - if fun.is_stub: + if fun.should_skip(): continue if fun.module != basename: @@ -330,7 +330,7 @@ def main(): else: continue - if fun.lookup_by_name: + if fun.is_nameref(): recinfo = syminfo.get_recompiled_address_from_name(fun.name) if not recinfo: continue