Get vtable class name

This commit is contained in:
disinvite 2023-12-02 18:32:39 -05:00
parent 9f1302e8d8
commit cbeb4168e0
4 changed files with 75 additions and 4 deletions

View File

@ -7,6 +7,7 @@
is_blank_or_comment,
match_marker,
is_marker_exact,
get_class_name,
get_synthetic_name,
remove_trailing_comment,
)
@ -195,14 +196,18 @@ def _vtable_marker(self, marker: DecompMarker):
self._syntax_warning(ParserError.DUPLICATE_MODULE)
self.state = ReaderState.IN_VTABLE
def _vtable_done(self):
def _vtable_done(self, class_name: str = None):
if class_name is None:
# Best we can do
class_name = self.last_line.strip()
for marker in self.tbl_markers.iter():
self.vtables.append(
ParserVtable(
line_number=self.line_number,
module=marker.module,
offset=marker.offset,
class_name=self.last_line.strip(),
class_name=class_name,
)
)
@ -380,8 +385,9 @@ def read_line(self, line: str):
self._variable_done()
elif self.state == ReaderState.IN_VTABLE:
if not is_blank_or_comment(line):
self._vtable_done()
vtable_class = get_class_name(line)
if vtable_class is not None:
self._vtable_done(class_name=vtable_class)
def read_lines(self, lines: Iterable):
for line in lines:

View File

@ -63,3 +63,33 @@ def match_marker(line: str) -> DecompMarker | None:
def is_marker_exact(line: str) -> bool:
    """Return True when ``markerExactRegex.match`` produces a match for ``line``."""
    exact_match = markerExactRegex.match(line)
    return exact_match is not None
template_class_decl_regex = re.compile(
r"\s*(?:\/\/)?\s*class (\w+)<([\w]+)\s*(\*+)?\s*>"
)
class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*class (\w+)")
def get_class_name(line: str) -> str | None:
"""For VTABLE markers, extract the class name from the code line or comment
where it appears."""
match = template_class_decl_regex.match(line)
if match is not None:
# For template classes, we should reformat the class name so it matches
# the output from cvdump: one space between the template type and any asterisks
# if it is a pointer type.
(class_name, template_type, asterisks) = match.groups()
if asterisks is not None:
return f"{class_name}<{template_type} {asterisks}>"
return f"{class_name}<{template_type}>"
match = class_decl_regex.match(line)
if match is not None:
return match.group(1)
return None

View File

@ -193,6 +193,7 @@ def test_multiple_vtables(parser):
)
assert len(parser.alerts) == 0
assert len(parser.vtables) == 2
assert parser.vtables[0].class_name == "MxString"
def test_multiple_vtables_same_module(parser):

View File

@ -5,6 +5,7 @@
is_blank_or_comment,
match_marker,
is_marker_exact,
get_class_name,
)
@ -96,3 +97,36 @@ def test_marker_dict_type_replace():
markers = list(d.iter())
assert len(markers) == 1
assert markers[0].type == "FUNCTION"
# Pairs of (input line, class name expected from get_class_name).
class_name_match_cases = [
    ("class MxString {", "MxString"),
    ("// class MxString", "MxString"),
    ("class MxString : public MxCore {", "MxString"),
    ("class MxPtrList<MxPresenter>", "MxPtrList<MxPresenter>"),
    # The goal is to be able to match a symbol such as
    # MxList<LegoPathController *>::`vftable', so the extracted name has to
    # use the same spelling: when the template type is a pointer, exactly one
    # space separates the type name from the asterisk.
    ("// class MxList<LegoPathController *>", "MxList<LegoPathController *>"),
    ("// class MxList<LegoPathController*>", "MxList<LegoPathController *>"),
    ("// class MxList<LegoPathController* >", "MxList<LegoPathController *>"),
    # Pointer-to-pointer template types may never occur, but are covered anyway.
    ("// class MxList<LegoPathController**>", "MxList<LegoPathController **>"),
]


@pytest.mark.parametrize("line, class_name", class_name_match_cases)
def test_get_class_name(line: str, class_name: str):
    """Every sample line must yield the class name it is paired with."""
    extracted = get_class_name(line)
    assert extracted == class_name
# Lines for which get_class_name is expected to return None.
class_name_no_match_cases = [
    "MxString { ",
    "clas MxString",
    "// MxPtrList<MxPresenter>::`scalar deleting destructor'",
]


@pytest.mark.parametrize("line", class_name_no_match_cases)
def test_get_class_name_none(line: str):
    """Lines without a class declaration must not produce a class name."""
    extracted = get_class_name(line)
    assert extracted is None