mirror of
https://github.com/isledecomp/isle-portable.git
synced 2026-02-03 12:31:15 +00:00
200 lines
7.6 KiB
Python
200 lines
7.6 KiB
Python
import re
|
|
from typing import Iterable, Tuple
|
|
from collections import namedtuple
|
|
from .types import CvdumpTypesParser
|
|
|
|
# e.g. `*** PUBLICS`
|
|
_section_change_regex = re.compile(r"\*\*\* (?P<section>[A-Z/ ]{2,})")
|
|
|
|
# e.g. ` 27 00034EC0 28 00034EE2 29 00034EE7 30 00034EF4`
|
|
_line_addr_pairs_findall = re.compile(r"\s+(?P<line_no>\d+) (?P<addr>[A-F0-9]{8})")
|
|
|
|
# We assume no spaces in the file name
|
|
# e.g. ` Z:\lego-island\isle\LEGO1\viewmanager\viewroi.cpp (None), 0001:00034E90-00034E97, line/addr pairs = 2`
|
|
_lines_subsection_header = re.compile(
|
|
r"^\s*(?P<filename>\S+).*?, (?P<section>[A-F0-9]{4}):(?P<start>[A-F0-9]{8})-(?P<end>[A-F0-9]{8}), line/addr pairs = (?P<len>\d+)"
|
|
)
|
|
|
|
# e.g. `S_PUB32: [0001:0003FF60], Flags: 00000000, __read`
|
|
_publics_line_regex = re.compile(
|
|
r"^(?P<type>\w+): \[(?P<section>\w{4}):(?P<offset>\w{8})], Flags: (?P<flags>\w{8}), (?P<name>\S+)"
|
|
)
|
|
|
|
# e.g. `(00008C) S_GPROC32: [0001:00034E90], Cb: 00000007, Type: 0x1024, ViewROI::IntrinsicImportance`
|
|
_symbol_line_regex = re.compile(
|
|
r"\(\w+\) (?P<type>\S+): \[(?P<section>\w{4}):(?P<offset>\w{8})\], Cb: (?P<size>\w+), Type:\s+\S+, (?P<name>.+)"
|
|
)
|
|
|
|
# e.g. ` Debug start: 00000008, Debug end: 0000016E`
|
|
_gproc_debug_regex = re.compile(
|
|
r"\s*Debug start: (?P<start>\w{8}), Debug end: (?P<end>\w{8})"
|
|
)
|
|
|
|
# e.g. ` 00DA 0001:00000000 00000073 60501020`
|
|
_section_contrib_regex = re.compile(
|
|
r"\s*(?P<module>\w{4}) (?P<section>\w{4}):(?P<offset>\w{8}) (?P<size>\w{8}) (?P<flags>\w{8})"
|
|
)
|
|
|
|
# e.g. `S_GDATA32: [0003:000004A4], Type: T_32PRCHAR(0470), g_set`
|
|
_gdata32_regex = re.compile(
|
|
r"S_GDATA32: \[(?P<section>\w{4}):(?P<offset>\w{8})\], Type:\s*(?P<type>\S+), (?P<name>.+)"
|
|
)
|
|
|
|
# e.g. 0003 "CMakeFiles/isle.dir/ISLE/res/isle.rc.res"
|
|
# e.g. 0004 "C:\work\lego-island\isle\3rdparty\smartheap\SHLW32MT.LIB" "check.obj"
|
|
_module_regex = re.compile(r"(?P<id>\w{4})(?: \"(?P<lib>.+?)\")?(?: \"(?P<obj>.+?)\")")
|
|
|
|
# User functions only
|
|
LinesEntry = namedtuple("LinesEntry", "filename line_no section offset")
|
|
|
|
# Strings, vtables, functions
|
|
# superset of everything else
|
|
# only place you can find the C symbols (library functions, smacker, etc)
|
|
PublicsEntry = namedtuple("PublicsEntry", "type section offset flags name")
|
|
|
|
# S_GPROC32 = functions
|
|
SymbolsEntry = namedtuple("SymbolsEntry", "type section offset size name")
|
|
|
|
# (Estimated) size of any symbol
|
|
SizeRefEntry = namedtuple("SizeRefEntry", "module section offset size")
|
|
|
|
# global variables
|
|
GdataEntry = namedtuple("GdataEntry", "section offset type name")
|
|
|
|
ModuleEntry = namedtuple("ModuleEntry", "id lib obj")
|
|
|
|
|
|
class CvdumpParser:
|
|
# pylint: disable=too-many-instance-attributes
|
|
def __init__(self) -> None:
|
|
self._section: str = ""
|
|
self._lines_function: Tuple[str, int] = ("", 0)
|
|
|
|
self.lines = {}
|
|
self.publics = []
|
|
self.symbols = []
|
|
self.sizerefs = []
|
|
self.globals = []
|
|
self.modules = []
|
|
|
|
self.types = CvdumpTypesParser()
|
|
|
|
def _lines_section(self, line: str):
|
|
"""Parsing entries from the LINES section. We only care about the pairs of
|
|
line_number and address and the subsection header to indicate which code file
|
|
we are in."""
|
|
|
|
# Subheader indicates a new function and possibly a new code filename.
|
|
# Save the section here because it is not given on the lines that follow.
|
|
if (match := _lines_subsection_header.match(line)) is not None:
|
|
self._lines_function = (
|
|
match.group("filename"),
|
|
int(match.group("section"), 16),
|
|
)
|
|
return
|
|
|
|
# Match any pairs as we find them
|
|
for line_no, offset in _line_addr_pairs_findall.findall(line):
|
|
key = (self._lines_function[1], int(offset, 16))
|
|
self.lines[key] = (self._lines_function[0], int(line_no))
|
|
|
|
def _publics_section(self, line: str):
|
|
"""Match each line from PUBLICS and pull out the symbol information.
|
|
These are MSVC mangled symbol names. String constants and vtable
|
|
addresses can only be found here."""
|
|
if (match := _publics_line_regex.match(line)) is not None:
|
|
self.publics.append(
|
|
PublicsEntry(
|
|
type=match.group("type"),
|
|
section=int(match.group("section"), 16),
|
|
offset=int(match.group("offset"), 16),
|
|
flags=int(match.group("flags"), 16),
|
|
name=match.group("name"),
|
|
)
|
|
)
|
|
|
|
def _globals_section(self, line: str):
|
|
"""S_PROCREF may be useful later.
|
|
Right now we just want S_GDATA32 symbols because it is the simplest
|
|
way to access global variables."""
|
|
if (match := _gdata32_regex.match(line)) is not None:
|
|
self.globals.append(
|
|
GdataEntry(
|
|
section=int(match.group("section"), 16),
|
|
offset=int(match.group("offset"), 16),
|
|
type=match.group("type"),
|
|
name=match.group("name"),
|
|
)
|
|
)
|
|
|
|
def _symbols_section(self, line: str):
|
|
"""We are interested in S_GPROC32 symbols only."""
|
|
if (match := _symbol_line_regex.match(line)) is not None:
|
|
if match.group("type") == "S_GPROC32":
|
|
self.symbols.append(
|
|
SymbolsEntry(
|
|
type=match.group("type"),
|
|
section=int(match.group("section"), 16),
|
|
offset=int(match.group("offset"), 16),
|
|
size=int(match.group("size"), 16),
|
|
name=match.group("name"),
|
|
)
|
|
)
|
|
|
|
def _section_contributions(self, line: str):
|
|
"""Gives the size of elements across all sections of the binary.
|
|
This is the easiest way to get the data size for .data and .rdata
|
|
members that do not have a primitive data type."""
|
|
if (match := _section_contrib_regex.match(line)) is not None:
|
|
self.sizerefs.append(
|
|
SizeRefEntry(
|
|
module=int(match.group("module"), 16),
|
|
section=int(match.group("section"), 16),
|
|
offset=int(match.group("offset"), 16),
|
|
size=int(match.group("size"), 16),
|
|
)
|
|
)
|
|
|
|
def _modules_section(self, line: str):
|
|
"""Record the object file (and lib file, if used) linked into the binary.
|
|
The auto-incrementing id is cross-referenced in SECTION CONTRIBUTIONS
|
|
(and perhaps other locations)"""
|
|
if (match := _module_regex.match(line)) is not None:
|
|
self.modules.append(
|
|
ModuleEntry(
|
|
id=int(match.group("id"), 16),
|
|
lib=match.group("lib"),
|
|
obj=match.group("obj"),
|
|
)
|
|
)
|
|
|
|
def read_line(self, line: str):
|
|
if (match := _section_change_regex.match(line)) is not None:
|
|
self._section = match.group(1)
|
|
return
|
|
|
|
if self._section == "TYPES":
|
|
self.types.read_line(line)
|
|
|
|
elif self._section == "SYMBOLS":
|
|
self._symbols_section(line)
|
|
|
|
elif self._section == "LINES":
|
|
self._lines_section(line)
|
|
|
|
elif self._section == "PUBLICS":
|
|
self._publics_section(line)
|
|
|
|
elif self._section == "SECTION CONTRIBUTIONS":
|
|
self._section_contributions(line)
|
|
|
|
elif self._section == "GLOBALS":
|
|
self._globals_section(line)
|
|
|
|
elif self._section == "MODULES":
|
|
self._modules_section(line)
|
|
|
|
def read_lines(self, lines: Iterable[str]):
|
|
for line in lines:
|
|
self.read_line(line)
|