mirror of
https://github.com/isledecomp/isle.git
synced 2026-01-23 16:21:15 +00:00
added base vtable functionality
This commit is contained in:
parent
153f4a872e
commit
9b14678652
1
.gitignore
vendored
1
.gitignore
vendored
@ -16,3 +16,4 @@ ISLE.EXE
|
||||
LEGO1.DLL
|
||||
build/
|
||||
*.swp
|
||||
__pycache__/
|
||||
|
||||
40
tools/reccmp/modules/bin.py
Normal file
40
tools/reccmp/modules/bin.py
Normal file
@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from modules.logger import logger
|
||||
import struct
|
||||
|
||||
# Declare a class that can automatically convert virtual executable addresses
|
||||
# to file addresses
|
||||
class Bin:
|
||||
file = None
|
||||
|
||||
def __init__(self, filename):
|
||||
logger.debug('Parsing headers of "%s"... ', filename)
|
||||
self.file = open(filename, 'rb')
|
||||
|
||||
#HACK: Strictly, we should be parsing the header, but we know where
|
||||
# everything is in these two files so we just jump straight there
|
||||
|
||||
# Read ImageBase
|
||||
self.file.seek(0xB4)
|
||||
self.imagebase, = struct.unpack('<i', self.file.read(4))
|
||||
|
||||
# Read .text VirtualAddress
|
||||
self.file.seek(0x184)
|
||||
self.textvirt, = struct.unpack('<i', self.file.read(4))
|
||||
|
||||
# Read .text PointerToRawData
|
||||
self.file.seek(0x18C)
|
||||
self.textraw, = struct.unpack('<i', self.file.read(4))
|
||||
logger.debug('... Parsing finished')
|
||||
|
||||
def __del__(self):
|
||||
if self.file:
|
||||
self.file.close()
|
||||
|
||||
def get_addr(self, virt):
|
||||
return virt - self.imagebase - self.textvirt + self.textraw
|
||||
|
||||
def read(self, offset, size):
|
||||
self.file.seek(self.get_addr(offset))
|
||||
return self.file.read(size)
|
||||
3
tools/reccmp/modules/logger.py
Normal file
3
tools/reccmp/modules/logger.py
Normal file
@ -0,0 +1,3 @@
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
248
tools/reccmp/modules/syminfo.py
Normal file
248
tools/reccmp/modules/syminfo.py
Normal file
@ -0,0 +1,248 @@
|
||||
from modules.logger import logger
|
||||
import modules.util as util
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
fieldlists = dict()
|
||||
classes = dict()
|
||||
|
||||
class VTableEntry:
|
||||
def __init__(self):
|
||||
self.name = None
|
||||
self.offset = -1
|
||||
|
||||
class Class:
|
||||
def __init__(self):
|
||||
self.name = None
|
||||
self.id = None
|
||||
self.field_list = None
|
||||
self.size = None
|
||||
|
||||
class FieldList:
|
||||
def __init__(self):
|
||||
self.id = None
|
||||
self.baseclass = None
|
||||
self.vtable = []
|
||||
self.members = []
|
||||
|
||||
class RecompiledInfo:
|
||||
def __init__(self):
|
||||
self.addr = None
|
||||
self.size = None
|
||||
self.name = None
|
||||
self.start = None
|
||||
|
||||
def remove_quotes(l):
|
||||
while l[0] == '\'':
|
||||
l = l[1:]
|
||||
|
||||
while l[len(l)-1] == '\'':
|
||||
l = l[0:len(l)-1]
|
||||
|
||||
return l
|
||||
|
||||
# Declare a class that parses the output of cvdump for fast access later
|
||||
class SymInfo:
|
||||
funcs = {}
|
||||
lines = {}
|
||||
names = {}
|
||||
|
||||
def __init__(self, pdb, file, wine_path_converter):
|
||||
call = [util.get_file_in_script_dir('cvdump.exe'), '-l', '-s', '-t']
|
||||
|
||||
if wine_path_converter:
|
||||
# Run cvdump through wine and convert path to Windows-friendly wine path
|
||||
call.insert(0, 'wine')
|
||||
call.append(wine_path_converter.get_wine_path(pdb))
|
||||
else:
|
||||
call.append(pdb)
|
||||
|
||||
logger.info('Parsing %s ...', pdb)
|
||||
logger.debug('Command = %r', call)
|
||||
line_dump = subprocess.check_output(call).decode('utf-8').split('\r\n')
|
||||
|
||||
current_section = None
|
||||
|
||||
logger.debug('Parsing output of cvdump.exe ...')
|
||||
|
||||
for i, line in enumerate(line_dump):
|
||||
if line.startswith('***'):
|
||||
current_section = line[4:]
|
||||
|
||||
if current_section == 'SYMBOLS' and 'S_GPROC32' in line:
|
||||
addr = int(line[26:34], 16)
|
||||
|
||||
info = RecompiledInfo()
|
||||
info.addr = addr + file.imagebase + file.textvirt
|
||||
|
||||
use_dbg_offs = False
|
||||
if use_dbg_offs:
|
||||
debug_offs = line_dump[i + 2]
|
||||
debug_start = int(debug_offs[22:30], 16)
|
||||
debug_end = int(debug_offs[43:], 16)
|
||||
|
||||
info.start = debug_start
|
||||
info.size = debug_end - debug_start
|
||||
else:
|
||||
info.start = 0
|
||||
info.size = int(line[41:49], 16)
|
||||
|
||||
info.name = line[77:]
|
||||
|
||||
self.names[info.name] = info
|
||||
self.funcs[addr] = info
|
||||
elif current_section == 'LINES' and line.startswith(' ') and not line.startswith(' '):
|
||||
sourcepath = line.split()[0]
|
||||
|
||||
if wine_path_converter:
|
||||
# Convert filename to Unix path for file compare
|
||||
sourcepath = wine_path_converter.get_unix_path(sourcepath)
|
||||
|
||||
if sourcepath not in self.lines:
|
||||
self.lines[sourcepath] = {}
|
||||
|
||||
j = i + 2
|
||||
while True:
|
||||
ll = line_dump[j].split()
|
||||
if len(ll) == 0:
|
||||
break
|
||||
|
||||
k = 0
|
||||
while k < len(ll):
|
||||
linenum = int(ll[k + 0])
|
||||
address = int(ll[k + 1], 16)
|
||||
if linenum not in self.lines[sourcepath]:
|
||||
self.lines[sourcepath][linenum] = address
|
||||
k += 2
|
||||
|
||||
j += 1
|
||||
elif 'LF_CLASS' in line or 'LF_STRUCTURE' in line:
|
||||
c = Class()
|
||||
|
||||
c.id = int(line.split()[0], 16)
|
||||
|
||||
flt_str = 'field list type '
|
||||
nextline = line_dump[i+1]
|
||||
flt_start = nextline.index(flt_str)+len(flt_str)
|
||||
flt_end = nextline.index(',', flt_start)
|
||||
c.field_list = int(nextline[flt_start:flt_end], 16)
|
||||
|
||||
info = line_dump[i+3].split(',')
|
||||
for i in info:
|
||||
kv = i.split('=')
|
||||
if len(kv) == 2:
|
||||
k = kv[0].strip()
|
||||
v = kv[1].strip()
|
||||
if k == 'Size':
|
||||
c.size = int(v)
|
||||
elif k == 'class name':
|
||||
c.name = v
|
||||
|
||||
classes[c.id] = c
|
||||
|
||||
elif 'LF_FIELDLIST' in line:
|
||||
def parse_line(lines, index):
|
||||
def space_count(s):
|
||||
spaces = 0
|
||||
for c in s:
|
||||
if c == '\t':
|
||||
spaces += 1
|
||||
return spaces
|
||||
|
||||
l = lines[index].rstrip()
|
||||
spaces = space_count(l)
|
||||
|
||||
while True:
|
||||
index += 1
|
||||
nextline = lines[index]
|
||||
nextspaces = space_count(nextline)
|
||||
if nextspaces > spaces:
|
||||
l += nextline[nextspaces:].rstrip()
|
||||
else:
|
||||
break
|
||||
|
||||
l = l.strip()
|
||||
|
||||
return l
|
||||
|
||||
def get_vtable_func_info(l):
|
||||
info = VTableEntry()
|
||||
csv = l.split(',')
|
||||
for c in csv:
|
||||
kv = c.split('=')
|
||||
if len(kv) == 2:
|
||||
k = kv[0].strip()
|
||||
v = kv[1].strip()
|
||||
if k == 'name':
|
||||
info.name = remove_quotes(v)
|
||||
elif k == 'vfptr offset':
|
||||
info.offset = int(v)
|
||||
|
||||
return info
|
||||
|
||||
fl = FieldList()
|
||||
|
||||
fl.id = int(line_dump[i].split()[0], 16)
|
||||
|
||||
while True:
|
||||
i += 1
|
||||
if not line_dump[i].strip():
|
||||
break
|
||||
|
||||
if 'BCLASS' in line_dump[i]:
|
||||
dp = line_dump[i].split(',')
|
||||
for d in dp:
|
||||
kv = d.split('=')
|
||||
if len(kv) == 2:
|
||||
k = kv[0].strip()
|
||||
v = kv[1].strip()
|
||||
if k == 'type':
|
||||
fl.baseclass = int(v, 16)
|
||||
elif 'VIRTUAL' in line_dump[i]:
|
||||
info = get_vtable_func_info(parse_line(line_dump, i))
|
||||
fl.vtable.append(info)
|
||||
elif 'LF_MEMBER' in line_dump[i]:
|
||||
member = VTableEntry()
|
||||
l = line_dump[i]
|
||||
offset_str = 'offset = '
|
||||
member.offset = int(l[l.index(offset_str) + len(offset_str):])
|
||||
member.name = remove_quotes(line_dump[i+1][16:].rstrip())
|
||||
fl.members.append(member)
|
||||
|
||||
fieldlists[fl.id] = fl
|
||||
|
||||
logger.debug('... Parsing output of cvdump.exe finished')
|
||||
|
||||
def get_recompiled_address(self, filename, line):
|
||||
addr = None
|
||||
found = False
|
||||
|
||||
logger.debug('Looking for %s:%d', filename, line)
|
||||
|
||||
for fn in self.lines:
|
||||
# Sometimes a PDB is compiled with a relative path while we always have
|
||||
# an absolute path. Therefore we must
|
||||
try:
|
||||
if os.path.samefile(fn, filename):
|
||||
filename = fn
|
||||
break
|
||||
except FileNotFoundError as e:
|
||||
continue
|
||||
|
||||
if filename in self.lines and line in self.lines[fn]:
|
||||
addr = self.lines[fn][line]
|
||||
|
||||
if addr in self.funcs:
|
||||
return self.funcs[addr]
|
||||
else:
|
||||
logger.error('Failed to find function symbol with address: 0x%x', addr)
|
||||
else:
|
||||
logger.error('Failed to find function symbol with filename and line: %s:%d', filename, line)
|
||||
|
||||
def get_recompiled_address_from_name(self, name):
|
||||
logger.debug('Looking for %s', name)
|
||||
|
||||
if name in self.names:
|
||||
return self.names[name]
|
||||
else:
|
||||
logger.error('Failed to find function symbol with name: %s', name)
|
||||
5
tools/reccmp/modules/util.py
Normal file
5
tools/reccmp/modules/util.py
Normal file
@ -0,0 +1,5 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
def get_file_in_script_dir(fn):
|
||||
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
|
||||
@ -13,6 +13,12 @@
|
||||
import html
|
||||
import re
|
||||
|
||||
# Our modules
|
||||
from modules.syminfo import SymInfo
|
||||
from modules.bin import Bin
|
||||
from modules.logger import logger
|
||||
import modules.util
|
||||
|
||||
parser = argparse.ArgumentParser(allow_abbrev=False,
|
||||
description='Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.')
|
||||
parser.add_argument('original', metavar='original-binary', help='The original binary')
|
||||
@ -33,7 +39,6 @@
|
||||
args = parser.parse_args()
|
||||
|
||||
logging.basicConfig(level=args.loglevel, format='[%(levelname)s] %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
colorama.init()
|
||||
|
||||
@ -66,47 +71,6 @@
|
||||
|
||||
svg = args.svg
|
||||
|
||||
# Declare a class that can automatically convert virtual executable addresses
|
||||
# to file addresses
|
||||
class Bin:
|
||||
def __init__(self, filename):
|
||||
logger.debug('Parsing headers of "%s"... ', filename)
|
||||
self.file = open(filename, 'rb')
|
||||
|
||||
#HACK: Strictly, we should be parsing the header, but we know where
|
||||
# everything is in these two files so we just jump straight there
|
||||
|
||||
# Read ImageBase
|
||||
self.file.seek(0xB4)
|
||||
self.imagebase, = struct.unpack('<i', self.file.read(4))
|
||||
|
||||
# Read .text VirtualAddress
|
||||
self.file.seek(0x184)
|
||||
self.textvirt, = struct.unpack('<i', self.file.read(4))
|
||||
|
||||
# Read .text PointerToRawData
|
||||
self.file.seek(0x18C)
|
||||
self.textraw, = struct.unpack('<i', self.file.read(4))
|
||||
logger.debug('... Parsing finished')
|
||||
|
||||
def __del__(self):
|
||||
if self.file:
|
||||
self.file.close()
|
||||
|
||||
def get_addr(self, virt):
|
||||
return virt - self.imagebase - self.textvirt + self.textraw
|
||||
|
||||
def read(self, offset, size):
|
||||
self.file.seek(self.get_addr(offset))
|
||||
return self.file.read(size)
|
||||
|
||||
class RecompiledInfo:
|
||||
def __init__(self):
|
||||
self.addr = None
|
||||
self.size = None
|
||||
self.name = None
|
||||
self.start = None
|
||||
|
||||
class WinePathConverter:
|
||||
def __init__(self, unix_cwd):
|
||||
self.unix_cwd = unix_cwd
|
||||
@ -134,121 +98,6 @@ def _call_winepath_unix2win(fn: str) -> str:
|
||||
def _call_winepath_win2unix(fn: str) -> str:
|
||||
return subprocess.check_output(['winepath', fn], text=True).strip()
|
||||
|
||||
def get_file_in_script_dir(fn):
|
||||
return os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), fn)
|
||||
|
||||
# Declare a class that parses the output of cvdump for fast access later
|
||||
class SymInfo:
|
||||
funcs = {}
|
||||
lines = {}
|
||||
names = {}
|
||||
|
||||
def __init__(self, pdb, file, wine_path_converter):
|
||||
call = [get_file_in_script_dir('cvdump.exe'), '-l', '-s']
|
||||
|
||||
if wine_path_converter:
|
||||
# Run cvdump through wine and convert path to Windows-friendly wine path
|
||||
call.insert(0, 'wine')
|
||||
call.append(wine_path_converter.get_wine_path(pdb))
|
||||
else:
|
||||
call.append(pdb)
|
||||
|
||||
logger.info('Parsing %s ...', pdb)
|
||||
logger.debug('Command = %r', call)
|
||||
line_dump = subprocess.check_output(call).decode('utf-8').split('\r\n')
|
||||
|
||||
current_section = None
|
||||
|
||||
logger.debug('Parsing output of cvdump.exe ...')
|
||||
|
||||
for i, line in enumerate(line_dump):
|
||||
if line.startswith('***'):
|
||||
current_section = line[4:]
|
||||
|
||||
if current_section == 'SYMBOLS' and 'S_GPROC32' in line:
|
||||
addr = int(line[26:34], 16)
|
||||
|
||||
info = RecompiledInfo()
|
||||
info.addr = addr + recompfile.imagebase + recompfile.textvirt
|
||||
|
||||
use_dbg_offs = False
|
||||
if use_dbg_offs:
|
||||
debug_offs = line_dump[i + 2]
|
||||
debug_start = int(debug_offs[22:30], 16)
|
||||
debug_end = int(debug_offs[43:], 16)
|
||||
|
||||
info.start = debug_start
|
||||
info.size = debug_end - debug_start
|
||||
else:
|
||||
info.start = 0
|
||||
info.size = int(line[41:49], 16)
|
||||
|
||||
info.name = line[77:]
|
||||
|
||||
self.names[info.name] = info
|
||||
self.funcs[addr] = info
|
||||
elif current_section == 'LINES' and line.startswith(' ') and not line.startswith(' '):
|
||||
sourcepath = line.split()[0]
|
||||
|
||||
if wine_path_converter:
|
||||
# Convert filename to Unix path for file compare
|
||||
sourcepath = wine_path_converter.get_unix_path(sourcepath)
|
||||
|
||||
if sourcepath not in self.lines:
|
||||
self.lines[sourcepath] = {}
|
||||
|
||||
j = i + 2
|
||||
while True:
|
||||
ll = line_dump[j].split()
|
||||
if len(ll) == 0:
|
||||
break
|
||||
|
||||
k = 0
|
||||
while k < len(ll):
|
||||
linenum = int(ll[k + 0])
|
||||
address = int(ll[k + 1], 16)
|
||||
if linenum not in self.lines[sourcepath]:
|
||||
self.lines[sourcepath][linenum] = address
|
||||
k += 2
|
||||
|
||||
j += 1
|
||||
|
||||
logger.debug('... Parsing output of cvdump.exe finished')
|
||||
|
||||
def get_recompiled_address(self, filename, line):
|
||||
addr = None
|
||||
found = False
|
||||
|
||||
logger.debug('Looking for %s:%d', filename, line)
|
||||
|
||||
for fn in self.lines:
|
||||
# Sometimes a PDB is compiled with a relative path while we always have
|
||||
# an absolute path. Therefore we must
|
||||
try:
|
||||
if os.path.samefile(fn, filename):
|
||||
filename = fn
|
||||
break
|
||||
except FileNotFoundError as e:
|
||||
continue
|
||||
|
||||
if filename in self.lines and line in self.lines[fn]:
|
||||
addr = self.lines[fn][line]
|
||||
|
||||
if addr in self.funcs:
|
||||
return self.funcs[addr]
|
||||
else:
|
||||
logger.error('Failed to find function symbol with address: 0x%x', addr)
|
||||
else:
|
||||
logger.error('Failed to find function symbol with filename and line: %s:%d', filename, line)
|
||||
|
||||
def get_recompiled_address_from_name(self, name):
|
||||
logger.debug('Looking for %s', name)
|
||||
|
||||
if name in self.names:
|
||||
return self.names[name]
|
||||
else:
|
||||
logger.error('Failed to find function symbol with name: %s', name)
|
||||
|
||||
wine_path_converter = None
|
||||
if os.name != 'nt':
|
||||
wine_path_converter = WinePathConverter(source)
|
||||
@ -397,7 +246,9 @@ def can_resolve_register_differences(original_asm, new_asm):
|
||||
|
||||
# Generate basename of original file, used in locating OFFSET lines
|
||||
basename = os.path.basename(os.path.splitext(original)[0])
|
||||
pattern = '// OFFSET:'
|
||||
funcpattern = '// OFFSET:'
|
||||
vtblpattern = '// VTABLE'
|
||||
in_class = None
|
||||
|
||||
for subdir, dirs, files in os.walk(source):
|
||||
for file in files:
|
||||
@ -415,8 +266,8 @@ def can_resolve_register_differences(original_asm, new_asm):
|
||||
|
||||
line = line.strip()
|
||||
|
||||
if line.startswith(pattern) and not line.endswith("STUB"):
|
||||
par = line[len(pattern):].strip().split()
|
||||
if line.startswith(funcpattern) and not line.endswith("STUB"):
|
||||
par = line[len(funcpattern):].strip().split()
|
||||
module = par[0]
|
||||
if module != basename:
|
||||
continue
|
||||
@ -535,12 +386,47 @@ def can_resolve_register_differences(original_asm, new_asm):
|
||||
if html_path:
|
||||
escaped = '\\n'.join(udiff).replace('"', '\\"').replace('\n', '\\n').replace('<', '<').replace('>', '>')
|
||||
htmlinsert.append('{address: "%s", name: "%s", matching: %s, diff: "%s"}' % (hex(addr), html.escape(recinfo.name), str(effective_ratio), escaped))
|
||||
elif line.startswith(vtblpattern):
|
||||
addr_discovery = line.split()
|
||||
|
||||
try:
|
||||
address = int(addr_discovery[len(addr_discovery)-1], 16)
|
||||
while True:
|
||||
line = srcfile.readline()
|
||||
line_no += 1
|
||||
|
||||
if not line:
|
||||
raise Exception('Failed to find function for vtable listing')
|
||||
break
|
||||
|
||||
try:
|
||||
start_brkt = line.index('(')
|
||||
name_discovery = line[0:start_brkt].split()
|
||||
vtbl_name = name_discovery[len(name_discovery) - 1]
|
||||
break
|
||||
except ValueError:
|
||||
continue
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
print('Found vtable function %s::%s offset %s' % (in_class, vtbl_name, hex(address)))
|
||||
else:
|
||||
# NOTE: Naive implementation, won't support vtable functions after a nested class
|
||||
class_discovery = line.split()
|
||||
|
||||
try:
|
||||
class_index = class_discovery.index('class')
|
||||
|
||||
if class_index + 1 < len(class_discovery):
|
||||
in_class = class_discovery[class_index + 1]
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
except UnicodeDecodeError:
|
||||
break
|
||||
|
||||
def gen_html(html_path, data):
|
||||
templatefile = open(get_file_in_script_dir('template.html'), 'r')
|
||||
templatefile = open(util.get_file_in_script_dir('template.html'), 'r')
|
||||
if not templatefile:
|
||||
print('Failed to find HTML template file, can\'t generate HTML summary')
|
||||
return
|
||||
@ -559,7 +445,7 @@ def gen_html(html_path, data):
|
||||
htmlfile.close()
|
||||
|
||||
def gen_svg(svg, name, icon, implemented_funcs, total_funcs, raw_accuracy):
|
||||
templatefile = open(get_file_in_script_dir('template.svg'), 'r')
|
||||
templatefile = open(util.get_file_in_script_dir('template.svg'), 'r')
|
||||
if not templatefile:
|
||||
print('Failed to find SVG template file, can\'t generate SVG summary')
|
||||
return
|
||||
|
||||
Loading…
Reference in New Issue
Block a user