Use isledecomp lib in reccmp

2026-01-24 00:31:16 +00:00 · 2023-11-14 14:49:27 -05:00 · 2023-11-14 14:49:27 -05:00 · 6a1e05c270
commit 6a1e05c270
parent 0fa6d207f8
2 changed files with 100 additions and 118 deletions
--- a/tools/isledecomp/isledecomp/dir.py
+++ b/tools/isledecomp/isledecomp/dir.py
@ -7,11 +7,15 @@ def file_is_cpp(filename: str) -> bool:
    return ext.lower() in ('.h', '.cpp')
-def walk_source_dir(source: str) -> Iterator[str]:
+def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
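    """Generator to walk the given directory and yield the path of each
       C++ file found. Subdirectories are searched as well unless
       `recursive` is False, in which case only the top-level directory
       is scanned."""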
    source = os.path.abspath(source)
    for subdir, dirs, files in os.walk(source):
        for file in files:
            if file_is_cpp(file):
                yield os.path.join(subdir, file)
        if not recursive:
            break
--- a/tools/reccmp/reccmp.py
+++ b/tools/reccmp/reccmp.py
@ -12,6 +12,8 @@
 import colorama
 import html
 import re
 from isledecomp.dir import walk_source_dir
 from isledecomp.parser import find_code_blocks
 parser = argparse.ArgumentParser(allow_abbrev=False,
  description='Recompilation Compare: compare an original EXE with a recompiled EXE + PDB.')
@ -414,143 +416,119 @@ def can_resolve_register_differences(original_asm, new_asm):
 basename = os.path.basename(os.path.splitext(original)[0])
 pattern = '// OFFSET:'
-for subdir, dirs, files in os.walk(source):
+for srcfilename in walk_source_dir(source):
-  for file in files:
+  with open(srcfilename, 'r') as srcfile:
-    srcfilename = os.path.join(os.path.abspath(subdir), file)
+    blocks = find_code_blocks(srcfile)
    with open(srcfilename, 'r') as srcfile:
      line_no = 0
-      while True:
+  for block in blocks:
-        try:
+    if block.is_stub:
-          line = srcfile.readline()
+      continue
          line_no += 1
-          if not line:
+    if block.module != basename:
-            break
+      continue
-          line = line.strip()
+    addr = block.offset
    # Verbose flag handling
    if verbose:
      if addr == verbose:
        found_verbose_target = True
      else:
        continue
-          if line.startswith(pattern) and not line.endswith('STUB'):
+    if block.is_template:
-            par = line[len(pattern):].strip().split()
+      recinfo = syminfo.get_recompiled_address_from_name(block.signature)
-            module = par[0]
+      if not recinfo:
-            if module != basename:
+        continue
-              continue
+    else:
      recinfo = syminfo.get_recompiled_address(srcfilename, block.start_line)
      if not recinfo:
        continue
-            addr = int(par[1], 16)
+    # effective_ratio is the similarity ratio when differences in register
    # allocation are ignored, whereas ratio is the true similarity ratio.
    ratio = 0.0
    effective_ratio = 0.0
    if recinfo.size:
      origasm = parse_asm(origfile, addr + recinfo.start, recinfo.size)
      recompasm = parse_asm(recompfile, recinfo.addr + recinfo.start, recinfo.size)
-            # Verbose flag handling
+      diff = difflib.SequenceMatcher(None, origasm, recompasm)
-            if verbose:
+      ratio = diff.ratio()
-              if addr == verbose:
+      effective_ratio = ratio
                found_verbose_target = True
              else:
                continue
-            if line.endswith('TEMPLATE'):
+      if ratio != 1.0:
-                line = srcfile.readline()
+        # Check whether we can resolve register swaps which are actually
-                line_no += 1
+        # perfect matches modulo compiler entropy.
-                # Name comes after // comment
+        if can_resolve_register_differences(origasm, recompasm):
-                name = line.strip()[2:].strip()
+          effective_ratio = 1.0
    else:
      ratio = 0
-                recinfo = syminfo.get_recompiled_address_from_name(name)
+    percenttext = f'{(effective_ratio * 100):.2f}%'
-                if not recinfo:
+    if not plain:
-                  continue
+      if effective_ratio == 1.0:
-            else:
+        percenttext = colorama.Fore.GREEN + percenttext + colorama.Style.RESET_ALL
-                find_open_bracket = line
+      elif effective_ratio > 0.8:
-                while '{' not in find_open_bracket:
+        percenttext = colorama.Fore.YELLOW + percenttext + colorama.Style.RESET_ALL
-                  find_open_bracket = srcfile.readline()
+      else:
-                  line_no += 1
+        percenttext = colorama.Fore.RED + percenttext + colorama.Style.RESET_ALL
-                recinfo = syminfo.get_recompiled_address(srcfilename, line_no)
+    if effective_ratio == 1.0 and ratio != 1.0:
-                if not recinfo:
+      if plain:
-                  continue
+        percenttext += '*'
      else:
        percenttext += colorama.Fore.RED + '*' + colorama.Style.RESET_ALL
-            # The effective_ratio is the ratio when ignoring differing register
+    if args.print_rec_addr:
-            # allocation vs the ratio is the true ratio.
+      addrs = f'0x{addr:x} / 0x{recinfo.addr:x}'
-            ratio = 0.0
+    else:
-            effective_ratio = 0.0
+      addrs = hex(addr)
            if recinfo.size:
              origasm = parse_asm(origfile, addr + recinfo.start, recinfo.size)
              recompasm = parse_asm(recompfile, recinfo.addr + recinfo.start, recinfo.size)
-              diff = difflib.SequenceMatcher(None, origasm, recompasm)
+    if not verbose:
-              ratio = diff.ratio()
+      print(f'  {recinfo.name} ({addrs}) is {percenttext} similar to the original')
              effective_ratio = ratio
-              if ratio != 1.0:
+    function_count += 1
-                # Check whether we can resolve register swaps which are actually
+    total_accuracy += ratio
-                # perfect matches modulo compiler entropy.
+    total_effective_accuracy += effective_ratio
                if can_resolve_register_differences(origasm, recompasm):
                  effective_ratio = 1.0
            else:
              ratio = 0
-            percenttext = f'{(effective_ratio * 100):.2f}%'
+    if recinfo.size:
-            if not plain:
+      udiff = difflib.unified_diff(origasm, recompasm, n=10)
              if effective_ratio == 1.0:
                percenttext = colorama.Fore.GREEN + percenttext + colorama.Style.RESET_ALL
              elif effective_ratio > 0.8:
                percenttext = colorama.Fore.YELLOW + percenttext + colorama.Style.RESET_ALL
              else:
                percenttext = colorama.Fore.RED + percenttext + colorama.Style.RESET_ALL
-            if effective_ratio == 1.0 and ratio != 1.0:
+      # If verbose, print the diff for that function to the output
      if verbose:
        if effective_ratio == 1.0:
          ok_text = 'OK!' if plain else (colorama.Fore.GREEN + '✨ OK! ✨' + colorama.Style.RESET_ALL)
          if ratio == 1.0:
            print(f'{addrs}: {recinfo.name} 100% match.\n\n{ok_text}\n\n')
          else:
            print(f'{addrs}: {recinfo.name} Effective 100%% match. (Differs in register allocation only)\n\n{ok_text} (still differs in register allocation)\n\n')
        else:
          for line in udiff:
            if line.startswith('++') or line.startswith('@@') or line.startswith('--'):
              # Skip unneeded parts of the diff for the brief view
              pass
            elif line.startswith('+'):
              if plain:
-                percenttext += '*'
+                print(line)
              else:
-                percenttext += colorama.Fore.RED + '*' + colorama.Style.RESET_ALL
+                print(colorama.Fore.GREEN + line)
-
+            elif line.startswith('-'):
-            if args.print_rec_addr:
+              if plain:
-              addrs = f'0x{addr:x} / 0x{recinfo.addr:x}'
+                print(line)
              else:
                print(colorama.Fore.RED + line)
            else:
-              addrs = hex(addr)
+              print(line)
            if not plain:
              print(colorama.Style.RESET_ALL, end='')
-            if not verbose:
+          print(f'\n{recinfo.name} is only {percenttext} similar to the original, diff above')
              print(f'  {recinfo.name} ({addrs}) is {percenttext} similar to the original')
-            function_count += 1
+      # If html, record the diffs to an HTML file
-            total_accuracy += ratio
+      if html_path:
-            total_effective_accuracy += effective_ratio
+        escaped = html.escape('\\n'.join(udiff).replace('"', '\\"').replace('\n', '\\n'))
        htmlinsert.append(f'{{address: "0x{addr:x}", name: "{html.escape(recinfo.name)}", matching: {effective_ratio}, diff: "{escaped}"}}')
            if recinfo.size:
              udiff = difflib.unified_diff(origasm, recompasm, n=10)
              # If verbose, print the diff for that function to the output
              if verbose:
                if effective_ratio == 1.0:
                  ok_text = 'OK!' if plain else (colorama.Fore.GREEN + '✨ OK! ✨' + colorama.Style.RESET_ALL)
                  if ratio == 1.0:
                    print(f'{addrs}: {recinfo.name} 100% match.\n\n{ok_text}\n\n')
                  else:
                    print(f'{addrs}: {recinfo.name} Effective 100%% match. (Differs in register allocation only)\n\n{ok_text} (still differs in register allocation)\n\n')
                else:
                  for line in udiff:
                    if line.startswith('++') or line.startswith('@@') or line.startswith('--'):
                      # Skip unneeded parts of the diff for the brief view
                      pass
                    elif line.startswith('+'):
                      if plain:
                        print(line)
                      else:
                        print(colorama.Fore.GREEN + line)
                    elif line.startswith('-'):
                      if plain:
                        print(line)
                      else:
                        print(colorama.Fore.RED + line)
                    else:
                      print(line)
                    if not plain:
                      print(colorama.Style.RESET_ALL, end='')
                  print(f'\n{recinfo.name} is only {percenttext} similar to the original, diff above')
              # If html, record the diffs to an HTML file
              if html_path:
                escaped = html.escape('\\n'.join(udiff).replace('"', '\\"').replace('\n', '\\n'))
                htmlinsert.append(f'{{address: "0x{addr:x}", name: "{html.escape(recinfo.name)}", matching: {effective_ratio}, diff: "{escaped}"}}')
        except UnicodeDecodeError:
          break
 def gen_html(html_path, data):
  templatedata = None