Add binary diffing utilities found in several of my repositories.
This commit is contained in:
parent
ffee6fcd1c
commit
f43c46ef2f
49
README.md
49
README.md
|
@ -3,3 +3,52 @@
|
|||
Collection of utilities written in Python for working with various arcade binaries.
|
||||
This is mostly suited towards the separated formats found in MAME archival releases
|
||||
but also works on a variety of binaries from basically anywhere.
|
||||
|
||||
## bindiff
|
||||
|
||||
Create a binary diff from two same-length binaries, or apply a previously created
|
||||
diff to a binary to patch that binary. Run it like `./bindiff diff --help` to see
|
||||
options for diffing, and `./bindiff patch --help` to see options for patching.
|
||||
|
||||
The patch format is simple. The number on the left of the colon is the hex offset where
|
||||
the difference was found, and the numbers on the right are the hex values to find
|
||||
and replace. A wildcard (`*`) can be substituted for a hex pair for any byte in
|
||||
the before section if you do not care what the value is, but be aware that this will
|
||||
make the patch non-reversible. Arbitrary comments are supported anywhere in the diff.
|
||||
Start a line with the `#` character to create a comment. Special values are recognized
|
||||
in comments. If you create a comment starting with `# File size:` then the base
|
||||
file will be compared against the decimal number placed after the colon and any file
|
||||
not matching that length will be rejected.
|
||||
|
||||
Some examples are as follows:
|
||||
|
||||
A simple patch changing a byte in a file at offset `0x256` from `0xAA` to `0xDD`:
|
||||
|
||||
```
|
||||
256: AA -> DD
|
||||
```
|
||||
|
||||
That same patch, but only for files that are exactly 1024 bytes long:
|
||||
|
||||
```
|
||||
# File size: 1024
|
||||
256: AA -> DD
|
||||
```
|
||||
|
||||
A patch that does not care about one of the bytes it is patching. The byte at `0x513`
|
||||
can be any value and the patch will still be applied, and altogether 4 bytes starting
|
||||
at `0x512` will be changed to the hex values `0x00 0x11 0x22 0x33`:
|
||||
|
||||
```
|
||||
512: AA * CC DD -> 00 11 22 33
|
||||
```
|
||||
|
||||
A patch with multiple offsets, and helpful author descriptions for each section:
|
||||
|
||||
```
|
||||
# This part of the patch fixes a sprite offset issue.
|
||||
128: AA -> BB
|
||||
|
||||
# This part of the patch fixes sound playback issues.
|
||||
256: 33 -> 44
|
||||
```
|
||||
|
|
|
@ -0,0 +1,263 @@
|
|||
from typing import List, Optional, Tuple, cast
|
||||
from typing_extensions import Final
|
||||
|
||||
|
||||
class BinaryDiffException(Exception):
    """Error raised when two binaries cannot be diffed or a patch cannot be used."""
|
||||
|
||||
|
||||
class BinaryDiff:
    """Create and apply textual diffs between same-length binary blobs.

    A patch is a list of text lines. Lines starting with ``#`` are comments.
    Every other line has the form ``<hex offset>: <old bytes> -> <new bytes>``
    where the bytes are space-separated hex values. A ``*`` in the old section
    is a wildcard that matches any byte. The comments ``# File size: <decimal>``
    and ``# Description: <text>`` carry metadata read by :meth:`size` and
    :meth:`description`.
    """

    # Number of bytes compared with a single slice comparison in diff().
    CHUNK_SIZE: Final[int] = 1024

    @staticmethod
    def _hex(val: int) -> str:
        """Return ``val`` as upper-case hex, zero-padded to at least two digits."""
        return f"{val:02X}"

    @staticmethod
    def diff(bin1: bytes, bin2: bytes) -> List[str]:
        """Return patch lines that turn ``bin1`` into ``bin2``.

        Consecutive differing bytes are merged into a single patch line. The
        first returned line is a ``# File size:`` comment holding the length
        of the inputs. An empty list is returned when the inputs are equal.

        Raises:
            BinaryDiffException: if the two inputs differ in length.
        """
        binlength = len(bin1)
        if binlength != len(bin2):
            raise BinaryDiffException("Cannot diff different-sized binary blobs!")

        chunk_size = BinaryDiff.CHUNK_SIZE

        # Each run is (start offset, old bytes, new bytes) for a stretch of
        # consecutive differing bytes.
        runs: List[Tuple[int, bytearray, bytearray]] = []

        # Compare a chunk at a time first, assuming the inputs are mostly the
        # same, so identical regions are skipped with one slice comparison.
        for offset in range(0, binlength, chunk_size):
            chunk1 = bin1[offset:(offset + chunk_size)]
            chunk2 = bin2[offset:(offset + chunk_size)]
            if chunk1 == chunk2:
                continue

            # zip() stops at the end of a short final chunk, so blobs whose
            # length is not a multiple of CHUNK_SIZE are handled correctly.
            for i, (byte1, byte2) in enumerate(zip(chunk1, chunk2)):
                if byte1 == byte2:
                    continue

                position = offset + i
                if runs and runs[-1][0] + len(runs[-1][1]) == position:
                    # This is a continuation of the current run.
                    runs[-1][1].append(byte1)
                    runs[-1][2].append(byte2)
                else:
                    # This is a new run.
                    runs.append((position, bytearray([byte1]), bytearray([byte2])))

        # Nothing differs, so there is no patch to emit.
        if not runs:
            return []

        def _hexrun(val: bytearray) -> str:
            return " ".join(BinaryDiff._hex(v) for v in val)

        # Include the original byte size for later comparison/checks.
        ret: List[str] = [f"# File size: {binlength}"]
        for start, old, new in runs:
            ret.append(f"{BinaryDiff._hex(start)}: {_hexrun(old)} -> {_hexrun(new)}")
        return ret

    @staticmethod
    def size(patchlines: List[str]) -> Optional[int]:
        """Return the value of the first ``# File size:`` comment, or ``None``.

        The comment key is matched case-insensitively. ``int()`` raises
        ``ValueError`` if the text after the colon is not a decimal integer.
        """
        for patch in patchlines:
            if patch.startswith('#'):
                # This is a comment, ignore it, unless it's a file-size comment.
                comment = patch[1:].strip().lower()
                if comment.startswith('file size:'):
                    return int(comment[10:].strip())
        return None

    @staticmethod
    def _convert(val: str) -> Optional[int]:
        """Parse one patch token: ``*`` becomes ``None``, anything else is hex."""
        val = val.strip()
        if val == '*':
            return None
        return int(val, 16)

    @staticmethod
    def _gather_differences(patchlines: List[str], reverse: bool) -> List[Tuple[int, Optional[bytes], bytes]]:
        """Expand patch lines into per-byte ``(offset, old, new)`` tuples.

        ``old`` is ``None`` where the patch uses a wildcard. When ``reverse``
        is true the old and new values are swapped so that applying the
        result undoes the patch.

        Raises:
            BinaryDiffException: if the before/after sections differ in
                length, a line changes no bytes, an offset is negative, the
                after section contains a wildcard, or a wildcard patch is
                asked to be reversed.
        """
        # First, separate out into a list of offsets and old/new bytes.
        differences: List[Tuple[int, Optional[bytes], bytes]] = []

        for patch in patchlines:
            if patch.startswith('#'):
                # This is a comment, ignore it.
                continue

            start_offset, patch_contents = patch.split(':', 1)
            before, after = patch_contents.split('->')
            beforevals = [
                BinaryDiff._convert(x) for x in before.split(" ") if x.strip()
            ]
            aftervals = [
                BinaryDiff._convert(x) for x in after.split(" ") if x.strip()
            ]

            if len(beforevals) != len(aftervals):
                raise BinaryDiffException(
                    f"Patch before and after length mismatch at "
                    f"offset {start_offset}!"
                )
            if len(beforevals) == 0:
                raise BinaryDiffException(
                    f"Must have at least one byte to change at "
                    f"offset {start_offset}!"
                )

            offset = int(start_offset.strip(), 16)
            if offset < 0:
                # A negative offset would index from the end of the binary,
                # which the patch format does not define.
                raise BinaryDiffException(
                    f"Patch offset {start_offset} cannot be negative!"
                )

            for i, (old, new) in enumerate(zip(beforevals, aftervals)):
                if new is None:
                    raise BinaryDiffException(
                        f"Cannot convert a location to a wildcard "
                        f"at offset {start_offset}"
                    )
                if old is None and reverse:
                    raise BinaryDiffException(
                        f"Patch offset {start_offset} specifies a wildcard and cannot "
                        f"be reversed!"
                    )
                differences.append(
                    (
                        offset + i,
                        bytes([old]) if old is not None else None,
                        bytes([new]),
                    )
                )

        # Now, if we're doing the reverse, just switch them.
        if reverse:
            # We cast here because mypy can't see that the loop above already
            # rejected every wildcard (None) old value when reverse is True.
            differences = [cast(Tuple[int, Optional[bytes], bytes], (x[0], x[2], x[1])) for x in differences]

        return differences

    @staticmethod
    def _find_problem(
        binary: bytes,
        differences: List[Tuple[int, Optional[bytes], bytes]],
    ) -> Optional[str]:
        """Return a message for the first difference that does not fit ``binary``.

        Returns ``None`` when every offset lies inside ``binary`` and every
        non-wildcard old byte matches the byte currently at that offset.
        """
        for offset, old, _ in differences:
            # Valid offsets are 0..len-1, so an offset equal to the length is
            # already past the end.
            if offset >= len(binary):
                return (
                    f"Patch offset {BinaryDiff._hex(offset)} is beyond the end of "
                    f"the binary!"
                )
            if old is not None and binary[offset:(offset + 1)] != old:
                return (
                    f"Patch offset {BinaryDiff._hex(offset)} expecting {BinaryDiff._hex(old[0])} "
                    f"but found {BinaryDiff._hex(binary[offset])}!"
                )
        return None

    @staticmethod
    def patch(
        binary: bytes,
        patchlines: List[str],
        *,
        reverse: bool = False,
    ) -> bytes:
        """Apply ``patchlines`` to ``binary`` and return the patched bytes.

        The result always has the same length as ``binary``. Set ``reverse``
        to undo a previously applied patch.

        Raises:
            BinaryDiffException: if the patch's file size does not match, an
                offset is past the end of ``binary``, an expected old byte
                does not match, or the patch lines themselves are invalid.
        """
        file_size = BinaryDiff.size(patchlines)
        if file_size is not None and file_size != len(binary):
            raise BinaryDiffException(
                f"Patch is for binary of size {file_size} but binary is {len(binary)} "
                f"bytes long!"
            )

        # Sorted so the lowest failing offset is the one reported.
        differences: List[Tuple[int, Optional[bytes], bytes]] = sorted(
            BinaryDiff._gather_differences(patchlines, reverse),
            key=lambda diff: diff[0],
        )

        # Verify everything against the untouched input before changing it.
        problem = BinaryDiff._find_problem(binary, differences)
        if problem is not None:
            raise BinaryDiffException(problem)

        # Now, apply the changes to a mutable copy of the binary data.
        patched = bytearray(binary)
        for offset, _, new in differences:
            patched[offset] = new[0]
        return bytes(patched)

    @staticmethod
    def can_patch(
        binary: bytes,
        patchlines: List[str],
        *,
        reverse: bool = False,
        ignore_size_differences: bool = False,
    ) -> Tuple[bool, str]:
        """Report whether ``patchlines`` can be applied to ``binary``.

        Returns ``(True, "")`` when the patch applies, otherwise ``(False,
        reason)``. The ``# File size:`` check is skipped when
        ``ignore_size_differences`` is true. Invalid patch lines still raise
        from :meth:`_gather_differences` rather than being reported here.
        """
        if not ignore_size_differences:
            file_size = BinaryDiff.size(patchlines)
            if file_size is not None and file_size != len(binary):
                return (
                    False,
                    f"Patch is for binary of size {file_size} but binary is {len(binary)} "
                    f"bytes long!"
                )

        differences: List[Tuple[int, Optional[bytes], bytes]] = BinaryDiff._gather_differences(patchlines, reverse)

        # Now, verify the changes against the binary data.
        problem = BinaryDiff._find_problem(binary, differences)
        if problem is not None:
            return (False, problem)

        # Didn't find any problems.
        return (True, "")

    @staticmethod
    def description(patchlines: List[str]) -> Optional[str]:
        """Return the text of the first ``# Description:`` comment, or ``None``.

        The comment key is matched case-insensitively; the description text
        itself is returned with its original case.
        """
        for patch in patchlines:
            if patch.startswith('#'):
                # This is a comment, ignore it, unless it's a description comment.
                comment = patch[1:].strip()
                if comment.lower().startswith('description:'):
                    return comment[12:].strip()
        return None

    @staticmethod
    def needed_amount(patchlines: List[str]) -> int:
        """Return the minimum binary length required to apply ``patchlines``.

        This is the highest patched offset plus one, or 0 when the patch
        changes no bytes.
        """
        differences: List[Tuple[int, Optional[bytes], bytes]] = BinaryDiff._gather_differences(patchlines, False)
        if not differences:
            return 0
        return max(offset for offset, _, _ in differences) + 1
|
|
@ -0,0 +1,114 @@
|
|||
#! /usr/bin/env python3
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
from arcadeutils.binary import BinaryDiff
|
||||
|
||||
|
||||
def main() -> int:
    """Run the ``bindiff`` command-line tool and return its exit status.

    Two sub-commands are supported: ``diff`` writes a patch describing how
    FILE1 differs from FILE2 (to stdout or ``--patch-file``), and ``patch``
    applies a patch (from stdin or ``--patch-file``) to BIN and writes the
    result to OUT. Returns 0 on success and 1 on failure or when no valid
    command was given.
    """
    # Create the argument parser
    parser = argparse.ArgumentParser(
        description="Utilities for diffing or patching binary files.",
    )
    subparsers = parser.add_subparsers(help='commands', dest='command')

    # Parser for diffing two binary files
    diff_parser = subparsers.add_parser('diff', help='create a diff of two same-length binary files')
    diff_parser.add_argument(
        'file1',
        metavar='FILE1',
        type=str,
        help='the base file that we will output diffs relative to',
    )
    diff_parser.add_argument(
        'file2',
        metavar='FILE2',
        type=str,
        help='the file that we will compare against the base file to find diffs',
    )
    diff_parser.add_argument(
        '--patch-file',
        metavar='FILE',
        type=str,
        help='write patches to a file instead of stdout',
    )

    # Parser for patching a binary file
    patch_parser = subparsers.add_parser('patch', help='patch a binary file using a previously created diff')
    patch_parser.add_argument(
        'bin',
        metavar='BIN',
        type=str,
        help='the binary file we should patch',
    )
    patch_parser.add_argument(
        'out',
        metavar='OUT',
        type=str,
        help='the file we should write the patched binary to',
    )
    patch_parser.add_argument(
        '--patch-file',
        metavar='FILE',
        type=str,
        help='read patches from a file instead of stdin',
    )
    patch_parser.add_argument(
        '--reverse',
        action="store_true",
        help='perform the patch in reverse (undo the patch)',
    )

    # Grab what we're doing
    args = parser.parse_args()

    if args.command == 'diff':
        with open(args.file1, "rb") as fp:
            file1 = fp.read()
        with open(args.file2, "rb") as fp:
            file2 = fp.read()

        # Top-level boundary: report any failure to the user instead of
        # letting a traceback escape.
        try:
            differences = BinaryDiff.diff(file1, file2)
        except Exception as e:
            print(f"Could not diff {args.file1} against {args.file2}: {str(e)}", file=sys.stderr)
            return 1

        if not args.patch_file:
            for line in differences:
                print(line)
        else:
            with open(args.patch_file, "w") as fp:
                # Text mode already translates "\n" to the platform line
                # ending; joining with os.linesep would produce "\r\r\n" on
                # Windows.
                fp.write("\n".join(differences))
    elif args.command == 'patch':
        with open(args.bin, "rb") as fp:
            old = fp.read()

        if not args.patch_file:
            differences = sys.stdin.readlines()
        else:
            with open(args.patch_file, "r") as fp:
                differences = fp.readlines()
        # Drop blank lines and surrounding whitespace before parsing.
        differences = [d.strip() for d in differences if d.strip()]

        try:
            new = BinaryDiff.patch(old, differences, reverse=args.reverse)
        except Exception as e:
            print(f"Could not patch {args.bin}: {str(e)}", file=sys.stderr)
            return 1

        with open(args.out, "wb") as fp:
            fp.write(new)

        print(f"Patched {args.bin} and wrote to {args.out}.")
    else:
        # The trailing "\n" leaves a blank line before the help text; print()
        # writes through a text stream, so os.linesep is not wanted here.
        print("Please specify a valid command!\n", file=sys.stderr)
        parser.print_help()
        return 1

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
|
Loading…
Reference in New Issue