111 lines
3.4 KiB
Python
111 lines
3.4 KiB
Python
#!/usr/bin/env python3
|
|
"""Extract All-in-One WP Migration .wpress archive.
|
|
|
|
Usage:
|
|
python3 extract_wpress.py <path/to/file.wpress> <output/directory>
|
|
|
|
The .wpress format is a sequential binary archive with 4377-byte headers:
|
|
255 bytes filename (null-padded)
|
|
14 bytes file size in bytes (ASCII digits, null-padded)
|
|
12 bytes mtime unix timestamp (ASCII digits, null-padded)
|
|
4096 bytes relative path (null-padded)
|
|
Followed immediately by the raw file bytes, then the next header.
An all-zero header block marks the end of the archive.
|
|
"""
|
|
import os
|
|
import sys
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
# Widths of the fixed-size header fields, listed in on-disk order.
NAME_LEN = 255
SIZE_LEN = 14
MTIME_LEN = 12
PATH_LEN = 4096

# A header is the four fields laid end to end (255 + 14 + 12 + 4096 = 4377).
HEADER_SIZE = NAME_LEN + SIZE_LEN + MTIME_LEN + PATH_LEN
|
|
|
|
|
|
def _parse_int(b: bytes) -> int:
|
|
s = b.split(b"\x00", 1)[0].decode(errors="replace").strip()
|
|
return int(s) if s else 0
|
|
|
|
|
|
def _parse_str(b: bytes) -> str:
|
|
return b.split(b"\x00", 1)[0].decode(errors="replace")
|
|
|
|
|
|
def _clean_parts(field: str) -> list:
    """Split a header path field into components that cannot escape a root.

    Both ``/`` and ``\\`` are treated as separators. Empty components (from
    leading or doubled separators), ``.`` and ``..`` are dropped, so the
    result never points above the directory it is joined onto. Components
    that merely start with a dot (e.g. ``.well-known``) are kept intact.
    """
    return [
        piece
        for piece in field.replace("\\", "/").split("/")
        if piece not in ("", ".", "..")
    ]


def extract(wpress_path: str, out_dir: str, verbose: bool = True) -> dict:
    """Extract every entry of a .wpress archive into ``out_dir``.

    The archive is read sequentially: a fixed-size header, then that entry's
    raw bytes, repeated until an all-zero header or end of file.

    Args:
        wpress_path: Path of the .wpress archive to read.
        out_dir: Directory to extract into; created if missing.
        verbose: When true, print a progress line every 200 files.

    Returns:
        A dict with keys ``files`` (entries written), ``bytes`` (bytes
        actually written), ``mb`` (``bytes`` in MiB, one decimal),
        ``skipped`` (entries not written) and ``out_dir``.

    Raises:
        ValueError: If a size/mtime field is not numeric or the size is
            negative (the header is corrupt).
        OSError: If the archive cannot be read or a file cannot be written.

    Entries whose name is empty, or whose path/name cannot be mapped safely
    inside ``out_dir``, are skipped and counted in ``skipped``. A truncated
    archive stops extraction with a warning on stderr. The final summary
    line is printed regardless of ``verbose``.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    count = 0
    total_bytes = 0
    skipped = 0

    # Field boundaries inside the header, computed once.
    size_end = NAME_LEN + SIZE_LEN
    mtime_end = size_end + MTIME_LEN
    path_end = mtime_end + PATH_LEN
    end_marker = b"\x00" * HEADER_SIZE

    with open(wpress_path, "rb") as f:
        while True:
            header = f.read(HEADER_SIZE)
            if not header or header == end_marker:
                break
            if len(header) < HEADER_SIZE:
                print(
                    f"WARNING: truncated header ({len(header)} of "
                    f"{HEADER_SIZE} bytes); stopping",
                    file=sys.stderr,
                )
                break

            name = _parse_str(header[:NAME_LEN])
            size = _parse_int(header[NAME_LEN:size_end])
            mtime = _parse_int(header[size_end:mtime_end])
            path = _parse_str(header[mtime_end:path_end])

            # A negative size would make the relative seek/read below move
            # backwards and could loop forever on a crafted archive.
            if size < 0:
                raise ValueError(
                    f"corrupt header for {name!r}: negative size {size}"
                )

            # Untrusted input: rebuild the destination from cleaned
            # components so neither the path nor the name can climb out of
            # the output directory.
            name_parts = _clean_parts(name)
            dest_file = None
            if name_parts:
                candidate = out.joinpath(*_clean_parts(path), *name_parts)
                try:
                    # Catches components that re-anchor the path, such as a
                    # Windows drive letter.
                    candidate.relative_to(out)
                except ValueError:
                    pass
                else:
                    dest_file = candidate

            if dest_file is None:
                if name:
                    print(
                        f"WARNING: skipping entry with unsafe path: "
                        f"{path!r} / {name!r}",
                        file=sys.stderr,
                    )
                skipped += 1
                f.seek(size, 1)  # step over this entry's data
                continue

            dest_file.parent.mkdir(parents=True, exist_ok=True)

            remaining = size
            with open(dest_file, "wb") as o:
                while remaining > 0:
                    chunk = f.read(min(65536, remaining))
                    if not chunk:
                        break
                    o.write(chunk)
                    remaining -= len(chunk)
            written = size - remaining

            # Restoring the timestamp is best-effort; an out-of-range or
            # unsupported mtime must not abort the extraction.
            if mtime > 0:
                try:
                    os.utime(dest_file, (mtime, mtime))
                except (OSError, OverflowError, ValueError):
                    pass

            count += 1
            total_bytes += written

            if remaining > 0:
                print(
                    f"WARNING: archive ended early; {dest_file} is missing "
                    f"{remaining} of {size} bytes",
                    file=sys.stderr,
                )
                break

            if verbose and count % 200 == 0:
                print(f" [{count} files | {total_bytes / 1024 / 1024:.1f} MB extracted]", flush=True)

    result = {
        "files": count,
        "bytes": total_bytes,
        "mb": round(total_bytes / 1024 / 1024, 1),
        "skipped": skipped,
        "out_dir": str(out),
    }
    print(f"DONE: {count} files | {result['mb']} MB -> {out_dir} (skipped {skipped})")
    return result
|
|
|
|
|
|
def main():
    """Command-line entry point: parse the arguments and run the extraction."""
    parser = argparse.ArgumentParser(description="Extract .wpress archive")
    parser.add_argument("wpress", help="Path to .wpress file")
    parser.add_argument("outdir", help="Destination directory")
    parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="Suppress progress output",
    )
    options = parser.parse_args()

    # --quiet is the inverse of extract()'s ``verbose`` flag.
    show_progress = not options.quiet
    extract(options.wpress, options.outdir, verbose=show_progress)
|
|
|
|
|
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|