#!/usr/bin/env python3
"""Extract an All-in-One WP Migration .wpress archive.

Usage:
    python3 extract_wpress.py ARCHIVE.wpress OUTDIR [-q]

The .wpress format is a sequential binary archive with 4377-byte headers:

    255 bytes   filename (null-padded)
     14 bytes   file size in bytes (ASCII digits, null-padded)
     12 bytes   mtime unix timestamp (ASCII digits, null-padded)
   4096 bytes   relative path (null-padded)

Each header is followed immediately by the raw file bytes, then the next
header. An all-zero header marks the end of the archive.
"""

import argparse
import os
import sys
from pathlib import Path
from typing import BinaryIO, List, Optional

HEADER_SIZE = 4377
NAME_LEN = 255
SIZE_LEN = 14
MTIME_LEN = 12
PATH_LEN = 4096

# Field offsets inside a header, derived from the field widths above.
_SIZE_OFF = NAME_LEN
_MTIME_OFF = _SIZE_OFF + SIZE_LEN
_PATH_OFF = _MTIME_OFF + MTIME_LEN

# An all-zero header terminates the archive; built once instead of per entry.
_EOF_HEADER = bytes(HEADER_SIZE)

# Payload bytes are copied in chunks of this size.
_CHUNK_SIZE = 65536


def _parse_int(b: bytes) -> int:
    """Parse a null-padded ASCII-digit header field.

    Returns 0 for an empty field.

    Raises:
        ValueError: if the field holds anything other than ASCII digits.
            Signs are rejected on purpose: a negative size would make the
            reader seek backwards in the archive.
    """
    raw = b.split(b"\x00", 1)[0].strip()
    if not raw:
        return 0
    if not raw.isdigit():  # bytes.isdigit() only accepts ASCII 0-9
        raise ValueError(f"invalid numeric header field: {raw!r}")
    return int(raw.decode("ascii"))


def _parse_str(b: bytes) -> str:
    """Decode a null-padded header field up to the first NUL byte."""
    return b.split(b"\x00", 1)[0].decode(errors="replace")


def _split_components(raw: str) -> Optional[List[str]]:
    """Split an archive path into clean components.

    Both "/" and "\\" are treated as separators. Empty and "." components
    are dropped, so "." and "/a//b/" normalise to [] and ["a", "b"].
    Names that merely start with a dot (".well-known") are kept intact.

    Returns None when any component is "..", i.e. the path tries to climb
    out of the extraction directory.
    """
    parts = [p for p in raw.replace("\\", "/").split("/") if p not in ("", ".")]
    return None if ".." in parts else parts


def _join_inside(root: Path, parts: List[str]) -> Optional[Path]:
    """Join *parts* onto *root*, or return None if the result escapes *root*.

    *root* must already be resolved. The containment check catches what
    component filtering cannot: drive-qualified components and symlinks
    already present in the output tree that point elsewhere.
    """
    candidate = root.joinpath(*parts)
    try:
        candidate.resolve().relative_to(root)
    except (ValueError, OSError, RuntimeError):
        return None
    return candidate


def _copy_payload(src: BinaryIO, dst: BinaryIO, size: int) -> int:
    """Copy up to *size* bytes from *src* to *dst*; return the bytes copied.

    The return value is smaller than *size* only when *src* hits EOF early.
    """
    remaining = size
    while remaining > 0:
        chunk = src.read(min(_CHUNK_SIZE, remaining))
        if not chunk:
            break
        dst.write(chunk)
        remaining -= len(chunk)
    return size - remaining


def extract(wpress_path: str, out_dir: str, verbose: bool = True) -> dict:
    """Extract every file of a .wpress archive into *out_dir*.

    Args:
        wpress_path: Path to the .wpress archive.
        out_dir: Destination directory; created if missing.
        verbose: When true, print a progress line every 200 files.

    Returns:
        A dict with keys "files" (files written), "bytes" (payload bytes
        actually written), "mb" (the same in MiB, one decimal), "skipped"
        (entries not written: empty names and unsafe paths) and "out_dir".

    Raises:
        ValueError: if a size or mtime header field is not numeric.
        OSError: if the archive cannot be read or a file cannot be written.

    Entries whose path or name would land outside *out_dir* are skipped
    with a warning on stderr. If the archive ends in the middle of a file's
    payload, the partial file is kept, a warning is printed and extraction
    stops.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    # Resolved once so containment checks compare like with like.
    root = out.resolve()

    count = 0
    total_bytes = 0
    skipped = 0

    with open(wpress_path, "rb") as f:
        while True:
            header = f.read(HEADER_SIZE)
            if len(header) < HEADER_SIZE or header == _EOF_HEADER:
                break

            name = _parse_str(header[:NAME_LEN])
            size = _parse_int(header[_SIZE_OFF:_MTIME_OFF])
            mtime = _parse_int(header[_MTIME_OFF:_PATH_OFF])
            path = _parse_str(header[_PATH_OFF:_PATH_OFF + PATH_LEN])

            dir_parts = _split_components(path)
            name_parts = _split_components(name)
            dest_dir = None if dir_parts is None else _join_inside(root, dir_parts)

            if not name:
                # No file to write; still create the directory when it is safe.
                if dest_dir is not None:
                    dest_dir.mkdir(parents=True, exist_ok=True)
                skipped += 1
                f.seek(size, 1)
                continue

            dest_file = None
            if dest_dir is not None and name_parts:
                dest_file = _join_inside(root, dir_parts + name_parts)
            if dest_file is None:
                print(
                    f"WARNING: skipping unsafe entry path={path!r} name={name!r}",
                    file=sys.stderr,
                )
                skipped += 1
                f.seek(size, 1)
                continue

            dest_file.parent.mkdir(parents=True, exist_ok=True)
            with open(dest_file, "wb") as o:
                written = _copy_payload(f, o, size)

            if mtime > 0:
                try:
                    os.utime(dest_file, (mtime, mtime))
                except (OSError, OverflowError, ValueError):
                    # Restoring the timestamp is best-effort only.
                    pass

            count += 1
            total_bytes += written

            if written < size:
                print(
                    f"WARNING: archive truncated in {name!r}: "
                    f"expected {size} bytes, got {written}",
                    file=sys.stderr,
                )
                break

            if verbose and count % 200 == 0:
                print(
                    f" [{count} files | {total_bytes / 1024 / 1024:.1f} MB extracted]",
                    flush=True,
                )

    result = {
        "files": count,
        "bytes": total_bytes,
        "mb": round(total_bytes / 1024 / 1024, 1),
        "skipped": skipped,
        "out_dir": str(out),
    }
    print(f"DONE: {count} files | {result['mb']} MB -> {out_dir} (skipped {skipped})")
    return result


def main():
    """Parse command-line arguments and run the extraction."""
    p = argparse.ArgumentParser(description="Extract .wpress archive")
    p.add_argument("wpress", help="Path to .wpress file")
    p.add_argument("outdir", help="Destination directory")
    p.add_argument("-q", "--quiet", action="store_true", help="Suppress progress output")
    args = p.parse_args()
    extract(args.wpress, args.outdir, verbose=not args.quiet)


if __name__ == "__main__":
    main()