recent updates
This commit is contained in:
@@ -0,0 +1,110 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Extract All-in-One WP Migration .wpress archive.
|
||||
|
||||
Usage:
    python3 extract_wpress.py <path/to/file.wpress> <output/directory>

The .wpress format is a sequential binary archive with 4377-byte headers:
    255 bytes   filename (null-padded)
    14 bytes    file size in bytes (ASCII digits, null-padded)
    12 bytes    mtime unix timestamp (ASCII digits, null-padded)
    4096 bytes  relative path (null-padded)
Each header is followed immediately by the raw file bytes, then the next header.
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
# Widths, in bytes, of the four fixed-size fields of one entry header, in the
# order they appear: filename, file size, mtime, relative path.
NAME_LEN = 255
SIZE_LEN = 14
MTIME_LEN = 12
PATH_LEN = 4096

# Length of a complete header: the four fields laid end to end (4377 bytes).
HEADER_SIZE = NAME_LEN + SIZE_LEN + MTIME_LEN + PATH_LEN
|
||||
|
||||
|
||||
def _parse_int(b: bytes) -> int:
|
||||
s = b.split(b"\x00", 1)[0].decode(errors="replace").strip()
|
||||
return int(s) if s else 0
|
||||
|
||||
|
||||
def _parse_str(b: bytes) -> str:
|
||||
return b.split(b"\x00", 1)[0].decode(errors="replace")
|
||||
|
||||
|
||||
def _safe_parts(raw: str) -> list:
    """Split an archive path on "/" and drop empty, "." and ".." components."""
    return [part for part in raw.split("/") if part not in ("", ".", "..")]


def _is_within(root: Path, candidate: Path) -> bool:
    """Report whether *candidate* lies lexically inside (or equals) *root*."""
    root_abs = os.path.abspath(root)
    try:
        return os.path.commonpath([root_abs, os.path.abspath(candidate)]) == root_abs
    except ValueError:
        # commonpath() refuses paths on different drives (Windows).
        return False


def extract(wpress_path: str, out_dir: str, verbose: bool = True) -> dict:
    """Unpack every entry of a .wpress archive below *out_dir*.

    The archive is read sequentially: a fixed-size header (name, size, mtime,
    relative path) followed by that many bytes of file data, repeated until
    end of file or an all-zero header is reached.

    Header paths and names are untrusted input. Empty, "." and ".."
    components are discarded, and an entry whose destination would still
    fall outside *out_dir* is skipped, so nothing is written outside the
    output directory. Entries with no usable file name are skipped as well;
    skipped entries create no files or directories.

    Args:
        wpress_path: Path of the .wpress archive to read.
        out_dir: Directory to extract into; created if missing.
        verbose: When true, print a progress line every 200 files.

    Returns:
        A dict with the keys ``files`` (entries written), ``bytes`` (payload
        bytes actually written), ``mb`` (the same in MiB, one decimal),
        ``skipped`` (entries not written), ``out_dir`` and ``truncated``
        (True when the archive ended in the middle of a header or a file).

    Raises:
        ValueError: If a size or mtime field is not numeric, or a size is
            negative.
        OSError: If the archive cannot be read or a destination cannot be
            created or written.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    count = 0
    total_bytes = 0
    skipped = 0
    truncated = False

    # Field boundaries inside a header, computed once instead of per entry.
    size_end = NAME_LEN + SIZE_LEN
    mtime_end = size_end + MTIME_LEN
    path_end = mtime_end + PATH_LEN
    end_marker = b"\x00" * HEADER_SIZE

    with open(wpress_path, "rb") as f:
        while True:
            header = f.read(HEADER_SIZE)
            if not header:
                break
            if len(header) < HEADER_SIZE:
                # A partial header means the archive was cut short.
                truncated = True
                break
            if header == end_marker:
                break

            name = _parse_str(header[:NAME_LEN])
            size = _parse_int(header[NAME_LEN:size_end])
            mtime = _parse_int(header[size_end:mtime_end])
            path = _parse_str(header[mtime_end:path_end])

            if size < 0:
                # A negative size would make the relative seek below go
                # backwards and re-read earlier data.
                raise ValueError(f"corrupt header for {name!r}: negative size {size}")

            name_parts = _safe_parts(name)
            dest_file = out.joinpath(*_safe_parts(path), *name_parts)
            # The containment check catches what component filtering cannot,
            # e.g. backslash separators or drive prefixes on Windows.
            if not name_parts or not _is_within(out, dest_file):
                skipped += 1
                f.seek(size, 1)
                continue

            dest_file.parent.mkdir(parents=True, exist_ok=True)
            remaining = size
            with open(dest_file, "wb") as o:
                while remaining > 0:
                    chunk = f.read(min(65536, remaining))
                    if not chunk:
                        break
                    o.write(chunk)
                    remaining -= len(chunk)

            if mtime > 0:
                try:
                    os.utime(dest_file, (mtime, mtime))
                except (OSError, OverflowError, ValueError):
                    # Restoring the timestamp is best-effort only.
                    pass

            count += 1
            # Count what was really written, not what the header promised.
            total_bytes += size - remaining

            if remaining > 0:
                truncated = True
                break

            if verbose and count % 200 == 0:
                print(f" [{count} files | {total_bytes / 1024 / 1024:.1f} MB extracted]", flush=True)

    if truncated:
        print(
            f"WARNING: {wpress_path} ended unexpectedly; extraction is incomplete",
            file=sys.stderr,
        )

    result = {
        "files": count,
        "bytes": total_bytes,
        "mb": round(total_bytes / 1024 / 1024, 1),
        "skipped": skipped,
        "out_dir": str(out),
        "truncated": truncated,
    }
    print(f"DONE: {count} files | {result['mb']} MB -> {out_dir} (skipped {skipped})")
    return result
|
||||
|
||||
|
||||
def main():
    """Parse the command line and run the extraction.

    Expects two positional arguments, the archive path and the destination
    directory, plus an optional ``-q``/``--quiet`` flag that turns off
    progress output.
    """
    parser = argparse.ArgumentParser(description="Extract .wpress archive")
    parser.add_argument("wpress", help="Path to .wpress file")
    parser.add_argument("outdir", help="Destination directory")
    parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="Suppress progress output",
    )
    options = parser.parse_args()
    extract(options.wpress, options.outdir, verbose=not options.quiet)
|
||||
|
||||
|
||||
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user