# arisingmedia-web-sops/wp-divi-pipeline-to-am-stack/scripts/migrate.py

#!/usr/bin/env python3
"""
migrate.py — AM Stack A migration launcher.
Points at a .wpress file and runs all extraction phases automatically.
Phases 7+ require human/agent review of staged seed_databases.py.

Usage:
  python3 migrate.py --wpress /path/to/backup.wpress --domain example.com [--project /path/to/project]

Output:
  Runs phases 0-6, then prints agent breadcrumbs for phases 7-11.
"""
import argparse, os, sys, subprocess, json

# SOPS is the parent of the directory holding this file; SCRIPTS is its
# `scripts` subdirectory, where the per-phase helper scripts are looked up.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
SOPS = os.path.dirname(_SCRIPT_DIR)
SCRIPTS = os.path.join(SOPS, 'scripts')

def run(cmd: list, label: str) -> bool:
    """Run *cmd* as a child process and report whether it succeeded.

    The child's output is not captured: it inherits this process's
    stdout/stderr and is shown as it is produced.

    Args:
        cmd: Argument vector handed to ``subprocess.run`` (no shell involved).
        label: Tag prefixed to the status lines printed by this function.

    Returns:
        True when the command exited with status 0. False when it exited
        with a non-zero status or could not be started at all (for example
        the executable does not exist).
    """
    # Flush so the banner reaches the shared stdout before the child writes
    # to it; otherwise piped (block-buffered) output appears out of order.
    print(f"\n[{label}] Running: {' '.join(cmd)}", flush=True)
    try:
        result = subprocess.run(cmd, capture_output=False)
    except OSError as exc:
        # Missing or non-executable program: report it as a failed step
        # instead of letting the traceback escape a bool-returning helper.
        print(f"[{label}] FAILED (could not start: {exc})")
        return False
    if result.returncode != 0:
        print(f"[{label}] FAILED (exit {result.returncode})")
        return False
    print(f"[{label}] OK")
    return True

def phase_header(n: int, title: str):
    """Print a banner for phase *n*: a blank line, then the phase title
    framed above and below by a 60-character rule of ``=``."""
    rule = '=' * 60
    banner = f"\n{rule}\n  Phase {n} — {title}\n{rule}"
    print(banner)

def _detect_divi_version(site_info_path: str, default=5):
    """Return the ``divi_version`` value stored in *site_info_path*.

    Returns *default* when the file does not exist, cannot be read, is not
    valid JSON, is not a JSON object, or has no ``divi_version`` key.
    """
    if not os.path.exists(site_info_path):
        return default
    try:
        with open(site_info_path, encoding='utf-8') as f:
            info = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"  WARNING: could not read {site_info_path}: {exc}")
        return default
    if not isinstance(info, dict):
        print(f"  WARNING: {site_info_path} is not a JSON object — using default")
        return default
    return info.get('divi_version', default)


def main():
    """Parse the command line, run phases 0-6, then print the next steps.

    Phase 1 (archive extraction) and phase 2 (database analysis) are fatal:
    if either fails the process exits with status 1. The later phases are
    best-effort; their failures are collected and listed in the final
    summary so they are not lost in the scroll-back.

    Exits with status 1 when the ``--wpress`` path is not an existing file.
    """
    parser = argparse.ArgumentParser(description='AM Stack A migration launcher')
    parser.add_argument('--wpress', required=True, help='Path to .wpress backup file')
    parser.add_argument('--domain', required=True, help='Target domain (e.g. example.com)')
    parser.add_argument('--project', help='Project directory (default: ~/arisingmedia-websites/{domain})')
    args = parser.parse_args()

    wpress = os.path.abspath(args.wpress)
    domain = args.domain
    project = args.project or os.path.expanduser(f'~/arisingmedia-websites/{domain}')
    extract_dir = os.path.join(project, '.planning', 'wpress-extract')
    data_dir    = os.path.join(project, '.planning', 'data')
    content_dir = os.path.join(data_dir, 'content')
    images_dir  = os.path.join(project, 'assets', 'images')

    # isfile (not exists): a directory at that path is not a usable archive.
    if not os.path.isfile(wpress):
        print(f"ERROR: .wpress file not found: {wpress}")
        sys.exit(1)

    # Run the phase scripts with the interpreter running this launcher, so
    # an active virtualenv is honoured; fall back to PATH lookup otherwise.
    python = sys.executable or 'python3'

    # Non-fatal problems, reported together in the final summary.
    problems = []

    print("\nAM Stack A Migration Pipeline")
    print(f"  Domain:  {domain}")
    print(f"  Project: {project}")
    print(f"  Archive: {wpress}")

    # Phase 0 — Setup
    phase_header(0, 'Setup')
    for d in [extract_dir, data_dir, content_dir,
              images_dir,
              os.path.join(project, 'build'),
              os.path.join(project, 'src', 'api', 'data'),
              os.path.join(project, 'src', 'api', 'templates'),
              os.path.join(project, 'src', 'api', 'components')]:
        os.makedirs(d, exist_ok=True)
        print(f"  mkdir {d}")

    # Phase 1 — Extract
    phase_header(1, 'Extract .wpress archive')
    if not run([python, os.path.join(SCRIPTS, 'extract_wpress.py'), wpress, extract_dir], 'Phase 1'):
        sys.exit(1)

    # Phase 2 — DB Analysis
    phase_header(2, 'Database analysis')
    if not run([python, os.path.join(SCRIPTS, 'analyze_db.py'), extract_dir, data_dir], 'Phase 2'):
        sys.exit(1)

    # Detect Divi version
    divi_version = _detect_divi_version(os.path.join(data_dir, 'site-info.json'))
    print(f"  Divi version detected: {divi_version}")

    # Phase 3 — Nav extraction
    phase_header(3, 'Extract navigation menus')
    if not run([python, os.path.join(SCRIPTS, 'extract_nav.py'), extract_dir, data_dir], 'Phase 3 (nav)'):
        problems.append('Phase 3 (nav): navigation extraction failed')

    # Phase 3 — Content extraction
    extract_script = f'extract_divi{divi_version}.py'
    extract_path = os.path.join(SCRIPTS, extract_script)
    pages_json = os.path.join(data_dir, 'pages.json')
    # The version comes from a data file, so make sure it names a script that
    # really exists directly inside SCRIPTS before executing anything.
    if os.path.basename(extract_script) != extract_script or not os.path.isfile(extract_path):
        print(f"  WARNING: no extractor script {extract_script!r} in {SCRIPTS} — content extraction skipped")
        problems.append(f'Phase 3 (content): no extractor for Divi version {divi_version!r}')
    elif not run([python, extract_path, pages_json, content_dir], f'Phase 3 (divi{divi_version})'):
        print(f"  WARNING: content extraction had errors — review {content_dir}")
        problems.append(f'Phase 3 (divi{divi_version}): content extraction had errors')

    # Phase 5 — Media
    phase_header(5, 'Extract and convert media')
    if not run([python, os.path.join(SCRIPTS, 'extract_media.py'), extract_dir, data_dir,
                images_dir], 'Phase 5'):
        problems.append('Phase 5: media extraction failed')

    # Phase 6 — Stage seed_databases.py
    phase_header(6, 'Stage seed_databases.py skeleton')
    seed_path = os.path.join(project, 'build', 'seed_databases.py')
    stage_script = os.path.join(SCRIPTS, 'stage_seed.py')
    if os.path.exists(stage_script):
        if not run([python, stage_script, data_dir, seed_path, '--domain', domain], 'Phase 6'):
            problems.append('Phase 6: staging seed_databases.py failed')
    else:
        print("  WARNING: stage_seed.py not found — seed_databases.py must be written manually")
        print("  Reference: /home/sirdrez/arisingmedia-websites/vibrantyou.yoga/build/seed_databases.py")
        problems.append('Phase 6: stage_seed.py not found — seed_databases.py was not staged')

    # Print agent breadcrumbs for remaining phases
    print(f"\n{'='*60}")
    print("  EXTRACTION COMPLETE — Manual/Agent phases follow")
    print(f"{'='*60}")
    if problems:
        print("\n  WARNINGS — these steps did not complete cleanly:")
        for problem in problems:
            print(f"    - {problem}")
        status = 'Phases 0-6 finished with warnings (listed above).'
    else:
        status = 'Phases 0-6 complete.'
    # SOPS is already the pipeline directory (the parent of scripts/), so the
    # breadcrumbs document is referenced directly beneath it.
    print(f"""
{status} Staged content is at:
  {data_dir}/content/       ← extracted page sections (JSON)
  {data_dir}/nav.json       ← navigation items
  {data_dir}/media-manifest.json  ← image URL mappings
  {seed_path}  ← seed_databases.py skeleton

Next steps (see 10-agent-breadcrumbs.md for full detail):

  Phase 7 — REVIEW seed_databases.py
    Open: {seed_path}
    For each page: verify sections_json has correct section types
    Replace em-dashes. Remove Divi shortcode residue. Review nav items.

  Phase 8 — RUN seed_databases.py
    cd {project} && python3 build/seed_databases.py
    Verify: output shows all counts > 0

  Phase 9 — SCAFFOLD PHP templates
    Copy from reference: vibrantyou.yoga/src/api/
    Update brand name and colors in _header.php + _footer.php

  Phase 10 — BUILD
    cd {project} && docker compose build --no-cache && docker compose up -d
    Verify: curl -I http://localhost:PORT/

  Phase 11 — QA
    bash {SOPS}/../tools/verify-protection.sh http://localhost:PORT
    Lighthouse in Firefox

Reference: {SOPS}/10-agent-breadcrumbs.md
""")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()