diff --git a/CONTENT.md b/CONTENT.md new file mode 100644 index 0000000..f469630 --- /dev/null +++ b/CONTENT.md @@ -0,0 +1,121 @@ +# CONTENT - Copy Standards, Image Rules, and Asset Guidelines +Author: Andre Cobham / Arising Media +Updated: 2026-06-09 + +## Writing Standard + +Reading level: 7th to 8th grade for service businesses, 10th to 12th grade for professional/B2B. + +Active voice. Short paragraphs (2 to 4 sentences max). + +Lead with the customer's problem, not the business's credentials. + +One clear CTA per section. + +No marketing jargon (synergize, leverage, best-in-class, cutting-edge, state-of-the-art). + +No filler phrases (In today's fast-paced world, Look no further, We pride ourselves on, Don't hesitate, Whether you are X or Y). + +Specificity beats superlatives. A timeframe, a material, a measurable result beats any adjective. + +## What We Never Write + +Em dashes or en dashes. Replace with a period, comma, colon, or the word "and" or "to". + +Invented numbers (satisfaction rates, years of experience, award claims) without client-verified proof. + +"Licensed" for any provisionally licensed or permit-holding clinician. Use "Provisionally Licensed" instead. + +Exclamation points (one per page maximum, in a CTA only). + +Passive voice as the default sentence structure. + +Verification check: `grep -rn '—\|–\|&mdash;\|&ndash;' . --include="*.html" --include="*.json"`. Result must be zero. + +## Tone by Sector + +Service businesses (trades, cleaning, home services): plain, direct, neighborhood-familiar. + +Healthcare / counseling: warm, clinical accuracy required, never overpromise outcomes. + +Professional / B2B / tech: peer-level, systems-oriented, results-focused. + +## Healthcare Credential Rules + +MHC-LP is Provisionally Licensed, not Licensed. LP stands for Limited Permit. + +Any permit holder must display: "Practices under the supervision of [Name], [Credential]" in the footer, about page, and auto-response email. 
+ +Use "Provisionally Licensed" everywhere: credential row, why-cards, location page intros, section headers, auto-response emails, meta descriptions. + +Run grep check before launch: `grep -r "Licensed Professional" --include="*.html"` . result must be zero. + +Supervisor's credential must be accurate to the letter. LMHC not equivalent to LMHC-D. Confirm the current designation before publishing. + +When a credential changes (upgrade or advancement), update the source templates first (render.py, copy_library.py), rebuild the Docker container, then re-run verification checks. + +## Copy Structure + +One h1 per page. h2 for major sections. h3 for cards or items within a section. Never skip levels. + +Footer on every page: phone, hours, address, in identical format. + +Phone format: (###) ###-####. Hours: Monday to Friday: 8:00 AM to 5:00 PM (no dashes). + +Links use relative paths with .html extension for internal pages. External links include target="_blank" rel="noopener". Phone links use tel:+1 format. Email links use mailto:. Map links open in new tab. + +## Image Standards + +Format: WebP only in production. No JPGs or PNGs in the webroot. + +Every image has descriptive alt text or alt="" if decorative. + +loading="lazy" on every image except the above-the-fold hero. + +width and height attributes on every img tag to prevent layout shift. + +No people, no faces in any generated or stock imagery. + +Show the result (clean room, finished floor, complete installation), not the process or equipment. + +Hero images: unique per page, named hero-{page-slug}.webp. + +## Image Generation + +Preferred source: local ComfyUI (FLUX.1 Schnell) or Google Imagen API. + +Every generated image passes a vision validation check for people/faces before being saved. + +Prompt structure: camera angle + lens + subject + foreground detail + background + lighting + no people. + +Specify lens focal length and depth of field. Vague room names produce incoherent scenes. 
+ +Never generate: people, faces, cleaning equipment, text overlaid on source, before/after states. + +No cleaning machines, vacuums, steam equipment, or hoses in any image. + +Show upright equipment only, not flat industrial models. + +Machines must look functional and modern, not dated or commercial-scale. + +## Image Size Targets + +Service card / thumbnail: max 900px wide, 78% quality, 30-80KB target. + +Hero image (page header): max 1400px wide, 80% quality, 50-180KB target. + +OG / social share image: max 1200px wide, 85% quality, under 150KB. + +Images over these targets are rejected at deploy time. + +## Prompt Engineering + +All prompts follow this structure: {camera angle} {lens} {subject description}, {foreground detail} sharp in foreground, {background} receding into bokeh, {lighting description}, no people, ultra-realistic {type} photography. + +This pattern produces correct depth, perspective, and scene geometry because it names every surface explicitly. + +Fix incoherent objects by naming every part of the frame: background walls, floor material, ceiling (if visible), and what recedes. Avoid vague room names ("office" without detail). Specify plain surfaces (cream painted wall, white drop ceiling) not implied ones. + +Inline negative elements in the prompt itself (no people, no machines, no text), not in a separate negative prompt. + +Do not use "wide shot" without a camera angle qualifier. \ No newline at end of file diff --git a/OPTIMIZATION.md b/OPTIMIZATION.md new file mode 100644 index 0000000..75ad181 --- /dev/null +++ b/OPTIMIZATION.md @@ -0,0 +1,616 @@ +# OPTIMIZATION - Mobile Responsive, SEO, Testing, and Performance +Author: Andre Cobham / Arising Media +Updated: 2026-06-09 + +## Mobile + +Mobile-first CSS. Default styles target 320px and up. + +Breakpoints: 360px, 480px, 600px, 768px, 900px, 1023px, 1024px. + +Switch to mobile nav at max-width: 1023px, not 768px. A typical header does not fit cleanly below 1024px. 
+ +Inline grid styles (style="display:grid;grid-template-columns:1fr 1fr") require !important overrides in media queries to collapse on mobile. Include override block at end of main.css. + +Always set: html, body { overflow-x: clip; max-width: 100%; } + +Form fields (input, select, textarea) require min-width: 0 and box-sizing: border-box on mobile or they push layout wider than viewport. + +Touch targets: 44x44px minimum (WCAG, Apple HIG). + +### Verification . Before Declaring Done + +Always run a Playwright check at multiple viewport widths. Save the script in +`.planning/mobile_check.py`: + +```python +from playwright.sync_api import sync_playwright + +PAGES = ['/', '/about/', '/services/', '/locations/buffalo.html', '/contact/'] + +with sync_playwright() as p: + b = p.firefox.launch(headless=True) + for w in [320, 360, 390, 768, 900, 1023, 1024, 1200]: + ctx = b.new_context(viewport={'width': w, 'height': 800}) + page = ctx.new_page() + for path in PAGES: + page.goto(f'http://localhost:8096{path}', wait_until='networkidle') + r = page.evaluate('() => ({sw: document.documentElement.scrollWidth, cw: document.documentElement.clientWidth})') + diff = r['sw'] - r['cw'] + status = 'OK' if diff <= 0 else f'OVERFLOW +{diff}px' + print(f' w={w} {path:<35} {status}') + ctx.close() + b.close() +``` + +Result must be zero overflow on every page at every width. + +### Animation on Mobile + +Scroll-triggered animations (`data-animate="up"` etc.) work the same on mobile. +But if you take a full-page screenshot for review, force-trigger them first: + +```python +page.evaluate("() => document.querySelectorAll('[data-animate]').forEach(e => e.classList.add('in-view'))") +page.wait_for_timeout(500) +``` + +Otherwise the screenshot shows blank sections that are actually hiding behind +the IntersectionObserver. 
+ +### Touch Targets + +- Tap targets must be at least 44x44px (Apple HIG, WCAG) +- Header menu button: 44px square minimum +- Form submit buttons: padding 0.875rem vertical minimum +- Phone-link CTAs: same + +### Test Devices + +When the site is "done", verify on: +- Real iPhone (Safari) . test the form actually submits +- Real Android phone (Chrome) . same +- Tablet (iPad) . header switches to mobile menu cleanly at 1023px and below +- Desktop (any browser) . full nav, hover states work + +--- + +## SEO, Meta, and Schema + +Every page on every site must include the full set of head tags below. +No exceptions. + +### Required `` Tags (Every Page) + +```html + + + + + {Page-specific title under 60 chars} | {Brand} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +``` + +### OG Image + +One default OG image at `/assets/images/og-default.jpg`: +- 1200x630px +- Brand colors +- Logo + company name + city + one descriptor (e.g., "Hardwood Floor Refinishing . Buffalo, NY") +- Under 200KB optimized + +For pages with their own hero image (location, service detail), use that +image's webp/jpg version as the OG image instead of the default. + +### Schema.org JSON-LD + +#### Home Page . 
LocalBusiness + +```json +{ + "@context": "https://schema.org", + "@type": "LocalBusiness", + "@id": "https://{domain}/#business", + "name": "{Legal business name}", + "url": "https://{domain}", + "telephone": "+1{10digits}", + "email": "{contact email}", + "address": { + "@type": "PostalAddress", + "streetAddress": "{street}", + "addressLocality": "{city}", + "addressRegion": "{state}", + "postalCode": "{zip}", + "addressCountry": "US" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": {lat}, + "longitude": {lng} + }, + "areaServed": [ + { "@type": "City", "name": "Buffalo" }, + { "@type": "City", "name": "Amherst" } + ], + "openingHoursSpecification": [{ + "@type": "OpeningHoursSpecification", + "dayOfWeek": ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"], + "opens": "08:00", + "closes": "17:00" + }], + "aggregateRating": { + "@type": "AggregateRating", + "ratingValue": "4.9", + "reviewCount": "47" + } +} +``` + +#### Service Detail Pages . Service + +```json +{ + "@context": "https://schema.org", + "@type": "Service", + "serviceType": "{Service name}", + "provider": { "@id": "https://{domain}/#business" }, + "areaServed": { "@type": "AdministrativeArea", "name": "Erie County, NY" }, + "url": "https://{domain}{path}" +} +``` + +#### Location Pages . LocalBusiness with areaServed Override + +Same as the home `LocalBusiness` schema but override `areaServed` to the +specific city, and include a different `@id` per page. + +#### Every Page . BreadcrumbList + +```json +{ + "@context": "https://schema.org", + "@type": "BreadcrumbList", + "itemListElement": [ + { "@type": "ListItem", "position": 1, "name": "Home", "item": "https://{domain}/" }, + { "@type": "ListItem", "position": 2, "name": "Services", "item": "https://{domain}/services/" }, + { "@type": "ListItem", "position": 3, "name": "Floor Refinishing" } + ] +} +``` + +#### FAQ Pages or Sections . 
FAQPage + +```json +{ + "@context": "https://schema.org", + "@type": "FAQPage", + "mainEntity": [{ + "@type": "Question", + "name": "How long does refinishing take?", + "acceptedAnswer": { "@type": "Answer", "text": "Most rooms take 2 to 3 days..." } + }] +} +``` + +### robots.txt + +Every site needs one at `/robots.txt`: + +``` +User-agent: * +Allow: / +Disallow: /api/ + +Sitemap: https://{domain}/sitemap.xml +``` + +### sitemap.xml + +Generate at the end of every build. Save at `/sitemap.xml`. One `<url>` entry +per page. Use `<lastmod>` from the file's mtime. + +```xml +<?xml version="1.0" encoding="UTF-8"?> +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> + <url> + <loc>https://{domain}/</loc> + <lastmod>2026-05-08</lastmod> + <priority>1.0</priority> + </url> + <url> + <loc>https://{domain}/services/floor-refinishing.html</loc> + <lastmod>2026-05-08</lastmod> + <priority>0.8</priority> + </url> +</urlset> +``` + +After deploy, submit the sitemap to Google Search Console. + +### Title and Description Rules + +- **Title** under 60 characters. Format: `{Service} | {Brand} . {City}, {State}` +- **Description** 150-160 characters, action-oriented, include city + service, phone when possible +- Never use the same title/description on multiple pages +- City + service name in title for location and service pages (huge SEO impact) +- No em-dashes in meta tags. Use `|` pipe as brand separator +- Always include `<meta name="keywords">`: 5-10 comma-separated terms, city + service variants + +### Title/Meta Examples (Lahrcarpetcleaning.com Reference) + +Homepage: +```html +Lahr Carpet Cleaning | Residential & Commercial Carpet Cleaning . Finger Lakes, NY + + + +``` + +Service page: +```html +Carpet Cleaning | Lahr Carpet Cleaning . 
Waterloo, NY + + +``` + +Location page: +```html +Carpet Cleaning in Seneca Falls, NY | Lahr Carpet Cleaning + + +``` + +### Audit Before Launch + +```bash +# All pages have title +grep -rL '<title>' site --include='*.html' + +# All pages have canonical +grep -rL '<link rel="canonical"' site --include='*.html' + +# All pages have og: tags +for f in $(find site -name "*.html"); do + grep -q 'property="og:' "$f" || echo "MISSING og: $f" +done + +# All pages have JSON-LD +for f in $(find site -name "*.html"); do + grep -q 'application/ld+json' "$f" || echo "MISSING schema: $f" +done +``` + +Each command should return zero results before declaring done. + +### llms.txt: AI Crawler Documentation + +Every project must include `/llms.txt` at the site root. This is the emerging standard (llmstxt.org) for telling AI crawlers (Perplexity, Claude, GPT) what your site does and what they should/should not index. + +**Location:** `src/llms.txt` (Docker) or `public_html/llms.txt` (cPanel) + +**Minimum content:** + +``` +# {Brand Name} +> {One-line description of what the site does} + +{2-3 sentence description of the business, services, and target audience.} + +## Services +- {Service 1} +- {Service 2} + +## Pages +- /: Homepage +- /about/: About +- /contact/: Contact + +## Not for external use +- /api/: Internal API endpoints +- /account/: User account pages +``` + +**robots.txt must also disallow /api/ and private paths.** llms.txt is the invitation, robots.txt is the boundary. + +--- + +## Testing and Verification + +Before declaring a site done, run every check in this document and show the +output. No exceptions. + +### The Law + +Before stating ANYTHING works, is configured, is ready, is installed, or is +complete, run a live test and show the raw output. No output = it was not +tested. Not for plugins, not for logins, not for deploys. Test it. Show the proof. 
+ +### Build Verification + +After running build scripts: + +```bash +# Zero unreplaced placeholders +grep -rn "{{" site/locations/*.html site/services/*.html +# Result: empty +``` + +### Container Health + +```bash +docker compose ps +# All services "Up" and (healthy) where applicable + +docker logs {project}-api-1 2>&1 | tail -20 +# No errors, no stack traces + +curl -s -o /dev/null -w "%{http_code}\n" http://localhost:{port}/ +# 200 +``` + +### URL Surface Check + +Every public URL returns 200. Every sensitive URL returns 404. + +```bash +# Public . should all be 200 +for p in "/" "/about/" "/services/" "/locations/" "/locations/buffalo.html" \ + "/services/floor-refinishing.html" "/contact/" "/reviews/" \ + "/assets/css/main.css" "/components/header.html" "/api/health"; do + curl -s -o /dev/null -w "%{http_code} ${p}\n" http://localhost:{port}${p} +done + +# Sensitive . should all be 404 +for p in "/Dockerfile" "/nginx.conf" "/.env" "/api/.env" "/.git/HEAD" \ + "/build_locations.py" "/docker-compose.yml" "/README.md"; do + curl -s -o /dev/null -w "%{http_code} ${p}\n" http://localhost:{port}${p} +done +``` + +### Container Content Check + +Confirm sensitive files are not inside the nginx container: + +```bash +docker exec {project}-web-1 ls /usr/share/nginx/html/ +# Only public folders + index.html. 
No api/, no Dockerfile, no .env + +docker exec {project}-web-1 ls /usr/share/nginx/html/api/ 2>&1 +# Result: "No such file or directory" (api was correctly excluded from web image) +``` + +### Mobile Responsive Check (Playwright) + +```python +from playwright.sync_api import sync_playwright + +PAGES = ['/', '/about/', '/services/', '/services/floor-refinishing.html', + '/locations/', '/locations/buffalo.html', '/contact/', '/reviews/'] + +with sync_playwright() as p: + b = p.firefox.launch(headless=True) + fails = 0 + for w in [320, 360, 390, 768, 900, 1023, 1024, 1200]: + ctx = b.new_context(viewport={'width': w, 'height': 800}) + page = ctx.new_page() + for path in PAGES: + page.goto(f'http://localhost:{port}{path}', wait_until='networkidle') + r = page.evaluate('() => ({sw: document.documentElement.scrollWidth, cw: document.documentElement.clientWidth})') + diff = r['sw'] - r['cw'] + status = 'OK' if diff <= 0 else f'OVERFLOW +{diff}px' + print(f' w={w} {path:<40} {status}') + if diff > 0: fails += 1 + ctx.close() + print(f'\nfails: {fails}') + b.close() +``` + +Result must be `fails: 0` across every page at every width. + +### Form Submission End-to-End + +After Resend domain is verified: + +```bash +# Validation rejection test +curl -s -X POST http://localhost:{port}/api/estimate \ + -H "Content-Type: application/json" \ + -d '{"name":"","email":"bad"}' +# Expected: {"error":"Validation failed.","fields":[...]} + +# Real submission test +curl -s -X POST http://localhost:{port}/api/estimate \ + -H "Content-Type: application/json" \ + -d '{"name":"Test","email":"acobham@arisingmedia.us","phone":"(716) 555-1234","address":"100 Test St","city":"Buffalo","zip":"14201","service":"refinishing","message":"E2E test","token":""}' +# Expected: {"ok":true} + +# Verify the email actually arrived in the destination inbox +``` + +### Idempotency Test + +Send the same payload twice. Both should return `{"ok":true}`. 
Inbox should +receive only ONE email (Resend deduplicates via Idempotency-Key). + +### Rate Limit Test + +Send 6 requests rapidly from the same IP: +```bash +for i in 1 2 3 4 5 6; do + curl -s -o /dev/null -w "%{http_code}\n" -X POST http://localhost:{port}/api/estimate \ + -H "Content-Type: application/json" \ + -d "{\"name\":\"RL$i\",\"email\":\"a@b.co\",\"phone\":\"(716) 555-1234\",\"address\":\"x\",\"service\":\"x\",\"token\":\"\"}" + echo "" +done +# Requests 1-5: 200 or 422 +# Request 6: 429 Too Many Requests +``` + +### SEO Surface + +```bash +# Every page has <title> +grep -rL "<title>" site --include="*.html" +# Result: empty + +# Every page has canonical +grep -rL 'rel="canonical"' site --include="*.html" + +# Every page has og:title +for f in $(find site -name "*.html"); do + grep -q 'property="og:title"' "$f" || echo "MISSING og: $f" +done + +# robots.txt and sitemap.xml exist and are served +curl -s http://localhost:{port}/robots.txt | head -5 +curl -s http://localhost:{port}/sitemap.xml | head -10 +``` + +### Dash Check (Content Rule) + +```bash +grep -rn '—\|–\|&mdash;\|&ndash;' site/ --include="*.html" --include="*.json" +# Result: empty +``` + +### Lighthouse / PageSpeed (Recommended) + +Run https://pagespeed.web.dev/ on the live URL after launch. Targets: +- Performance: 90+ +- Accessibility: 95+ +- Best Practices: 95+ +- SEO: 100 + +If accessibility is below 95, common fixes: +- Color contrast on body text (WCAG AA = 4.5:1 for normal text) +- Form labels associated with inputs (`<label for="...">`) +- Alt text on every meaningful image +- Skip-to-main-content link +- Focus indicators not removed in CSS + +### Browser Test Matrix + +Test in: +- Firefox (current): desktop + mobile emulation +- Chrome (current): desktop + mobile emulation +- Safari (current): desktop + iOS emulator if possible +- Real iPhone: actual phone, actual Safari +- Real Android: actual phone, actual Chrome + +The form must actually submit and produce a real email on the real phones. +That's the launch gate. 
+ +### Pre-Launch Sign-Off + +Don't ship until ALL of these are green: +- [ ] All public URLs return 200 +- [ ] All sensitive URLs return 404 +- [ ] No sensitive files inside nginx container +- [ ] Zero mobile horizontal overflow at 320-1200px +- [ ] Form submission produces a real email +- [ ] Idempotency dedupe works (same payload twice = one email) +- [ ] Rate limit triggers at 6th request +- [ ] All pages have title, description, canonical, og:, schema JSON-LD +- [ ] robots.txt and sitemap.xml exist and are accessible +- [ ] Zero em dashes or en dashes anywhere in HTML or JSON +- [ ] Resend domain shows fully green (SPF + DKIM + DMARC) +- [ ] First test email lands in primary inbox, not spam +- [ ] Tested on real iPhone and real Android device +- [ ] Lighthouse score 90+ across all four categories + +--- + +## Performance Standards + +### Images + +- Must be WebP, converted via `convert-to-webp.py` +- Service cards / thumbnails: max 900px wide, 78% quality, 30-80KB target +- Hero images: max 1400px wide, 80% quality, 50-180KB target +- OG images: max 1200px wide, 85% quality, under 150KB + +### Videos + +- Hero video: mp4 + webm pair +- Max ~5MB per clip +- Stitched reels: concatenated via ffmpeg + +### Cache Headers + +- Static assets (jpg, png, webp, css, js, svg, woff, woff2, mp4, webm): `Cache-Control: public, max-age=2592000, immutable` (30 days) +- HTML pages: `Cache-Control: no-cache, must-revalidate` (or vary by deployment) +- API responses: `Cache-Control: no-store` + +### JavaScript + +- No render-blocking JS (use `defer` or `type="module"`) +- Vanilla JS only, no frameworks +- `fetch`, `IntersectionObserver`, `querySelector` + +### CSS + +- No unused CSS +- Plain CSS only, no Sass/Tailwind +- `tokens.css` (design tokens) + `main.css` (components) +- Inline `<style>` blocks only for critical above-the-fold styles + +### HTML + +- One `<h1>` per page +- Semantic tags: `<main>`, `<section>`, `<article>`, `<aside>`, `<header>`, `<footer>`, `<nav>` +- Every `<img>` has `alt`, 
`width`, `height`, and `loading="lazy"` (except hero) +- No inline event handlers (onclick, onload, etc.) +- defer or async on all non-critical `<script>` tags + +### Lighthouse Targets + +- Performance: 90+ +- Accessibility: 95+ +- Best Practices: 95+ +- SEO: 100 + +Minimum viable scores for launch: +- Performance: 80+ (mobile), 90+ (desktop) +- Accessibility: 90+ +- Best Practices: 90+ +- SEO: 95+ diff --git a/STACK.md b/STACK.md new file mode 100644 index 0000000..0633c31 --- /dev/null +++ b/STACK.md @@ -0,0 +1,1770 @@ +# STACK — Architecture, Deployment, and Build Pipeline +Author: Andre Cobham / Arising Media +Updated: 2026-06-09 + +## Stack Philosophy + +Two primary stacks. Pick based on page count and update frequency. + +### Stack A — PHP Router + SQLite (50+ pages, standard as of 2026-05-21) + +- **PHP Router** — `router.php` dispatches every content URL to the correct PHP template. Edit one template = entire page class updates on next request. No find-and-replace. No file edits. +- **SQLite** — single-file content DB. `pages.sqlite` holds all page content (title, meta, sections JSON, schema). 10,000 rows = 5MB. Sub-millisecond reads. No server process. +- **Vanilla JavaScript** — no frameworks. `fetch`, `IntersectionObserver`, `querySelector` +- **Plain CSS** — `tokens.css` (design tokens) + `main.css` (components). No Sass, no Tailwind +- **Docker + nginx** — nginx routes `/assets/*` directly; all content URLs → PHP-FPM → router.php +- **Resend** — transactional email via `/api/contact.php` +- **Reference:** `arisingmedia.us` — 10,000+ pages + +### Stack B — Static HTML (fewer than 50 pages) + +- **Static HTML** — every page is a `.html` file on disk +- Same JS, CSS, Docker, nginx, Resend as Stack A +- Python 3 stdlib for build scripts (no pip) +- **Reference:** `lahrcarpetcleaning.com` + +### Never Use (Both Stacks) + +- Node.js / npm packages on the website. 
Front-end JS uses ZERO packages +- WordPress for new builds (we migrate clients OUT of WordPress) +- CSS frameworks (Bootstrap, Tailwind, Bulma) +- JS frameworks (React, Vue, Angular, Svelte) +- jQuery, Lodash, Moment, axios, or any utility library +- CSS-in-JS, styled-components +- Build tools that require `node_modules` (webpack, vite, parcel, esbuild) +- Tracking pixels other than what the client explicitly requests + +### Why This Stack + +1. **Performance** — a static HTML page with vanilla JS loads in <100ms with no parse cost from frameworks +2. **Longevity** — no dependency rot. A site we build today still works in 10 years with no maintenance +3. **Security** — no `npm audit` warnings, no supply-chain attack vectors, no transitive deps to patch +4. **Auditability** — every line on the site is something we wrote and can read in plain text +5. **Hosting** — a static folder + tiny Python container fits in the smallest VM tier any provider sells + +### When to Add a Server-Side Service + +Static-only is the default. Add a small Python service ONLY when needed for: +- Form submission (handled via Resend in the stdlib HTTP server pattern) +- A specific dynamic feature the client paid for (e.g., booking widget, AI chat) + +Each service is its own Docker container. Keep them small (single file when possible). +Use Python `http.server` + `urllib` from stdlib. Do not introduce Flask, FastAPI, Django, or any third-party HTTP framework. + +--- + +## Project Structure + +Two folders per project: source and deployment. + +### Source Folder + +Lives in the dev tree under `concept-agent/projects/{domain}/site/`. +Contains everything needed to maintain and rebuild the site. 
+ +``` +{domain}/site/ +├── index.html # home page +├── about/index.html # /about/ +├── contact/index.html # /contact/ +├── reviews/index.html # /reviews/ +├── blog/index.html # /blog/ +├── locations/ # location pages +│ ├── index.html # /locations/ +│ ├── _template.html # template stamped with JSON +│ ├── buffalo.html # generated, flat URL +│ ├── amherst.html +│ └── ... +├── services/ +│ ├── index.html +│ ├── _template.html +│ ├── floor-refinishing.html +│ └── ... +├── components/ +│ ├── header.html # loaded via fetch() by components.js +│ └── footer.html +├── data/ +│ ├── locations.json # source data for build_locations.py +│ └── services.json # source data for build_services.py +├── assets/ +│ ├── css/ +│ │ ├── main.css # variables, reset, layout +│ │ └── components.css # cards, hero, header, footer, nav, responsive +│ ├── js/ +│ │ ├── main.js # scroll animations, count-up, etc. +│ │ ├── components.js # fetch + inject header/footer +│ │ └── form.js # form validation + submit +│ ├── images/ +│ ├── videos/ # hero video files (.mp4 + .webm) +│ └── fonts/ # only if not using Google Fonts CDN +├── build_locations.py # JSON → flat .html stamping +├── build_services.py +└── README.md # project notes, content sources, status +``` + +### Deployment Folder + +Lives at `/home/sirdrez/arisingmedia-websites/{domain}/`. +Contains ONLY what's needed to run `docker compose up`. + +``` +{domain}/ +├── index.html # all public website folders +├── about/ # ↑ +├── assets/ # ↑ +├── blog/ # ↑ +├── components/ # ↑ +├── contact/ # ↑ +├── locations/ # ↑ +├── reviews/ # ↑ +├── services/ # ↑ +├── api/ # form-submit Python service (if used) +│ ├── server.py +│ ├── Dockerfile +│ ├── .env # gitignored — Resend key, etc. 
+│ └── .env.example +├── Dockerfile # nginx web container +├── nginx.conf +├── docker-compose.yml +├── .dockerignore +├── .gitignore +└── .planning/ # everything not needed at runtime + ├── build_locations.py # build scripts moved here + ├── data/ # JSON sources moved here + ├── README.md + ├── DNS_*.txt # DNS notes + └── review_*.png # design review screenshots +``` + +### What Goes Where + +**Source folder gets** every working file (build scripts, data JSON, screenshots, +notes, raw assets). This is the dev/maintenance copy. NOT what gets deployed. + +**Deployment folder gets** ONLY the rendered website + the small API service. +Build scripts, JSON data, and notes go into `.planning/` to keep root clean and +prevent accidental web exposure. + +### URL Structure — Two Valid Patterns + +#### Pattern A: Flat HTML (default for Docker/nginx projects) + +nginx `try_files $uri $uri/ $uri.html =404` serves `/locations/buffalo` and +`/locations/buffalo.html`. Canonical form: `/locations/buffalo.html`. + +Why flat: +- One file = one page, no `/index.html` confusion +- Easier sitemap generation +- `<a href>` links are unambiguous +- Crawl budget benefit — Google indexes one URL per page, not two + +#### Pattern B: Directory-style (default for cPanel/Apache projects) + +Each page lives at `{slug}/index.html`. Apache auto-serves `index.html` when +visiting `/{slug}/`. Use this when deploying to cPanel shared hosting. + +``` +services/ +├── carpet-cleaning/index.html → /services/carpet-cleaning/ +├── stairs/index.html → /services/stairs/ +commercial/ +├── offices/index.html → /commercial/offices/ +└── vacation-rentals/index.html → /commercial/vacation-rentals/ +``` + +### Lahrcarpetcleaning.com Reference (Directory-Style, cPanel) + +``` +lahrcarpetcleaning.com/ +├── index.html +├── about/index.html +├── contact/index.html +├── reviews/index.html +├── service-area/index.html +├── locations/ +│ ├── index.html +│ ├── waterloo-ny/index.html +│ ├── geneva-ny/index.html +│ └── ... 
(20 location pages) +├── services/ +│ ├── carpet-cleaning/index.html +│ ├── stairs/index.html +│ ├── upholstery/index.html +│ ├── floors/index.html +│ ├── area-rugs/index.html +│ ├── add-ons/index.html +│ └── commercial/index.html +├── commercial/ +│ ├── offices/index.html +│ ├── vacation-rentals/index.html +│ ├── hotels-inns/index.html +│ ├── retail-showrooms/index.html +│ └── property-management/index.html +├── assets/ +│ ├── css/styles.css?v=N ← always cache-bust on change +│ ├── js/ +│ │ ├── main.js +│ │ └── components.js ← injects nav+footer via innerHTML +│ ├── images/ +│ │ ├── hero/ ← hero-{slug}.webp, one per page +│ │ └── services/ ← {service}.webp card images +│ └── videos/hero/hero-reel.mp4 +├── tools/ ← NOT deployed to webroot +│ ├── convert-to-webp.py +│ ├── gen-images-flux.py +│ └── gen-hero-images.py +├── .cpanel.yml +├── robots.txt +├── sitemap.xml +├── 404.html +└── 500.html +``` + +All images are `.webp`. cPanel deployment via `.cpanel.yml`. + +--- + +## Build Pipeline + +When a site has many similar pages (location pages, service pages, blog posts, +team-member pages), use a JSON + template + Python build script. + +### When to Use a Build Script + +Use it when there are 4+ pages with identical structure differing only in +content. For example: 6 location pages where only the city name and +city-specific copy differs. + +For one-off pages (home, about, contact, services index), hand-write the HTML +directly. Build scripts are for repetition, not for everything. + +### Pattern + +Three files per template family: + +1. **`data/{thing}.json`** — array of objects, one per page +2. **`{thing}/_template.html`** — HTML with `{{placeholder}}` markers +3. 
**`build_{thing}.py`** — stdlib Python, stamps template with data + +#### Example: locations.json + +```json +[ + { + "slug": "buffalo", + "city": "Buffalo", + "state": "NY", + "title": "Hardwood Floor Refinishing in Buffalo, NY | Floor It", + "meta_description": "Professional hardwood floor refinishing...", + "canonical": "https://floorithardwoodfloors.com/locations/buffalo.html", + "hero_h1": "Hardwood Floor Refinishing in Buffalo, NY", + "hero_lead": "Western New York's most experienced...", + "overview_h2": "Buffalo's Trusted Floor Refinishing Specialists", + "overview_body_1": "...", + "overview_body_2": "...", + "faqs": [ + { "q": "...", "a": "..." } + ] + } +] +``` + +#### Example: _template.html + +```html +<!DOCTYPE html> +<html lang="en"> +<head> + <title>{{title}} + + + ... + + +

{{hero_h1}}

+

{{hero_lead}}

+ ... + + +``` + +#### Example: build_locations.py (skeleton) + +```python +"""Build flat .html location pages from data/locations.json + locations/_template.html.""" +import json, sys +from pathlib import Path + +SITE_ROOT = Path(__file__).parent +DATA_FILE = SITE_ROOT / "data" / "locations.json" +TEMPLATE_FILE = SITE_ROOT / "locations" / "_template.html" +OUT_DIR = SITE_ROOT / "locations" + +def render(template: str, item: dict) -> str: + out = template + for key, value in item.items(): + if isinstance(value, (str, int, float)): + out = out.replace("{{" + key + "}}", str(value)) + # Custom rendering for nested arrays (e.g. faqs) + # ... handle item['faqs'] etc. + return out + +def main(): + data = json.loads(DATA_FILE.read_text(encoding="utf-8")) + template = TEMPLATE_FILE.read_text(encoding="utf-8") + print(f"Building {len(data)} location pages...") + for item in data: + rendered = render(template, item) + outfile = OUT_DIR / f"{item['slug']}.html" + outfile.write_text(rendered, encoding="utf-8") + print(f" Built: {outfile.relative_to(SITE_ROOT)}") + print(f"Done. {len(data)} pages written.") + +if __name__ == "__main__": + main() +``` + +### Rules + +1. **Source of truth is JSON, not HTML.** When content needs to change, edit the + JSON and re-run the build script. Never hand-edit a generated `.html` file — + the next build will overwrite your changes. + +2. **Generated files land in the same folder as their template.** Do not nest + into a subfolder. The template file is always named `_template.html` (leading + underscore so it sorts above the generated pages). + +3. **Build script lives in the SOURCE root**, not in deployment. After running + the build, sync the rendered `.html` files (not the script, not the JSON) to + deployment. + +4. **Verify zero unreplaced placeholders** after every build: + ```bash + grep -rn "{{" {thing}/*.html # should return nothing + ``` + +5. **Build is idempotent.** Running it twice produces identical files. 
+ +### Stamping Rules — Escaping + +When a JSON value gets stamped into an HTML attribute or `<title>`, special +characters can break the page. Use these rules: + +- Plain text in `<p>` or `<h1>`: ampersand-encode (`&` → `&amp;`) +- `<title>` content: ampersand-encode + strip line breaks +- `<meta>` content attribute: encode `&`, `"`, and remove line breaks +- `href` URL attribute: never put user input here, but if needed, urlencode + +For our typical use case (controlled content authored by us), the simple +`str.replace("{{key}}", value)` is sufficient because we don't have hostile +input. Just don't put angle brackets or quotes in the JSON values. + +### Re-Running the Build + +```bash +cd {project}/site +python3 build_locations.py +python3 build_services.py +``` + +After build, sync the rendered files to deployment. + +--- + +## WordPress to Static HTML Migration + +The playbook for migrating a WordPress (Divi, Elementor, classic, whatever) site +to vanilla static HTML. + +### Phase 1 — Capture Source + +Before touching anything, capture the current site so nothing is lost. + +1. **Database dump** — `wp db export ${domain}.sql --add-drop-table` +2. **Wp-content snapshot** — tar the entire `wp-content/` (themes, plugins, uploads) +3. **Crawl the live site** — use `wget --mirror --convert-links --adjust-extension --page-requisites --no-parent https://{domain}` to capture rendered HTML + all assets +4. **Inventory pages** — list every URL returning 200 (use the sitemap if it has one) +5. **Inventory forms** — note every Gravity Form / Contact Form 7 / etc. field-by-field +6. **Inventory dynamic features** — search, comments, members, anything truly dynamic + +Save all of this in the project's `.planning/` folder. + +### Phase 2 — Decide What to Keep + +Re-design pass. 
Most WP sites have: +- Bloated copy → cut by 30-50% +- Outdated/inflated metrics → remove or replace with real, verifiable data +- Stock photos → replace with real client photos when available +- Cluttered layouts → strip back to one clear CTA per section +- Plugin features the client never uses → drop entirely + +Show the client a wireframe of the simplified structure before building anything. + +### Phase 3 — Information Architecture + +Standard structure for a small business: + +``` +/ home +/about/ about / story / team +/services/ services index +/services/{slug}.html one detail page per service +/locations/ locations index +/locations/{city}.html one detail page per service area (SEO gold) +/reviews/ customer reviews +/contact/ contact + form +/blog/ optional blog index +``` + +For each location and each service: one flat `.html` page generated from JSON + +template. + +### Phase 4 — Build + +1. Set up source folder per `01-project-structure.md` +2. Write `assets/css/main.css` (variables, reset, typography, layout) +3. Write `assets/css/components.css` (header, footer, hero, cards, forms) +4. Write `components/header.html` and `components/footer.html` +5. Write `assets/js/components.js` (fetch + inject header/footer) +6. Write `assets/js/main.js` (scroll animations, anything page-wide) +7. Build `index.html` first — this is the design system in working form +8. Generate location and service detail pages from JSON +9. 
Build remaining pages: about, contact, reviews, blog index + +### Phase 5 — Forms + +If the WP site had Gravity Forms or similar, build a vanilla replacement: +- HTML form in `contact/index.html` (and inline on service/location pages if needed) +- Client-side validation in `assets/js/form.js` +- POST to `/api/estimate` (or similar) handled by Python stdlib service +- Server-side validation, reCAPTCHA verification, send via Resend + +### Phase 6 — SEO Parity + +Before launch, every old URL must either: +- Have a matching new URL with the same or better content, OR +- 301-redirect to a relevant new URL + +Build a redirect map from the old WP sitemap. Add to `nginx.conf`: + +```nginx +location = /old-page-slug { return 301 /new-slug.html; } +# location matching ignores the query string, so test ?p=123 via $arg_p at server level +if ($arg_p = 123) { return 301 /about/; } +``` + +Per-page parity checklist: +- `<title>` matches or improves on the WP title +- `<meta name="description">` matches or improves +- `<link rel="canonical">` is set to the new URL +- Headings (h1, h2, h3) preserve the topical structure +- Internal links updated to new URLs +- Image alt text preserved or improved +- Schema.org JSON-LD added (`LocalBusiness`, `Service`, `BreadcrumbList`) + +### Phase 7 — Switch DNS / Cutover + +1. Deploy the static site to a separate URL first (`new.{domain}`) for client review +2. Once approved, point production DNS to the new container +3. Keep the WP container running for 14 days as fallback +4. Submit new sitemap to Google Search Console +5. Use Search Console URL inspection on 5-10 key pages to confirm indexing + +### Phase 8 — Post-Launch + +- Monitor Search Console for crawl errors / 404s, fix in nginx as redirects +- Monitor form submissions — first real lead through the new form is the + ultimate "it works" check +- Decommission WP only after 30 days of clean operation + +### What NOT to Do + +- Do not run a "headless WordPress" or "WordPress as API" — that defeats the + whole point. Static means static. 
+- Do not use a static-site-generator tool (Hugo, 11ty, Jekyll, Astro, Next.js + static export). We hand-write HTML and use small Python build scripts only + where data is repeated. +- Do not migrate the database. Content gets re-written cleaner during migration. + +--- + +## WP + Divi to AM HTML Pipeline Overview + +End-to-end playbook for converting a WordPress / Divi site backup (.wpress) +into an Arising Media vanilla HTML + vanilla JS deployment. + +### What This Pipeline Does + +Takes a single `.wpress` archive (All-in-One WP Migration backup) and produces: +- A fully structured `src/` directory matching AM project layout +- A CSS design system derived from the original Divi theme settings +- All page content extracted, cleaned, and re-authored into AM HTML templates +- All media migrated to WebP and remapped to `/assets/images/` +- SEO metadata (titles, descriptions, canonicals, schema.org) preserved or improved +- Docker-ready deployment with nginx + Python (stdlib) contact form API + +### Philosophy + +The goal is NOT a 1:1 copy. The goal is: +1. Preserve all content, SEO equity, and brand identity +2. ENHANCE the design — cleaner, faster, more modern +3. Remove all WordPress / Divi bloat (plugin CSS, shortcode residue, 300KB JS bundles) +4. Produce a site that loads in <2s on mobile and scores 95+ on Lighthouse + +Every migration is a design upgrade. The Divi site is the reference, not the target. + +### Divi Version Matters + +Two distinct extraction paths: + +| Version | Content Storage | How to detect | +|---------|----------------|---------------| +| Divi 4 | `[et_pb_section]` shortcodes in `wp_posts.post_content` | `post_content` contains `[et_pb_` | +| Divi 5 | Gutenberg blocks (`<!-- wp:divi/section -->`) + JSON in `wp_postmeta` | `post_content` contains `<!-- wp:divi/` | + +Run Phase 2 (database analysis) first to determine which version before choosing the extraction path. 
+ +### Pipeline Phases + +``` +Phase 0 Setup Verify .wpress location, create extraction directory +Phase 1 Extract Unpack .wpress binary archive to wpress-extract/ +Phase 2 DB Analysis Inspect WordPress database dump, detect Divi version, inventory pages +Phase 3 Content Extract page content via Divi 4 or Divi 5 path +Phase 4 Design System Pull colors, fonts, spacing from wp_options → CSS custom properties +Phase 5 Media Catalog uploads/, convert to WebP, generate image manifest +Phase 6 Build HTML Map extracted content to AM templates, generate JSON data files +Phase 7 SEO Port titles, metas, canonicals, schema.org; build redirect map +Phase 8 Forms Replace Gravity Forms / CF7 with AM vanilla form + Python API +Phase 9 QA Lighthouse audit, grep for unreplaced placeholders, protection check +``` + +### Script Reference + +All scripts live in `.am-webdesign-sops/wp-divi-pipeline/scripts/`. + +| Script | Phase | Purpose | +|--------|-------|---------| +| `extract_wpress.py` | 1 | Unpack .wpress binary archive | +| `analyze_db.py` | 2 | Parse SQL dump, inventory pages + detect Divi version | +| `extract_divi4.py` | 3 | Parse et_pb_ shortcodes → structured content JSON | +| `extract_divi5.py` | 3 | Parse Gutenberg/Divi5 blocks → structured content JSON | +| `extract_design.py` | 4 | Pull Divi theme options → design-system.json | +| `extract_media.py` | 5 | Catalog uploads/, emit media-manifest.json | +| `convert_images.py` | 5 | Batch convert images → WebP | +| `run_pipeline.sh` | 0-7 | Master script — runs all phases in order | + +### Per-Project Working Directory + +``` +{domain}/ +└── .planning/ + ├── vibrantyou-yoga-YYYYMMDD-*.wpress ← source archive (never modify) + ├── wpress-extract/ ← Phase 1 output (gitignored) + │ ├── package.json ← archive metadata + │ ├── database.sql ← MySQL dump + │ └── uploads/ ← all media (NOT in wp-content/) + ├── data/ + │ ├── pages.json ← Phase 2 output + │ ├── design-system.json ← Phase 2 output + │ └── media-manifest.json ← Phase 5 
output + └── scripts/ ← project-specific overrides if needed +``` + +### .wpress Extraction Details + +The `.wpress` binary format is NOT a standard zip or tar. Custom sequential binary format: + +``` +[HEADER 4377 bytes] [FILE DATA n bytes] [HEADER] [FILE DATA] ... +``` + +Header breakdown: +``` +Offset Length Field +0 255 Filename (null-padded) +255 14 File size in bytes (ASCII decimal, null-padded) +269 12 mtime unix timestamp (ASCII decimal, null-padded) +281 4096 Relative path (null-padded) +4377 n Raw file bytes (size from header) +``` + +The archive ends when a header of all null bytes is encountered, or EOF. + +Extraction script: + +```bash +python3 /home/sirdrez/arisingmedia-websites/.am-webdesign-sops/wp-divi-pipeline/scripts/extract_wpress.py \ + /home/sirdrez/arisingmedia-websites/{domain}/.planning/{file}.wpress \ + /home/sirdrez/arisingmedia-websites/{domain}/.planning/wpress-extract/ +``` + +### Database Analysis + +Parse the WordPress MySQL dump to inventory pages, detect Divi version, +extract design settings, and build the data JSON files. + +```bash +python3 /home/sirdrez/arisingmedia-websites/.am-webdesign-sops/wp-divi-pipeline/scripts/analyze_db.py \ + {domain}/.planning/wpress-extract/ \ + {domain}/.planning/data/ +``` + +Outputs three files into `.planning/data/`: +- `pages.json` — all published pages/posts with content and SEO meta +- `design-system.json` — colors, fonts, Divi settings +- `site-info.json` — domain, plugin list, WP version, Divi version + +### Divi 5 Content Extraction + +Parse raw Divi page content from `pages.json` into clean, structured HTML +sections ready to map into AM templates. 
+ +```bash +python3 /home/sirdrez/arisingmedia-websites/.am-webdesign-sops/wp-divi-pipeline/scripts/extract_divi5.py \ + {domain}/.planning/data/pages.json \ + {domain}/.planning/data/content/ +``` + +Produces one JSON file per page: `content/{slug}.json` + +Key fields in page JSON: +- `slug`: page URL slug +- `title`: page title +- `seo_title`: SEO title (from Rank Math if available) +- `seo_description`: SEO description (from Rank Math if available) +- `sections`: array of content sections with type, background_color, and modules + +Map each Divi module type to AM component: + +| Divi module | Extract | Map to AM element | +|-------------|---------|-------------------| +| `divi/text` | inner HTML | `<section>`, `<p>`, headings as-is | +| `divi/button` | `text`, `url` | `<a class="btn-primary">` | +| `divi/image` | `src`, `alt`, `title` | `<img>` → rewrite to WebP path | +| `divi/blurb` | icon, title, body | `.am-card` component | +| `divi/testimonial` | quote, author, company | `.am-testimonial` component | +| `divi/video` | `src`, poster | `<video>` or YouTube embed | +| `divi/contact_form` | field list | → replace with AM form | +| `divi/accordion` | Q+A pairs | `<details><summary>` | +| `divi/fullwidth_header` | title, subhead, CTA | hero section | + +Strip Divi class/attribute noise using `clean_divi_html()` from `divi_to_html.py`: + +```python +from divi_to_html import clean_divi_html, rewrite_internal_links + +cleaned = clean_divi_html(raw_html) +cleaned = rewrite_internal_links(cleaned, staging_hosts=("vibrantyou.yoga",)) +``` + +### Design System Extraction + +Convert Divi theme settings into AM CSS custom properties. 
+ +Input: `design-system.json` produced by `analyze_db.py` with fields: +- `primary_color`: main brand color +- `body_font`: font family name +- `header_font`: heading font name +- `body_font_size`: base font size in px +- `body_line_height`: line height ratio +- `divi_version`: "4" or "5" +- `wp_version`: WordPress version +- `site_url`: domain +- `site_name`: brand name + +Never lift the Divi palette 1:1. Use extracted colors as the base and build a +full 5-step scale around the primary hue: + +```css +:root { + --color-primary: {extracted-color}; + --color-primary-dark: {darken-by-15%}; + --color-primary-light: {lighten-by-40%}; + --color-surface: #fafafa; + --color-surface-alt: #f0f7f6; + --color-text: #1a1a1a; + --color-text-muted: #5a6e6b; + --color-border: #c8dedd; + --color-white: #ffffff; + + /* Fonts */ + --font-body: '{body-font}', system-ui, sans-serif; + --font-heading: '{header-font}', Georgia, serif; + + /* Modular scale (1.25 ratio) */ + --text-xs: 0.75rem; --text-sm: 0.875rem; + --text-base: 1rem; --text-lg: 1.125rem; + --text-xl: 1.25rem; --text-2xl: 1.5rem; + --text-3xl: 1.875rem; --text-4xl: 2.25rem; + --text-5xl: 3rem; --text-6xl: 3.75rem; + + /* Spacing scale */ + --space-1: 0.25rem; --space-2: 0.5rem; --space-3: 0.75rem; + --space-4: 1rem; --space-5: 1.25rem; --space-6: 1.5rem; + --space-8: 2rem; --space-10: 2.5rem; --space-12: 3rem; + --space-16: 4rem; --space-20: 5rem; --space-24: 6rem; + --space-32: 8rem; +} +``` + +### Content Migration + +Map extracted Divi content into AM HTML templates. + +Build order: +1. `src/assets/css/main.css` — design tokens, reset, typography, layout grid +2. `src/assets/css/components.css` — header, footer, hero, cards, forms, nav +3. `src/components/header.html` — navigation +4. `src/components/footer.html` — footer links, contact info +5. `src/assets/js/components.js` — fetch + inject header/footer +6. `src/assets/js/main.js` — scroll animations, intersection observer +7. 
`src/index.html` — home page (this IS the design system in working form) +8. Remaining pages: about, classes, contact, blog +9. `src/robots.txt`, `src/sitemap.xml`, `src/404.html`, `src/500.html` + +For 4+ similar pages (class types, locations), use JSON template build: + +``` +src/classes/ +├── _template.html ← class detail page template +├── hatha.html ← generated from classes.json +├── vinyasa.html +└── yin.html + +.planning/data/ +└── classes.json ← array of class objects +``` + +### Media Assets + +Migrate WordPress uploads to AM `/assets/images/`, convert to WebP, and +generate a media manifest for URL remapping. + +Steps: +1. Catalog all original media (skip WordPress-generated size variants like `-150x150`) +2. Copy originals to `src/assets/images/` +3. Convert to WebP using `cwebp` or Python Pillow +4. Generate media manifest with old → new URL mapping +5. Apply manifest during HTML build to rewrite all image paths + +```bash +# Catalog originals (skip WP size variants) +find .planning/wpress-extract/uploads -type f \( -name "*.jpg" -o -name "*.png" \) | \ + grep -v -E "\-[0-9]+x[0-9]+\.(jpg|png)$" > .planning/data/media-originals.txt + +# Copy and convert +while IFS= read -r src; do + cp "$src" "src/assets/images/$(basename $src)" +done < .planning/data/media-originals.txt + +cd src/assets/images/ +for img in *.jpg *.png; do + [ -f "$img" ] || continue + cwebp -q 82 "$img" -o "${img%.*}.webp" && rm "$img" +done +``` + +Remap URLs during HTML build: + +```python +import json, re + +manifest = json.loads(open('.planning/data/media-manifest.json').read()) +url_map = {m['wp_url']: m['am_url'] for m in manifest} + +def rewrite_media_urls(html: str) -> str: + for wp_url, am_url in url_map.items(): + html = html.replace(wp_url, am_url) + return html +``` + +### SEO Preservation + +Before building HTML, map every WordPress page URL to its new AM URL and +ensure title, description, canonical, and schema.org are preserved or improved. 
+ +Rank Math SEO extraction (already in `pages.json` as `seo_title` and `seo_description`). + +Priority order for SEO fields: +1. `seo_title` from Rank Math (if not empty and not a template) +2. `post_title` with AM format appended: `{Title} | {Brand Name}` +3. Never leave title as the raw WP default + +Rank Math title templates use `%` tokens — strip them and rebuild: + +```python +import re + +def clean_rm_title(rm_title: str, post_title: str, site_name: str) -> str: + if not rm_title or "%" in rm_title: + return f"{post_title} | {site_name}" + return rm_title + +def clean_rm_desc(rm_desc: str) -> str: + return re.sub(r"%[a-z_]+%", "", rm_desc).strip(" -|") +``` + +Schema.org by page type: + +| Page | Schema type | Required fields | +|------|------------|----------------| +| Home | `LocalBusiness` | name, url, telephone, address, areaServed, openingHours | +| About | `AboutPage` + `Organization` | name, description, founders | +| Contact | `ContactPage` | name, url, telephone, email, address | +| Blog post | `Article` | headline, datePublished, author, image | + +Pre-launch SEO audit (all must return empty): + +```bash +SITE=src + +# Every page has title/description/canonical/JSON-LD +find $SITE -name "*.html" | xargs grep -L '<title>' +find $SITE -name "*.html" | xargs grep -L 'name="description"' +find $SITE -name "*.html" | xargs grep -L 'rel="canonical"' +find $SITE -name "*.html" | xargs grep -L 'application/ld+json' + +# No WP URLs leaked +grep -r "wp-content\|wp-admin\|?p=\|?page_id=" $SITE --include="*.html" + +# No unreplaced placeholders +grep -r "{{" $SITE --include="*.html" + +# No Divi residue +grep -r "et_pb_\|wp:divi" $SITE --include="*.html" +``` + +### Run Order (Complete Execution Sequence) + +```bash +export DOMAIN="vibrantyou.yoga" +export PROJECT="/home/sirdrez/arisingmedia-websites/$DOMAIN" +export SOPS="/home/sirdrez/arisingmedia-websites/.am-webdesign-sops" +export WPRESS=$(ls $PROJECT/.planning/*.wpress | head -1) + +# Phase 0: Setup 
+mkdir -p $PROJECT/{src/{about,services,contact,blog,classes,components,assets/{css,js,images,svg,fonts}},build,infra,api,.planning/{data/content,scripts,wpress-extract}} + +# Phase 1: Extract archive +python3 $SOPS/wp-divi-pipeline/scripts/extract_wpress.py "$WPRESS" "$PROJECT/.planning/wpress-extract/" + +# Phase 2: Database analysis +python3 $SOPS/wp-divi-pipeline/scripts/analyze_db.py "$PROJECT/.planning/wpress-extract/" "$PROJECT/.planning/data/" + +# Phase 3: Content extraction (Divi 5 example) +python3 $SOPS/wp-divi-pipeline/scripts/extract_divi5.py "$PROJECT/.planning/data/pages.json" "$PROJECT/.planning/data/content/" + +# Phase 4: Design system (manual — read design-system.json, write main.css) + +# Phase 5: Media migration +find $PROJECT/.planning/wpress-extract/uploads -type f \( -name "*.jpg" -o -name "*.png" \) | \ + grep -v -E "\-[0-9]+x[0-9]+\.(jpg|png)$" > $PROJECT/.planning/data/media-originals.txt + +while IFS= read -r src; do + cp "$src" "$PROJECT/src/assets/images/$(basename $src)" +done < $PROJECT/.planning/data/media-originals.txt + +cd $PROJECT/src/assets/images/ +for img in *.jpg *.png; do + [ -f "$img" ] || continue + cwebp -q 82 "$img" -o "${img%.*}.webp" && rm "$img" +done + +# Phase 6: Build HTML (manual — per 05-content-migration.md) + +# Phase 7: SEO audit +cd $PROJECT/src +find . -name "*.html" | grep -v "_template" | xargs grep -L '<title>' +find . -name "*.html" | grep -v "_template" | xargs grep -L 'rel="canonical"' + +# Phase 8: Docker setup +docker compose -f $PROJECT/docker-compose.yml build +docker compose -f $PROJECT/docker-compose.yml up -d +curl -I http://localhost:PORT/ + +# Phase 9: Protection check +bash $SOPS/tools/verify-protection.sh https://$DOMAIN +``` + +--- + +## Docker + Nginx Deployment + +Every project ships with ALL deployment configs so it can go to either a +Docker VPS or a cPanel shared host without refactoring. 
+ +### docker-compose.yml + +```yaml +services: + web: + image: {domain}-static + build: + context: . + dockerfile: Dockerfile + ports: + - "{port}:80" + depends_on: + api: + condition: service_healthy + restart: unless-stopped + + api: + image: {domain}-api + build: + context: ./api + dockerfile: Dockerfile + env_file: ./api/.env + expose: + - "3001" + healthcheck: + test: ["CMD", "python3", "-c", "import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://localhost:3001/health',timeout=3).status==200 else 1)"] + interval: 10s + timeout: 5s + retries: 3 + restart: unless-stopped +``` + +Port assignments are unique per project. Track in +`/home/sirdrez/arisingmedia-websites/PORTS.md` so no two projects collide. + +### Dockerfile (nginx web container) + +CRITICAL — the Dockerfile must explicitly list which folders to copy. Never use +`COPY . /usr/share/nginx/html/` because that copies `.env`, `Dockerfile`, +build scripts, etc. into the web root where they become URL-accessible. + +```dockerfile +FROM nginx:alpine + +# nginx config — server-only, never served as a static file +COPY nginx.conf /etc/nginx/conf.d/default.conf + +# Public website only — explicit list, no wildcards +COPY index.html /usr/share/nginx/html/ +COPY assets /usr/share/nginx/html/assets/ +COPY components /usr/share/nginx/html/components/ +COPY about /usr/share/nginx/html/about/ +COPY blog /usr/share/nginx/html/blog/ +COPY contact /usr/share/nginx/html/contact/ +COPY locations /usr/share/nginx/html/locations/ +COPY reviews /usr/share/nginx/html/reviews/ +COPY services /usr/share/nginx/html/services/ + +EXPOSE 80 +``` + +### Dockerfile (api Python container) + +```dockerfile +FROM python:3.13-alpine +WORKDIR /app +COPY server.py . +EXPOSE 3001 +CMD ["python3", "-u", "server.py"] +``` + +No pip, no requirements.txt, no node_modules. Python stdlib only. 
+ +### nginx.conf + +```nginx +server { + listen 80; + server_name _; + root /usr/share/nginx/html; + index index.html; + + # Defense in depth — deny dotfiles, configs, scripts, source files + location ~ /\. { + deny all; + return 404; + } + location ~* \.(env|env\.example|conf|yml|yaml|py|pyc|md|txt|sh|sql|log|bak|old|swp|dockerfile)$ { + deny all; + return 404; + } + location = /Dockerfile { + deny all; + return 404; + } + + # API proxy — strip /api/ prefix, forward to Python service + location /api/ { + proxy_pass http://api:3001/; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_read_timeout 10s; + proxy_connect_timeout 5s; + } + + # Flat HTML routing — /locations/buffalo serves /locations/buffalo.html + location / { + try_files $uri $uri/ $uri.html =404; + } + + # Cache static assets aggressively + location ~* \.(jpg|jpeg|png|webp|svg|ico|css|js|woff2?|mp4|webm)$ { + expires 30d; + add_header Cache-Control "public, immutable"; + access_log off; + } + + # Security headers + add_header X-Frame-Options "SAMEORIGIN"; + add_header X-Content-Type-Options "nosniff"; + add_header X-XSS-Protection "1; mode=block"; + add_header Referrer-Policy "strict-origin-when-cross-origin"; + add_header Permissions-Policy "geolocation=(), microphone=(), camera=()"; + add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' https://www.google.com https://www.gstatic.com https://www.recaptcha.net; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; font-src https://fonts.gstatic.com; img-src 'self' data: https:; object-src 'none'; frame-ancestors 'self'; form-action 'self'; base-uri 'self';"; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"; + + # Disable server tokens + server_tokens off; + client_max_body_size 16k; + + gzip on; + gzip_types text/html text/css application/javascript 
image/svg+xml; + gzip_min_length 1024; + + error_page 404 /404.html; + error_page 500 /500.html; +} +``` + +### .dockerignore + +Keeps sensitive files out of the build context: + +``` +.git +.gitignore +.dockerignore +api +build_*.py +__pycache__ +*.pyc +*.md +*.txt +review_*.png +docker-compose.yml +.DS_Store +.planning +``` + +### .gitignore + +``` +api/.env +api/__pycache__/ +__pycache__/ +*.pyc +*.log +.DS_Store +``` + +The `api/.env` file is NEVER committed. + +### Sync from Source to Deployment + +After every change to source HTML/CSS/JS/assets: + +```bash +SITE="/path/to/concept-agent/projects/{domain}/site" +DEPLOY="/home/sirdrez/arisingmedia-websites/{domain}" + +rsync -a \ + --exclude=.git --exclude=.planning --exclude=api \ + --exclude=Dockerfile --exclude=nginx.conf --exclude=docker-compose.yml \ + --exclude=.dockerignore --exclude=.gitignore \ + --exclude='build_*.py' --exclude=__pycache__ --exclude=data \ + --exclude='*.md' --exclude='*.txt' --exclude='review_*.png' \ + "$SITE/" "$DEPLOY/" + +cd "$DEPLOY" +docker compose up -d --build web +``` + +### Verify After Deploy + +Every deploy MUST be audited with `tools/verify-protection.sh` before being +considered live. The script probes a fixed list of sensitive paths +(`Dockerfile`, `.env`, `nginx.conf`, `.planning/`, `__pycache__/`, build +scripts, `.git/`, etc.) and fails if any returns 200. + +```bash +~/arisingmedia-websites/.am-webdesign-sops/tools/verify-protection.sh \ + http://localhost:{port} +``` + +Exit codes: +- `0` PASS — every sensitive path 404, every required path reachable. +- `0` PASS (with warnings) — protection clean but `/robots.txt` or + `/sitemap.xml` missing (content gap, not a leak). +- `1` FAIL — at least one sensitive path returned 200, or `/` is unreachable. + +Run it manually after every `docker compose up -d --build`. Wire it into CI +once the site has a remote pipeline. Treat a FAIL as a deploy rollback. 
+ +For ad-hoc spot checks: + +```bash +curl -s -o /dev/null -w "site: %{http_code}\n" http://localhost:{port}/ +curl -s -o /dev/null -w "css: %{http_code}\n" http://localhost:{port}/assets/css/main.css +curl -s -o /dev/null -w "api: %{http_code}\n" http://localhost:{port}/api/health +``` + +All public paths return 200. All sensitive paths return 404. + +### Project Folder Rename Procedure + +WHY: Docker Compose derives its project name from the folder the +`docker-compose.yml` lives in. Renaming the folder changes the compose project +name, which orphans any running containers under the old name. + +The fix is to explicitly remove the old container before bringing up the new +compose project: + +```bash +# Stop and remove the old container by its known name +docker stop {container-name} +docker rm {container-name} + +# Now bring up from the renamed folder — clean start +docker compose -f /path/to/renamed-folder/docker-compose.yml up -d +``` + +Always confirm the env vars loaded correctly after restart: + +```bash +docker exec {container-name} env | grep RESEND +``` + +--- + +## cPanel + Apache Deployment + +Use this deployment method when the client's host is cPanel-based (shared hosting, +WHM, Bluehost, HostGator, SiteGround, etc.) instead of a VPS running Docker. + +### Key Rule: Repo Path ≠ Webroot + +cPanel Git requires an EMPTY directory as the repository path. The webroot +(`public_html/{domain}/`) is never the repo path — cPanel rejects it if it +already contains files. + +``` +Repo path (empty dir): /home/{username}/repositories/{domain}/ +Deploy target (webroot): /home/{username}/public_html/{domain}/ +``` + +### Setting Up the Repo in cPanel + +1. cPanel → Git Version Control → Create Repository +2. Repository Path: `/home/{username}/repositories/{domain}/` (must be empty) +3. Clone URL: your Git remote (GitHub, Bitbucket, etc.) +4. 
cPanel clones into the repo path — never into the webroot + +### .cpanel.yml + +This file lives in the repo root and tells cPanel what to copy to the webroot +on every push/deploy. All paths are relative to the repo root. + +```yaml +--- +deployment: + tasks: + - export DEPLOYPATH=/home/{username}/public_html/{domain}/ + - /bin/cp -r assets $DEPLOYPATH + - /bin/cp -r about $DEPLOYPATH + - /bin/cp -r commercial $DEPLOYPATH + - /bin/cp -r contact $DEPLOYPATH + - /bin/cp -r locations $DEPLOYPATH + - /bin/cp -r reviews $DEPLOYPATH + - /bin/cp -r service-area $DEPLOYPATH + - /bin/cp -r services $DEPLOYPATH + - /bin/cp index.html $DEPLOYPATH + - /bin/cp 404.html $DEPLOYPATH + - /bin/cp robots.txt $DEPLOYPATH + - /bin/cp sitemap.xml $DEPLOYPATH +``` + +Add or remove folder cp lines to match the project's actual directory structure. +Do NOT copy: `tools/`, `*.py`, `*.md`, `.git/`, `docker-compose.yml`, `Dockerfile`. + +### Lahrcarpetcleaning.com Reference + +```yaml +--- +deployment: + tasks: + - export DEPLOYPATH=/home/dev1communitypro/public_html/lahrcarpetcleaning.dev1.communityproud.com/ + - /bin/cp -r assets $DEPLOYPATH + - /bin/cp -r about $DEPLOYPATH + - /bin/cp -r commercial $DEPLOYPATH + - /bin/cp -r contact $DEPLOYPATH + - /bin/cp -r locations $DEPLOYPATH + - /bin/cp -r reviews $DEPLOYPATH + - /bin/cp -r service-area $DEPLOYPATH + - /bin/cp -r services $DEPLOYPATH + - /bin/cp index.html $DEPLOYPATH + - /bin/cp 404.html $DEPLOYPATH + - /bin/cp robots.txt $DEPLOYPATH + - /bin/cp sitemap.xml $DEPLOYPATH +``` + +### Deploying After a Push + +1. Push to the connected remote (GitHub) +2. cPanel → Git Version Control → Manage → Pull or Deploy +3. cPanel runs the `.cpanel.yml` tasks, copying files to webroot +4. Apache serves from webroot automatically — no nginx, no Docker + +### Apache vs nginx + +cPanel hosts use Apache (not nginx). There is no nginx.conf to manage. 
+URL routing is handled by `.htaccess`: + +```apache +Options -Indexes +RewriteEngine On + +# Directory-style URLs: /services/carpet-cleaning/ → index.html inside that folder +# Apache handles this automatically with DirectoryIndex — no extra rules needed + +# Deny sensitive files +<FilesMatch "\.(py|yml|yaml|md|log|sh|env|conf|dockerfile)$"> + Order allow,deny + Deny from all +</FilesMatch> + +# Security headers +<IfModule mod_headers.c> + Header set X-Frame-Options "SAMEORIGIN" + Header set X-Content-Type-Options "nosniff" + Header set X-XSS-Protection "1; mode=block" + Header set Referrer-Policy "strict-origin-when-cross-origin" + Header set Permissions-Policy "geolocation=(), microphone=(), camera=()" + Header set Strict-Transport-Security "max-age=31536000; includeSubDomains" +</IfModule> + +ErrorDocument 404 /404.html +ErrorDocument 500 /500.html +``` + +### Cache Busting on cPanel + +Apache does not auto-invalidate cached assets. Bump `?v=N` on CSS/JS in +all HTML files after every asset change: + +```html +<link rel="stylesheet" href="/assets/css/styles.css?v=6"> +<script src="/assets/js/main.js?v=3"></script> +``` + +Increment by 1 on every change. Apply across ALL HTML pages. + +### Verify After cPanel Deploy + +```bash +curl -s -o /dev/null -w "home: %{http_code}\n" https://{domain}/ +curl -s -o /dev/null -w "css: %{http_code}\n" https://{domain}/assets/css/styles.css +curl -s -o /dev/null -w "404: %{http_code}\n" https://{domain}/page-that-does-not-exist +``` + +All public paths return 200. All non-existent paths return 404. 
+ +### Universal Project Checklist (Both Paths) + +Every project must include ALL of these before first deploy: + +``` +Dockerfile ✓ Docker/VPS +docker-compose.yml ✓ Docker/VPS +nginx.conf ✓ Docker/VPS +.htaccess ✓ cPanel/Apache +.cpanel.yml ✓ cPanel Git +.dockerignore ✓ Docker build security +.gitignore ✓ keeps .env and secrets out of git +robots.txt ✓ both paths +sitemap.xml ✓ both paths +404.html ✓ both paths +500.html ✓ both paths +``` + +Lahrcarpetcleaning.com is the reference implementation for both paths. + +--- + +## Domain, Email, DNS, and Resend + +### Resend Account Setup + +1. Sign up at https://resend.com +2. Generate an API key (one per project): https://resend.com/api-keys +3. Save the key in the project's `api/.env` as `RESEND_API_KEY=re_xxxx` +4. NEVER commit `.env`. NEVER paste the key in Slack, GitHub, or chat logs. + +### Add and Verify the Sending Domain + +1. https://resend.com/domains → **Add Domain** +2. Enter the domain (the one you'll send FROM, not necessarily the website domain) +3. Resend gives 3-4 DNS records. Add them all in Cloudflare (or whatever DNS host) +4. Wait 5-15 minutes, click **Verify** in Resend — all records must show green + +### Records Resend Provides + +| Type | Name | Value | Proxy | TTL | +|------|------|-------|-------|-----| +| TXT | `resend._domainkey` | `p=...long-rsa-key...` | DNS only | 1 hr | +| TXT | `send` | `v=spf1 include:amazonses.com ~all` | DNS only | 1 hr | +| MX | `send` | `feedback-smtp.{region}.amazonses.com` priority 10 | DNS only | 1 hr | + +(Resend uses Amazon SES under the hood, hence `amazonses.com` in the SPF.) + +### DMARC — REQUIRED for Inbox Placement + +Without DMARC, Gmail flags otherwise-correctly-configured email as suspicious +and routes it to spam. Resend doesn't auto-create this record. You must add it. 
+ +| Type | Name | Value | Proxy | TTL | +|------|------|-------|-------|-----| +| TXT | `_dmarc` | `v=DMARC1; p=none; rua=mailto:dev@{domain}` | DNS only | Auto | + +Components: +- `v=DMARC1` — declares a DMARC policy exists +- `p=none` — monitor mode, doesn't reject anything yet (safe to start) +- `rua=mailto:...` — DMARC failure reports go to this inbox (review weekly) + +After 30 days of clean DMARC reports with no false positives, optionally +upgrade to `p=quarantine` then `p=reject`. + +### Verify DNS is Live + +```bash +dig +short TXT resend._domainkey.{domain} @8.8.8.8 +dig +short TXT send.{domain} @8.8.8.8 +dig +short TXT _dmarc.{domain} @8.8.8.8 +dig +short MX send.{domain} @8.8.8.8 +``` + +All four should return their expected values. + +### From-Name Format + +Always use a friendly From name, not bare email. Bare email looks robotic +and triggers spam filters. + +``` +FROM_EMAIL=Brand Name <webleads@{domain}> +``` + +### TO-Email Setup + +The `TO_EMAIL` is wherever the lead actually goes. Often a Gmail group address +or the owner's personal inbox. + +- During Resend domain verification (BEFORE green): you can ONLY send TO the + email tied to the Resend account +- After verification: send to anyone + +For local testing without verification, use: +``` +FROM_EMAIL=onboarding@resend.dev +TO_EMAIL={your-resend-account-email} +``` + +### When Emails Go to Spam + +Run this checklist: + +1. **All 4 DNS records green at Resend**? If not, deliverability suffers. +2. **DMARC TXT record exists**? Most common cause of spam folder. +3. **Friendly From name**? `Brand Name <webleads@...>` not bare `webleads@...` +4. **Both `html` and `text` parts in the payload**? HTML-only is suspicious. +5. **Subject line clean**? No em-dashes, no "Estimate Request URGENT", no all-caps. +6. **Recipient marked first emails as Not Spam**? Train Gmail. 
+ +### Cloudflare-Specific Notes + +The user-agent quirk — Cloudflare in front of Resend's API blocks Python's default +`User-Agent: Python-urllib/3.x`. Always set a custom `User-Agent` in the API request headers. + +If the DNS provider is Cloudflare, ensure all Resend records have **proxy status: DNS only** +(the gray cloud icon, not orange). Proxying these breaks authentication. + +### Annual Key Rotation + +Rotate Resend API keys annually: +1. Generate new key in Resend dashboard +2. Update `api/.env` on the server +3. `docker compose down && docker compose up -d` to reload env +4. Confirm a test submission still works +5. Revoke the old key in Resend dashboard + +### Resend HTTP 403 — Domain Not Verified + +A 403 from the Resend API does NOT mean the API key is wrong. The specific +error is: + +```json +{"statusCode":403,"message":"The {domain} domain is not verified. Please, add and verify your domain on https://resend.com/domains","name":"validation_error"} +``` + +This means the key is valid and authenticated, but the FROM domain has not +been added or verified at resend.com/domains yet. + +Rule: **verify the domain BEFORE testing the form endpoint.** If you test +before verification, `{"ok":false}` will be returned to the visitor even +though the API key is correct and the code is correct. + +Sequence: +1. Set `RESEND_API_KEY` in `.env` +2. Add domain at resend.com/domains +3. Add DNS records in Cloudflare +4. Wait for green verification +5. Then test the form endpoint + +### DKIM Key Rotation + +Resend periodically rotates DKIM keys. They send email when this happens. Add +the new `resend2._domainkey` (or whichever selector they specify) TXT record +in Cloudflare, then click verify. Old key remains active until they remove it. + +--- + +## Form Handling — Resend + +Static sites can't send email by themselves. Every project that needs a +contact form gets a small Python service running in its own Docker container, +proxied by nginx. 
+ +### Architecture + +``` +Browser → POST /api/estimate (vanilla JS fetch in form.js) + ↓ +nginx → proxies /api/ to api:3001 (strips /api/ prefix) + ↓ +Python service (server.py, stdlib only) + - Validates fields server-side + - Verifies reCAPTCHA v3 with Google + - Sends via Resend HTTPS API + - Returns {ok: true} or {error: ...} +``` + +### Front-End (Vanilla JS) + +`assets/js/form.js`: + +- Real-time validation (blur events) +- Phone formatting `(###) ###-####` +- Email regex check +- Required-field check +- Async submit to `/api/estimate` with JSON body +- Disable submit button + show "Sending..." during request +- Show success/error message in `.form-status` span +- Reset form on success +- reCAPTCHA v3 token fetched before submit and included in body + +### Back-End (Python stdlib) + +`api/server.py` (skeleton): + +```python +#!/usr/bin/env python3 +import hashlib, http.server, json, os, re, socketserver, time +import urllib.parse, urllib.request + +PORT = int(os.environ.get("PORT", "3001")) +RESEND_API_KEY = os.environ.get("RESEND_API_KEY", "") +RECAPTCHA_SECRET = os.environ.get("RECAPTCHA_SECRET", "") +TO_EMAIL = os.environ.get("TO_EMAIL", "") +FROM_EMAIL = os.environ.get("FROM_EMAIL", "") +RECAPTCHA_MIN = float(os.environ.get("RECAPTCHA_MIN", "0.5")) + +PHONE_RE = re.compile(r"^\(?\d{3}\)?[\s.\-]?\d{3}[\s.\-]?\d{4}$") +EMAIL_RE = re.compile(r"^[^\s@]+@[^\s@]+\.[^\s@]+$") + +# Rate limit: 5 requests / IP / 15 minutes +RATE_MAP = {} +RATE_WINDOW = 15 * 60 +RATE_MAX = 5 + +def sanitize(s): + if not isinstance(s, str): return "" + return s.replace("&","&amp;").replace("<","&lt;").replace(">","&gt;").replace('"',"&quot;").strip()[:2000] + +def validate_fields(body): + errors = [] + if not body.get("name") or len((body["name"]).strip()) < 2: errors.append("name") + if not EMAIL_RE.match((body.get("email") or "").strip()): errors.append("email") + if not PHONE_RE.match((body.get("phone") or "").replace(" ", "")): errors.append("phone") + return errors + +def 
verify_recaptcha(token): + if not RECAPTCHA_SECRET or not token: return 0.0 + data = urllib.parse.urlencode({"secret": RECAPTCHA_SECRET, "response": token}).encode() + req = urllib.request.Request("https://www.google.com/recaptcha/api/siteverify", data=data) + try: + with urllib.request.urlopen(req, timeout=8) as resp: + return float(json.loads(resp.read()).get("score", 0)) + except Exception: + return 0.0 + +def send_via_resend(fields): + safe = {k: sanitize(fields.get(k,"")) for k in ["name","email","phone","address","city","zip","service","condition","message"]} + html = f"""<!DOCTYPE html>...{safe['name']}...""" + text = f"New estimate request\n\nName: {safe['name']}\n..." + payload = json.dumps({ + "from": FROM_EMAIL, + "to": [TO_EMAIL], + "reply_to": fields.get("email","").strip(), + "subject": f"New estimate request: {safe['name']} ({safe['city']})", + "html": html, "text": text, + }).encode("utf-8") + idem = hashlib.sha256(payload).hexdigest()[:64] + req = urllib.request.Request("https://api.resend.com/emails", data=payload, headers={ + "Authorization": f"Bearer {RESEND_API_KEY}", + "Content-Type": "application/json", + "Idempotency-Key": idem, + "User-Agent": "{Brand}-Estimate-Form/1.0", + }) + try: + with urllib.request.urlopen(req, timeout=10) as resp: + if resp.status >= 300: raise RuntimeError(f"Resend {resp.status}: {resp.read().decode('utf-8','ignore')}") + except urllib.error.HTTPError as e: + raise RuntimeError(f"Resend {e.code}: {e.read().decode('utf-8','ignore')}") from None +``` + +Reference implementation: `floorithardwoodfloors.com/api/server.py`. + +### Critical: User-Agent Header + +When calling the Resend API from Python, you MUST set a non-default User-Agent. +Cloudflare (which fronts Resend) blocks Python's default `Python-urllib/3.x` +with HTTP 403 / Cloudflare error code 1010. 
+ +```python +"User-Agent": "{ProjectName}-Form/1.0" +``` + +### Idempotency + +Every Resend request includes an `Idempotency-Key` header set to the SHA-256 +of the payload (truncated to 64 chars). Identical payloads within 24 hours +are deduplicated by Resend automatically. This prevents: +- Double-clicks creating two leads +- Browser retries after a network blip +- Honest user submitting twice + +### Security Checklist + +- API key in `.env` file, NOT in source control. `.gitignore` it. +- API key NEVER reaches the browser bundle (only the server has it) +- `.env` file lives in `api/`, NOT in the nginx web root +- Server-side validation on EVERY field — never trust client +- HTML-escape every field rendered into the email body to prevent injection +- Rate limit per IP (5 / 15 min default) +- 16 KB body cap — reject anything larger +- 10-second upstream timeout — don't hold connections open +- CORS locked to the production domain only (`Access-Control-Allow-Origin: https://{domain}`) +- reCAPTCHA v3 with score threshold (default 0.5) once secret is configured + +### Environment Variables + +`api/.env`: +``` +RESEND_API_KEY=re_xxxxxxxxxxxx +RECAPTCHA_SECRET=6Ldq... +TO_EMAIL=leads@{domain} +FROM_EMAIL=Brand Name <webleads@{domain}> +RECAPTCHA_MIN=0.5 +PORT=3001 +``` + +`api/.env.example` (committed) is the same file with placeholder values. + +### reCAPTCHA Setup + +1. Create site at https://www.google.com/recaptcha/admin +2. Type: **reCAPTCHA v3** (not v2) +3. Add your domain +4. Copy the **site key** into `assets/js/form.js`: + ```js + const RECAPTCHA_SITE_KEY = '6Ldq...'; + ``` +5. Add the script tag to pages with the form: + ```html + <script src="https://www.google.com/recaptcha/api.js?render=6Ldq..."></script> + ``` +6. Copy the **secret key** into `api/.env` as `RECAPTCHA_SECRET` + +### Deliverability Checklist + +When emails are landing in spam: +1. Verify Resend domain is fully green (SPF + DKIM + DMARC) +2. 
From name set, not bare email: `Brand Name <webleads@{domain}>` +3. Both `html` and `text` parts in every Resend payload (no HTML-only) +4. Subject line is descriptive, no em-dash, no spam-trigger words +5. Recipient marks first 2-3 emails as "Not Spam" in Gmail to train the filter + +### Testing + +```bash +# Validation rejection (expect 422) +curl -X POST http://localhost:8096/api/estimate \ + -H "Content-Type: application/json" \ + -d '{"name":"","email":"bad"}' + +# Full valid submission (expect 200, real email sent) +curl -X POST http://localhost:8096/api/estimate \ + -H "Content-Type: application/json" \ + -d '{"name":"Test","email":"test@example.com","phone":"(716) 555-1234","address":"100 Test St","city":"Buffalo","zip":"14201","service":"refinishing","message":"Test","token":""}' +``` + +The first real test email confirms end-to-end works. + +--- + +## PHP App Stack (Server-Side Processing) + +Use this pattern when a project requires server-side processing that static HTML cannot handle: file conversion, at-rest encryption, payment processing, user authentication, or API-gated features. + +**Reference implementation:** `quickconvert.us` + +### When to Use This Pattern + +- File uploads and processing (image conversion, PDF generation, etc.) +- At-rest encryption of user data +- Payment processing with Stripe subscriptions +- User authentication with magic link or password-based login +- Rate-limited APIs that must be server-enforced + +**Do not** introduce this pattern just to add a contact form. Use the Python stdlib form service instead. 
+ +### Stack + +- **PHP 8.3** (php:8.3-fpm-alpine base image) +- **Nginx** (Alpine package, same container via supervisord) +- **SQLite** (pdo_sqlite extension, no separate DB container needed) +- **libsodium** (built into PHP 8.x — use for all encryption) +- **ImageMagick** (pecl imagick for image processing) +- **msmtp** (SMTP relay for outbound email) +- **supervisord** (manages nginx + php-fpm + crond in one container) + +### Project Structure + +``` +project/ +├── src/ ← nginx document root +│ ├── index.php +│ ├── api/ +│ │ ├── convert.php ← POST endpoint (CSRF + reCAPTCHA protected) +│ │ └── download.php ← GET endpoint (signed token) +│ ├── assets/css/ +│ ├── assets/js/ +│ └── assets/images/ +├── includes/ ← PHP classes (above doc root, not web-accessible) +│ ├── bootstrap.php ← constants, session, autoload +│ ├── auth.php ← login, register, magic token +│ ├── csrf.php +│ ├── db.php ← SQLite PDO wrapper +│ ├── encryption.php ← libsodium wrappers +│ └── mailer.php +├── components/ +│ ├── header.php +│ └── footer.php +├── storage/ ← volume-mounted, NOT in docker image +│ ├── uploads/ ← encrypted .enc files only +│ ├── converted/ +│ ├── temp/ +│ ├── .htaccess ← deny all direct access +│ └── {app}.db +├── infra/ +│ ├── nginx.conf +│ ├── php.ini +│ ├── supervisord.conf +│ └── docker-entrypoint.sh +├── tools/ +│ └── cleanup.php ← cron: delete expired tokens + files +├── Dockerfile +├── docker-compose.yml +└── .env ← gitignored, never committed +``` + +### Security Requirements (Non-Negotiable) + +**CSRF** — every POST form and API endpoint must verify a CSRF token tied to the session. + +**Rate limiting** — two layers: +1. nginx: `limit_req_zone` on /api/ (10 req/s, burst 20) +2. PHP: per-IP daily counter in SQLite rate_limits table + +**reCAPTCHA v3** — on conversion/upload endpoints. Verify server-side via Google API. Cache result in session (verify once per session, not per request). 
+ +**At-rest encryption** — any user-uploaded file must be encrypted before writing to disk. Use `sodium_crypto_secretstream_xchacha20poly1305_*` for files, `sodium_crypto_secretbox` for strings. Key stored in `.env` as `QC_ENCRYPTION_KEY` (32 bytes hex). + +**Signed download tokens** — never expose file paths. Issue a 64-char hex token stored in SQLite with expiry and single-use enforcement. + +**Magic link auth** — prefer magic link over password. On register: create account unverified, send verify email, block login until verified. Token: 64-char hex, 1-hour expiry, stored in `magic_tokens` table, consumed on use. + +### Nginx Security Headers + +```nginx +add_header X-Frame-Options "SAMEORIGIN" always; +add_header X-Content-Type-Options "nosniff" always; +add_header Referrer-Policy "strict-origin-when-cross-origin" always; +add_header Permissions-Policy "camera=(), microphone=(), geolocation=()" always; +add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' https://www.google.com https://www.gstatic.com; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; object-src 'none'; base-uri 'self'; form-action 'self' https://checkout.stripe.com;" always; + +# Stripe webhook — POST only +location = /api/stripe-webhook.php { + limit_except POST { deny all; } +} + +# Block dotfiles +location ~ /\. { deny all; return 403; } +``` + +### Database Schema Pattern (SQLite, Idempotent) + +Use `CREATE TABLE IF NOT EXISTS` for all tables. Use `ALTER TABLE ... ADD COLUMN` wrapped in try/catch for schema migrations. 
+ +```php +try { $pdo->exec("ALTER TABLE users ADD COLUMN verified_at INTEGER DEFAULT NULL"); } +catch (Throwable $e) { /* column already exists */ } +``` + +### Stripe Integration + +- Checkout: create session server-side, redirect to Stripe-hosted page +- Webhook: verify `Stripe-Signature` header using HMAC-SHA256 (implement without Stripe SDK — use curl) +- Webhook tolerance: 300 seconds (5 min) on timestamp +- Register webhook endpoint at: `https://{domain}/api/stripe-webhook.php` +- Events to subscribe: `checkout.session.completed`, `customer.subscription.created`, `customer.subscription.updated`, `customer.subscription.deleted`, `invoice.payment_succeeded`, `invoice.payment_failed` + +### .env Required Vars + +``` +APP_ENV=production +BASE_URL=https://{domain} +QC_ENCRYPTION_KEY={32-bytes-hex} +STRIPE_MODE=live +STRIPE_LIVE_SECRET_KEY=sk_live_... +STRIPE_LIVE_PUBLISHABLE_KEY=pk_live_... +STRIPE_WEBHOOK_SECRET=whsec_... +STRIPE_PRICE_ID=price_... +RECAPTCHA_SITE_KEY=... +RECAPTCHA_SECRET_KEY=... +SMTP_HOST=... +SMTP_PORT=587 +SMTP_USER=... +SMTP_PASS=... 
+MAIL_FROM=noreply@{domain} +MAIL_FROM_NAME={Brand} +``` + +Generate encryption key: `php -r "echo bin2hex(random_bytes(32));"` diff --git a/build/__pycache__/seed_sops.cpython-313.pyc b/build/__pycache__/seed_sops.cpython-313.pyc new file mode 100644 index 0000000..c099e93 Binary files /dev/null and b/build/__pycache__/seed_sops.cpython-313.pyc differ diff --git a/build/seed_sops.py b/build/seed_sops.py new file mode 100644 index 0000000..03d2139 --- /dev/null +++ b/build/seed_sops.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 + +import sqlite3 +import glob +import os +import re +from datetime import datetime + +# Builds the SOP SQLite database (tables plus an FTS5 index) from the top-level .md files in SOP_DIR. +# NOTE(review): both paths are absolute and point into one user's home directory. +DB_PATH = "/home/sirdrez/arisingmedia-websites/.am-webdesign-sops/sops.db" +SOP_DIR = "/home/sirdrez/arisingmedia-websites/.am-webdesign-sops" + +def init_db(): + """Drop and recreate every table (sops, sop_sections, rules, sop_fts) at DB_PATH and return the open connection.""" + conn = sqlite3.connect(DB_PATH) + cursor = conn.cursor() + + # Drop tables in reverse dependency order + cursor.execute("DROP TABLE IF EXISTS sop_fts") + cursor.execute("DROP TABLE IF EXISTS rules") + cursor.execute("DROP TABLE IF EXISTS sop_sections") + cursor.execute("DROP TABLE IF EXISTS sops") + + # Create tables + cursor.execute(""" + CREATE TABLE sops ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + number TEXT, + filename TEXT, + title TEXT, + full_content TEXT, + updated_at TEXT + ) + """) + + cursor.execute(""" + CREATE TABLE sop_sections ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + sop_id INTEGER REFERENCES sops(id), + heading_level INTEGER, + title TEXT, + content TEXT + ) + """) + + cursor.execute(""" + CREATE TABLE rules ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + category TEXT, + rule TEXT, + source_sop TEXT, + source_section TEXT + ) + """) + + cursor.execute(""" + CREATE VIRTUAL TABLE sop_fts USING fts5( + sop_number, + sop_title, + section_title, + content + ) + """) + + conn.commit() + return conn + +def extract_number_from_filename(filename): + """Extract number prefix from filename (e.g., '00' from '00-stack-philosophy.md'); returns '' when the name has no leading digits.""" + match = 
re.match(r'^(\d+)', filename) + if match: + return match.group(1) + return "" + +def extract_first_heading(content): + """Return the first line that starts with '#', with leading '#' characters and surrounding whitespace removed, or '' if there is none.""" + for line in content.split('\n'): + if line.startswith('#'): + return line.lstrip('#').strip() + return "" + +def split_into_sections(content): + """Split content at lines starting with '##' into a list of dicts with heading_level, title and content.""" + sections = [] + current_section = None + current_content = [] + + lines = content.split('\n') + + for line in lines: + if line.startswith('##'): + # Save previous section if exists + if current_section: + current_section['content'] = '\n'.join(current_content).strip() + sections.append(current_section) + + # Determine heading level (any heading of three or more '#' is stored as 3) + heading_level = 2 + if line.startswith('###'): + heading_level = 3 + + current_section = { + 'heading_level': heading_level, + 'title': line.lstrip('#').strip() + } + current_content = [] + # Lines seen before any '##' heading are not collected into a section + elif current_section: + current_content.append(line) + + # Save last section + if current_section: + current_section['content'] = '\n'.join(current_content).strip() + sections.append(current_section) + + return sections + +def extract_rules_from_section(section_title, section_content, category_map): + """Return a list of {'category', 'rule'} dicts, one per '-' or '*' bullet, when the section title maps to a category; otherwise an empty list.""" + # category_map is accepted but not read anywhere in this function. + title_lower = section_title.lower() + rules = [] + + # Determine category + # NOTE(review): a title that matches only 'rules' sets no category, so its bullets are skipped. Confirm this is intended. + category = None + if any(keyword in title_lower for keyword in ['never use', 'mandatory', 'rules', 'what we never']): + if 'never' in title_lower: + category = 'never_use' + elif 'mandatory' in title_lower or 'pattern' in title_lower: + category = 'mandatory' + + if not category: + return rules + + # Extract bullet points + for line in section_content.split('\n'): + stripped = line.strip() + if stripped.startswith('-') or stripped.startswith('*'): + rule_text = stripped.lstrip('-*').strip() + if rule_text: + rules.append({ + 'category': category, + 'rule': rule_text + }) + + return rules + +def process_sop_files(conn): + """Process all .md 
files and populate database.""" + # Returns the tuple (sop_count, section_count, rule_count). + cursor = conn.cursor() + + # Get all .md files in top level only + md_files = glob.glob(os.path.join(SOP_DIR, "*.md")) + md_files.sort() + + sop_count = 0 + section_count = 0 + rule_count = 0 + + for filepath in md_files: + filename = os.path.basename(filepath) + + # Skip certain files + if filename in ['README.md', 'STACK.md', 'CONTENT.md', 'OPTIMIZATION.md']: + continue + + with open(filepath, 'r', encoding='utf-8') as f: + full_content = f.read() + + # Extract metadata (updated_at is the time of this run, not the file's modification time) + number = extract_number_from_filename(filename) + title = extract_first_heading(full_content) + updated_at = datetime.now().isoformat() + + # Insert SOP record + cursor.execute(""" + INSERT INTO sops (number, filename, title, full_content, updated_at) + VALUES (?, ?, ?, ?, ?) + """, (number, filename, title, full_content, updated_at)) + + sop_id = cursor.lastrowid + sop_count += 1 + + # Split into sections and insert + sections = split_into_sections(full_content) + + for section in sections: + cursor.execute(""" + INSERT INTO sop_sections (sop_id, heading_level, title, content) + VALUES (?, ?, ?, ?) + """, (sop_id, section['heading_level'], section['title'], section['content'])) + + section_count += 1 + + # Extract rules from section (an empty dict is passed as category_map) + rules = extract_rules_from_section(section['title'], section['content'], {}) + + for rule in rules: + cursor.execute(""" + INSERT INTO rules (category, rule, source_sop, source_section) + VALUES (?, ?, ?, ?) 
+ """, (rule['category'], rule['rule'], filename, section['title'])) + + rule_count += 1 + + conn.commit() + return sop_count, section_count, rule_count + +def rebuild_fts(conn): + """Insert one sop_fts row per section, joined to its parent SOP's number and title.""" + # This only inserts; existing sop_fts rows are not cleared here. + cursor = conn.cursor() + + cursor.execute(""" + INSERT INTO sop_fts(sop_number, sop_title, section_title, content) + SELECT s.number, s.title, ss.title, ss.content + FROM sop_sections ss JOIN sops s ON ss.sop_id = s.id + """) + + conn.commit() + +def main(): + """Rebuild the database, print the counts, and on any exception print a traceback and exit with status 1.""" + try: + conn = init_db() + sop_count, section_count, rule_count = process_sop_files(conn) + rebuild_fts(conn) + conn.close() + + print(f"SOP Database built successfully:") + print(f" SOPs loaded: {sop_count}") + print(f" Sections indexed: {section_count}") + print(f" Rules extracted: {rule_count}") + print(f" Database: {DB_PATH}") + + # NOTE(review): conn.close() is only reached on success; the connection is not closed explicitly on the error path. + except Exception as e: + print(f"Error: {e}") + import traceback + traceback.print_exc() + exit(1) + +if __name__ == "__main__": + main() diff --git a/image-gen-workflow/00-workflow-overview.md b/image-gen-workflow/00-workflow-overview.md new file mode 100644 index 0000000..7fdbe6b --- /dev/null +++ b/image-gen-workflow/00-workflow-overview.md @@ -0,0 +1,202 @@ +# Image Generation Workflow — Arising Media + +Last updated: 2026-05-10 +Project reference: cobhamtech.com (first full run) + +--- + +## Purpose + +Standardized process for generating, validating, and deploying AI images across all Arising Media client static sites. Every decision made in this workflow is documented so any agent or session can continue without context loss. 
+ +--- + +## Stack + +API: Google Gemini (generativelanguage.googleapis.com) +SDK: google-genai (NOT the deprecated google-generativeai package) +Draft model: gemini-2.5-flash-image (Nano Banana — Speed Mode) +Final model: imagen-4.0-generate-001 (Imagen 4 — Quality Mode) +Format: JPEG, 85% quality, max 1600px wide + +--- + +## Phase 1 — Site Analysis (before any generation) + +Before generating images, read: +- index.html (home page structure) +- All CSS files (understand existing color tokens, dark/light sections) +- About, services, contact pages (identify where images add value) + +Map each candidate image slot: +- What HTML section will it go in? +- Is it a CSS background-image or an inline img tag? +- What overlay/treatment is needed for text readability? +- What dimensions/aspect ratio does the slot require? + +Document this in: 01-model-selection.md (image plan table) + +--- + +## Phase 2 — Prompt Engineering + +### Rules +- Always reference the site color palette in the prompt (dark navy, slate blue, gold accents) +- Specify "no text" and "no logos" for background images +- Specify "photorealistic" for all marketing images +- NO PEOPLE. NO FACES. Hardware, infrastructure, and environment only across all client sites +- This applies to all slots: hero, about, services, contact, location — no exceptions +- Reason: faces introduce identity/representation risk and age poorly. Hardware stays neutral and professional. 
+ +### Prompt structure +[Subject] + [Environment] + [Lighting] + [Mood/Tone] + [Technical quality terms] + [Exclusions] + +### Example (hero background) +"Professional enterprise server room, long corridor of dark rack servers with blue LED ambient lighting, deep perspective, dark navy background, cinematic shallow depth of field, no people, photorealistic, ultra detailed" + +### cobhamtech.com brand prompt additions +Always append to prompts for this client: +"dark navy and blue ambient lighting, professional, enterprise, no text" + +--- + +## Phase 3 — Generation Script Pattern + +```python +from google import genai +from google.genai import types + +client = genai.Client(api_key='KEY') + +response = client.models.generate_images( + model='imagen-4.0-generate-001', + prompt='PROMPT', + config=types.GenerateImagesConfig( + number_of_images=1, + aspect_ratio='16:9', # 1:1 | 9:16 | 16:9 | 4:3 | 3:4 (3:2 is rejected by the API) + output_mime_type='image/jpeg', + ) +) + +with open('output.jpg', 'wb') as f: + f.write(response.generated_images[0].image.image_bytes) +``` + +Validate: file must be > 10,000 bytes. Anything smaller is an API error or empty response. + +CRITICAL — Vision validation is mandatory before saving any image: +The toolbox script (ai-imagen-generate.sh) automatically sends each generated image to +gemini-2.0-flash for visual inspection. It asks: "Does this image contain people, faces, +hands, silhouettes, or body parts?" If YES — the image is rejected, prompt is tightened, +and generation retries up to 3 times. Only images that pass inspection are saved. +Claude cannot visually inspect images — the vision validation step is the enforcement gate. 
+ +--- + +## Phase 4 — Placement Patterns + +### Pattern A: CSS background-image with dark overlay (hero sections) + +Used when: image sits behind text on a dark section +Implementation: CSS only, no HTML change + +```css +.ct-hero { + background: var(--ct-black); /* fallback */ + background-image: linear-gradient(rgba(12,15,24,0.82), rgba(12,15,24,0.92)), url('/assets/images/hero-bg.jpg'); + background-size: cover; + background-position: center; +} +``` + +Overlay opacity guide: +- 0.82/0.92 = subtle image visible, text fully readable +- 0.90/0.95 = very subtle texture only +- 0.70/0.80 = image prominent (use only if no text overlay) + +### Pattern B: Inline img tag (editorial sections) + +Used when: image is a standalone visual element between content sections +Implementation: add img tag + container div + +```html +<div class="container" style="padding-bottom: var(--space-lg);"> + <img src="assets/images/intro-visual.jpg" + alt="Descriptive alt text" + style="width:100%;display:block;max-height:400px;object-fit:cover;"> +</div> +``` + +### Pattern C: Grid column image (about/story sections) + +Used when: image shares a row with text content +Implementation: add img to existing grid + expand grid columns + +```html +<!-- Expand grid to: grid-template-columns: 1fr 1fr 420px --> +<div> + <img src="assets/images/about-visual.jpg" + alt="Alt text" + style="width:100%;display:block;border-radius:4px;"> +</div> +``` + +--- + +## Phase 5 — CSP and nginx Updates + +Any new image source domain requires a CSP update in nginx.conf. +For Google Maps tiles: add `https://*.googleapis.com https://*.gstatic.com` to `img-src` +For self-hosted images: `img-src 'self' data:` is sufficient — no change needed + +--- + +## Phase 6 — Docker Rebuild and Verify + +After every image + HTML change: + +```bash +cd /home/sirdrez/arisingmedia-websites/[client] +docker stop [container-name] +docker rm [container-name] +docker build -t [image-name] . 
+docker run -d --name [container-name] -p [port]:80 [image-name] +sleep 2 +curl -s -o /dev/null -w "%{http_code}" http://localhost:[port]/ +``` + +Verify image loads: `curl -s -o /dev/null -w "%{http_code} %{content_type}" http://localhost:[port]/assets/images/hero-bg.jpg` +Expected: 200 with Content-Type: image/jpeg + +--- + +## File Naming Convention + +Pattern: `{page}-{slot}.jpg` + +| Slot | File name | Aspect | +|------|-----------|--------| +| Home hero background | hero-bg.jpg | 16:9 | +| Home intro visual | intro-visual.jpg | 4:3 | +| About story | about-visual.jpg | 4:3 | +| Services hub header | services-bg.jpg | 16:9 | +| Contact page | contact-bg.jpg | 16:9 | +| Location page | location-bg.jpg | 16:9 | + +--- + +## Logging Requirement + +Every generation run must produce a log entry in: +`am-webdesign-sops/image-gen-workflow/02-generation-log.md` + +Log must include: date, client, model, each image file name, prompt used, file size in bytes, placement pattern used, Docker rebuild result. + +--- + +## Cobhamtech.com Run Reference + +Container: cobhamtech-site +Port: 8010 +Assets path: /home/sirdrez/arisingmedia-websites/cobhamtech.com/assets/images/ +Color tokens: --ct-black #0c0f18 / --ct-slate #1c2d42 / --ct-blue #2d5a9e / --ct-gold #c79330 diff --git a/image-gen-workflow/01-model-selection.md b/image-gen-workflow/01-model-selection.md new file mode 100644 index 0000000..7286659 --- /dev/null +++ b/image-gen-workflow/01-model-selection.md @@ -0,0 +1,89 @@ +# Image Generation Model Selection + +Source: cutout.pro/model-comparison/imagen-vs-nanobanana + Gemini API model audit (2026-05-10) + +--- + +## Available Models (via Google Gemini API) + +### Imagen 4 — Quality Mode +Model ID: `imagen-4.0-generate-001` +Also available: `imagen-4.0-ultra-generate-001` + +Strengths: +- Photorealistic, high-fidelity output +- Handles complex prompts with multi-element consistency +- Superior text rendering inside images +- Best for brand-critical, final-delivery assets + +Use for: +- 
Hero background images +- Service page headers +- Marketing and case study visuals +- Any image that ships to production + +--- + +### Nano Banana (Gemini 2.5 Flash Image) — Speed Mode +Model ID: `gemini-2.5-flash-image` + +Strengths: +- Low latency, high volume +- Cost-effective for rapid iteration +- Good for concept previews and brainstorming + +Use for: +- Draft previews before committing to Imagen 4 +- AI chatbot or interactive UI image generation +- Avatar or thumbnail generation at scale +- Rapid iteration when exploring compositions + +--- + +### Imagen 4 Fast — Budget Mode +Model ID: `imagen-4.0-fast-generate-001` + +Use for: +- Quick internal previews +- Non-public-facing visuals +- High-volume batch jobs where quality is secondary + +--- + +## Recommended Workflow + +Step 1 — Draft with Speed Mode (`gemini-2.5-flash-image`) +Generate 2-4 variations quickly. Confirm composition, subject, and tone. Low cost. + +Step 2 — Refine with Quality Mode (`imagen-4.0-generate-001`) +Take the winning prompt from step 1. Generate final version at full quality. +This is the image that goes into the site. 
+ +Step 3 — Review against brand palette +Check that image tones align with site color tokens: +- cobhamtech.com: dark navy (#0c0f18), slate (#1c2d42), blue accent (#2d5a9e), gold (#c79330) +- All hero images need to work behind dark overlays + +Step 4 — Save to project assets +Path convention: `assets/images/{page}-{slot}.jpg` +Examples: `hero-bg.jpg`, `about-visual.jpg`, `services-bg.jpg` + +--- + +## Cobhamtech.com Image Plan + +| Slot | File | Page | Prompt Theme | +|------|------|------|--------------| +| Hero background | `hero-bg.jpg` | index.html | Dark server room, blue ambient lighting, depth of field | +| About story | `about-visual.jpg` | about.html | IT professional at clean desk, dual monitors, neutral dark background | +| Services hub | `services-bg.jpg` | services/index.html | Enterprise network infrastructure, abstract, dark | +| Intro visual | `intro-visual.jpg` | index.html | Business and technology handshake, professional setting | + +--- + +## Notes + +- Never use Nano Banana for final production images on client sites +- Imagen 4 Ultra adds marginal quality gain over standard — not worth the cost for web assets +- All images should be exported as JPEG at 85% quality, max 1600px wide, for web performance +- Run generated images through the site CSP — ensure `img-src` allows `self` and `data:` only (no external CDN hotlinking) diff --git a/image-gen-workflow/archive/02-generation-log.md b/image-gen-workflow/archive/02-generation-log.md new file mode 100644 index 0000000..e9e38ca --- /dev/null +++ b/image-gen-workflow/archive/02-generation-log.md @@ -0,0 +1,71 @@ +# Image Generation Log — CobhamTech.com + +**Date:** 2026-05-10 +**Model:** imagen-4.0-generate-001 (Gemini Imagen 4) +**SDK:** google-genai (Python) +**API Key:** [REDACTED: never record API keys in logs or commit them; revoke and rotate the key that was written here] +**Script:** generate_images.py (deleted after run) + +--- + +## Images Generated + +### hero-bg.jpg +- **Prompt:** Professional enterprise server room, long corridor of dark rack 
servers with blue LED ambient lighting, deep perspective, dark navy background, cinematic shallow depth of field, no people, photorealistic, ultra detailed +- **Aspect ratio:** 16:9 +- **File size:** 395,927 bytes +- **Placement:** .ct-hero background-image in assets/css/page-home.css — overlay gradient rgba(12,15,24,0.82) to rgba(12,15,24,0.92), background-size cover +- **Status:** OK + +### about-visual.jpg +- **Prompt:** Professional IT consultant at a clean modern workstation, dual monitors displaying network diagrams and dashboards, dark office with subtle blue ambient lighting, business attire, confident expression, photorealistic +- **Aspect ratio:** 4:3 +- **File size:** 426,565 bytes +- **Placement:** about.html ct-about-story section — third column, grid-template-columns updated to 1fr 1fr 420px, img tag with border-radius 4px +- **Status:** OK + +### services-bg.jpg +- **Prompt:** Abstract enterprise technology network, dark background, glowing blue interconnected nodes and data pathways, minimal high-tech aesthetic, no text, no people, cinematic, photorealistic render +- **Aspect ratio:** 16:9 +- **File size:** 403,142 bytes +- **Placement:** .ct-svc-idx-hero background-image in assets/css/page-services-index.css — same overlay pattern as hero-bg +- **Status:** OK + +### intro-visual.jpg +- **Prompt:** Business professional and IT consultant collaborating at a modern conference table with laptops and tablets, professional corporate office, clean neutral dark background, photorealistic, teamwork and trust +- **Aspect ratio:** 4:3 (retried — original 3:2 not supported) +- **File size:** 373,852 bytes +- **Placement:** index.html — div.container block between ct-intro section and ct-home-sec-services, max-height 400px object-fit cover +- **Status:** OK + +--- + +## API Errors / Retries + +- intro-visual.jpg failed on first attempt with aspect ratio 3:2: `aspectRatio 3:2 is not supported. Supported values are 1:1, 9:16, 16:9, 4:3, 3:4.` +- Retried with 4:3. 
Succeeded. + +## Supported Aspect Ratios (Imagen 4) + +1:1, 9:16, 16:9, 4:3, 3:4 + +3:2 is NOT supported. Use 4:3 as the closest substitute for landscape-medium compositions. + +--- + +## Docker + +- Rebuilt cobhamtech-static image from scratch after HTML/CSS changes +- Container running on port 8010 +- All 4 images confirmed HTTP 200 at runtime +- Homepage HTTP 200 + +--- + +## Lessons Learned + +1. Imagen 4 does not support 3:2 aspect ratio. The supported set is: 1:1, 9:16, 16:9, 4:3, 3:4. Always validate aspect ratios before scripting a batch. +2. Generation of 4 images (2 x 16:9, 2 x 4:3) completed in under 90 seconds total. +3. Dark overlay gradients (rgba at 0.82-0.92 opacity) are necessary on these photorealistic images to keep the white hero text legible. +4. File sizes ranged 374KB-427KB for JPEG output at these aspect ratios — appropriate for web use without additional compression. +5. The google-genai SDK uses `client.models.generate_images()` with a `GenerateImagesConfig` object — not the `generate_content()` path.
diff --git a/image-gen-workflow/archive/cobhamtech-image-requests.json b/image-gen-workflow/archive/cobhamtech-image-requests.json new file mode 100644 index 0000000..70c4c2f --- /dev/null +++ b/image-gen-workflow/archive/cobhamtech-image-requests.json @@ -0,0 +1,22 @@ +[ + { + "file": "hero-bg.jpg", + "aspect": "16:9", + "prompt": "Long corridor of enterprise server racks in a dark data center, blue and white LED indicator lights blinking on rack units, cable management arms, deep perspective vanishing point, dark navy ambient lighting, no people, no humans, hardware only, photorealistic, 8K detail" + }, + { + "file": "about-visual.jpg", + "aspect": "4:3", + "prompt": "Dense fiber optic patch panel with multicolored LC connectors and cables, server rack mounted in data center, LED status lights green and blue, dark background, close-up macro shot, no people, no hands, hardware only, photorealistic, sharp focus" + }, + { + "file": "services-bg.jpg", + "aspect": "16:9", + "prompt": "Overhead view of enterprise network switches and routers mounted in open server rack, Ethernet cables organized in bundles, blue port indicator lights, dark equipment, clean cable management, no people, hardware only, photorealistic, professional data center" + }, + { + "file": "intro-visual.jpg", + "aspect": "4:3", + "prompt": "Cisco network switches and firewall appliances in a wall-mounted server cabinet, blinking LED activity lights, dark navy background, organized cable bundles, cooling vents visible, no people, no human presence, hardware only, photorealistic, enterprise IT infrastructure" + } +] diff --git a/image-gen-workflow/archive/select_hero_images.py b/image-gen-workflow/archive/select_hero_images.py new file mode 100644 index 0000000..c38383e --- /dev/null +++ b/image-gen-workflow/archive/select_hero_images.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 +"""Select and regenerate hero images — Carbon Fiber Support CMS. 
+ +- Click a thumbnail to select it (gold border = selected) +- Click Regen under any thumbnail to regenerate just that variant +- Save → writes selections.json for Webflow upload + +Usage: + python3 select_hero_images.py +""" +import json +import os +import subprocess +import threading +import tkinter as tk +from pathlib import Path +from PIL import Image, ImageTk + +SOURCED_DIR = Path("/home/sirdrez/Downloads/Carbon Fiber Support_PDF/image-rendering/Application_Problems/grounded/sourced") +GENERATOR = Path("/home/sirdrez/Downloads/Carbon Fiber Support_PDF/generate_sourced_photoreal.py") +ENV_FILE = Path("/home/sirdrez/Downloads/Carbon Fiber Support_PDF/.env") +SELECTIONS_OUT = Path(__file__).parent / "selections.json" + +APPLICATIONS = [ + ("bowing-basement-wall-repair", "Bowing Basement Wall Repair"), + ("horizontal-basement-wall-cracks", "Horizontal Basement Wall Cracks"), + ("parking-garage-column-wrapping", "Parking Garage Column Wrapping"), + ("bridge-girder-strengthening", "Bridge Girder Strengthening"), + ("stair-step-foundation-cracks", "Stair-Step Foundation Cracks"), + ("parking-garage-deck-repair", "Parking Garage Deck Repair"), + ("vertical-foundation-cracks", "Vertical Foundation Cracks"), + ("poured-concrete-wall-repair", "Poured Concrete Wall Repair"), + ("interior-block-wall-bulging", "Interior Block Wall Bulging"), + ("foundation-wall-repair", "Foundation Wall Repair"), + ("crawlspace-wall-reinforcement", "Crawlspace Wall Reinforcement"), + ("cracked-concrete-slab-repair", "Cracked Concrete Slab Repair"), + ("corner-crack-repair", "Corner Crack Repair"), + ("concrete-block-wall-repair", "Concrete Block Wall Repair"), + ("residential-retaining-wall-repair", "Residential Retaining Wall Repair"), + ("commercial-retaining-wall-repair", "Commercial Retaining Wall Repair"), + ("commercial-building-column-reinforcement", "Commercial Building Column Reinforcement"), + ("warehouse-roof-truss-repair", "Warehouse Roof Truss Repair"), + 
("warehouse-beam-strengthening", "Warehouse Beam Strengthening"), + ("parking-garage-beam-strengthening", "Parking Garage Beam Strengthening"), + ("fire-and-impact-damage-beam-repair", "Fire and Impact Damage Beam Repair"), + ("concrete-foundation-beam-repair", "Concrete Foundation Beam Repair"), + ("bridge-column-and-pier-repair", "Bridge Column and Pier Repair"), +] + +VARIANTS = ["v1", "v2", "v3", "v4"] +V_LABELS = ["v1 24mm", "v2 macro", "v3 3/4", "v4 alt"] +THUMB_W, THUMB_H = 190, 143 + +BG = "#0e0e0e" +ROW_EVEN = "#141414" +ROW_ODD = "#111111" +SEL_CLR = "#ffc107" +DIM_CLR = "#555555" +REGEN_BG = "#2a1a00" +REGEN_FG = "#ff9800" +BUSY_CLR = "#ff5722" +OK_CLR = "#4caf50" +TEXT_CLR = "#ffffff" + +PLACEHOLDER = None # lazy-loaded gray image + + +def _load_env() -> dict: + env = os.environ.copy() + if ENV_FILE.exists(): + for line in ENV_FILE.read_text().splitlines(): + line = line.strip() + if line and not line.startswith("#") and "=" in line: + k, v = line.split("=", 1) + env[k.strip()] = v.strip() + return env + + +def _gray_placeholder(w: int, h: int) -> Image.Image: + img = Image.new("RGB", (w, h), "#1a1a1a") + return img + + +class App: + def __init__(self, root: tk.Tk): + self.root = root + self.root.title("CFS Image Selector") + self.root.configure(bg=BG) + self.root.geometry("1030x840") + + self.selections: dict[str, int] = {s: 0 for s, _ in APPLICATIONS} + self._tk_imgs: dict = {} + self._img_lbls: dict = {} # (slug, v_idx) -> Label (image widget) + self._bdr_lbls: dict = {} # (slug, v_idx) -> same Label for border + self._txt_lbls: dict = {} # (slug, v_idx) -> variant text Label + self._regen_btns: dict = {} # (slug, v_idx) -> regen Button + self._busy: set = set() # (slug, v_idx) currently regenerating + + self._build() + self._load_selections() + + # ------------------------------------------------------------------ build + + def _build(self): + bar = tk.Frame(self.root, bg=BG, pady=10) + bar.pack(fill="x", padx=20) + tk.Label(bar, text="Carbon Fiber 
Support — Select Hero Image", + bg=BG, fg=TEXT_CLR, font=("Helvetica", 12, "bold")).pack(side="left") + tk.Button(bar, text="Save Selections", command=self._save, + bg="#1a3a1a", fg=OK_CLR, relief="flat", padx=14, pady=6, + font=("Helvetica", 11, "bold"), cursor="hand2", + activebackground="#1f4a1f").pack(side="right", padx=(6, 0)) + tk.Button(bar, text="All v1", command=lambda: self._select_all(0), + bg="#1a2a3a", fg="#64b5f6", relief="flat", padx=10, pady=6, + font=("Helvetica", 10), cursor="hand2").pack(side="right") + + self._status = tk.StringVar(value="v1 pre-selected for all — click to change — Regen to regenerate") + tk.Label(self.root, textvariable=self._status, bg=BG, fg=DIM_CLR, + font=("Helvetica", 10), anchor="w").pack(fill="x", padx=20, pady=(0, 4)) + + outer = tk.Frame(self.root, bg=BG) + outer.pack(fill="both", expand=True, padx=8, pady=(0, 8)) + + canvas = tk.Canvas(outer, bg=BG, highlightthickness=0) + vsb = tk.Scrollbar(outer, orient="vertical", command=canvas.yview) + canvas.configure(yscrollcommand=vsb.set) + vsb.pack(side="right", fill="y") + canvas.pack(side="left", fill="both", expand=True) + + self._inner = tk.Frame(canvas, bg=BG) + win_id = canvas.create_window((0, 0), window=self._inner, anchor="nw") + canvas.bind("<Configure>", lambda e: canvas.itemconfig(win_id, width=e.width)) + self._inner.bind("<Configure>", + lambda _: canvas.configure(scrollregion=canvas.bbox("all"))) + canvas.bind_all("<Button-4>", lambda _: canvas.yview_scroll(-1, "units")) + canvas.bind_all("<Button-5>", lambda _: canvas.yview_scroll(1, "units")) + canvas.bind_all("<MouseWheel>", + lambda e: canvas.yview_scroll(int(-1 * e.delta / 120), "units")) + + for idx, (slug, name) in enumerate(APPLICATIONS): + self._build_row(idx, slug, name) + + def _build_row(self, idx: int, slug: str, name: str): + bg = ROW_EVEN if idx % 2 == 0 else ROW_ODD + row = tk.Frame(self._inner, bg=bg, pady=8, padx=12) + row.pack(fill="x", pady=1) + + tk.Label(row, text=f"{idx+1:02d} {name}", 
bg=bg, fg=TEXT_CLR, + font=("Helvetica", 10, "bold"), anchor="w", width=30).pack(side="left", padx=(0, 10)) + + for v_idx, (vk, vl) in enumerate(zip(VARIANTS, V_LABELS)): + path = SOURCED_DIR / f"{slug}_{vk}.jpg" + cell = tk.Frame(row, bg=bg) + cell.pack(side="left", padx=3) + + # Image label + tk_img = self._make_tk_img(path) + self._tk_imgs[(slug, v_idx)] = tk_img + is_sel = (v_idx == 0) + border = SEL_CLR if is_sel else "#2a2a2a" + lbl = tk.Label(cell, image=tk_img, + highlightthickness=4, highlightbackground=border, + cursor="hand2", bg="#000") + lbl.pack() + self._img_lbls[(slug, v_idx)] = lbl + self._bdr_lbls[(slug, v_idx)] = lbl + lbl.bind("<Button-1>", lambda e, s=slug, vi=v_idx: self._select(s, vi)) + lbl.bind("<Enter>", lambda e, s=slug, vi=v_idx: self._hover(s, vi, True)) + lbl.bind("<Leave>", lambda e, s=slug, vi=v_idx: self._hover(s, vi, False)) + + # Variant label row: text + regen button side by side + foot = tk.Frame(cell, bg=bg) + foot.pack(fill="x") + txt = tk.Label(foot, text=vl, bg=bg, + fg=SEL_CLR if is_sel else DIM_CLR, + font=("Helvetica", 9), anchor="w") + txt.pack(side="left") + self._txt_lbls[(slug, v_idx)] = txt + + rbtn = tk.Button(foot, text="Regen", + command=lambda s=slug, vi=v_idx: self._regen(s, vi), + bg=REGEN_BG, fg=REGEN_FG, relief="flat", + font=("Helvetica", 8), padx=5, pady=1, + cursor="hand2", activebackground="#3a2500") + rbtn.pack(side="right") + self._regen_btns[(slug, v_idx)] = rbtn + + # ------------------------------------------------------------------ logic + + def _make_tk_img(self, path: Path) -> ImageTk.PhotoImage: + if path.exists(): + try: + img = Image.open(path) + img.thumbnail((THUMB_W, THUMB_H), Image.LANCZOS) + return ImageTk.PhotoImage(img) + except Exception: + pass + return ImageTk.PhotoImage(_gray_placeholder(THUMB_W, THUMB_H)) + + def _select(self, slug: str, v_idx: int): + old = self.selections[slug] + if old == v_idx: + return + lbl_old = self._bdr_lbls.get((slug, old)) + if lbl_old: + 
lbl_old.configure(highlightbackground="#2a2a2a") + txt_old = self._txt_lbls.get((slug, old)) + if txt_old: + txt_old.configure(fg=DIM_CLR) + + lbl_new = self._bdr_lbls.get((slug, v_idx)) + if lbl_new: + lbl_new.configure(highlightbackground=SEL_CLR) + txt_new = self._txt_lbls.get((slug, v_idx)) + if txt_new: + txt_new.configure(fg=SEL_CLR) + + self.selections[slug] = v_idx + self._status.set(f"Selected {slug} → {VARIANTS[v_idx]}") + + def _hover(self, slug: str, v_idx: int, entering: bool): + if self.selections[slug] == v_idx or (slug, v_idx) in self._busy: + return + lbl = self._bdr_lbls.get((slug, v_idx)) + if lbl: + lbl.configure(highlightbackground="#555" if entering else "#2a2a2a") + + def _select_all(self, v_idx: int): + for slug, _ in APPLICATIONS: + self._select(slug, v_idx) + self._status.set(f"All applications set to {VARIANTS[v_idx]}") + + # ------------------------------------------------------------------ regen + + def _regen(self, slug: str, v_idx: int): + key = (slug, v_idx) + if key in self._busy: + return + self._busy.add(key) + + # Delete the file so the generator recreates it + path = SOURCED_DIR / f"{slug}_{VARIANTS[v_idx]}.jpg" + if path.exists(): + path.unlink() + + # Visual: busy state + lbl = self._bdr_lbls.get(key) + if lbl: + lbl.configure(highlightbackground=BUSY_CLR) + btn = self._regen_btns.get(key) + if btn: + btn.configure(text="...", state="disabled") + txt = self._txt_lbls.get(key) + if txt: + txt.configure(fg=BUSY_CLR) + + self._status.set(f"Regenerating {slug} {VARIANTS[v_idx]} ...") + + def run(): + env = _load_env() + env["SLUG_FILTER_VARIANT"] = VARIANTS[v_idx] + subprocess.run( + ["python3", str(GENERATOR), slug], + env=env, capture_output=True + ) + self.root.after(0, lambda: self._regen_done(slug, v_idx)) + + threading.Thread(target=run, daemon=True).start() + + def _regen_done(self, slug: str, v_idx: int): + key = (slug, v_idx) + self._busy.discard(key) + + path = SOURCED_DIR / f"{slug}_{VARIANTS[v_idx]}.jpg" + tk_img = 
self._make_tk_img(path) + self._tk_imgs[key] = tk_img + lbl = self._img_lbls.get(key) + if lbl: + lbl.configure(image=tk_img) + + is_sel = (self.selections[slug] == v_idx) + border = SEL_CLR if is_sel else "#2a2a2a" + bdr = self._bdr_lbls.get(key) + if bdr: + bdr.configure(highlightbackground=border) + txt = self._txt_lbls.get(key) + if txt: + txt.configure(fg=SEL_CLR if is_sel else DIM_CLR) + btn = self._regen_btns.get(key) + if btn: + btn.configure(text="Regen", state="normal") + + self._status.set(f"Done {slug} {VARIANTS[v_idx]}") + + # ------------------------------------------------------------------ save + + def _save(self): + out = {slug: VARIANTS[vi] for slug, vi in self.selections.items()} + SELECTIONS_OUT.write_text(json.dumps(out, indent=2)) + self._status.set(f"Saved {len(out)} selections → {SELECTIONS_OUT.name}") + + def _load_selections(self): + if SELECTIONS_OUT.exists(): + try: + data = json.loads(SELECTIONS_OUT.read_text()) + for slug, vk in data.items(): + if vk in VARIANTS: + self._select(slug, VARIANTS.index(vk)) + self._status.set(f"Loaded {SELECTIONS_OUT.name}") + except Exception: + pass + + +if __name__ == "__main__": + root = tk.Tk() + App(root) + root.mainloop() diff --git a/image-gen-workflow/imagen-api-reference.json b/image-gen-workflow/imagen-api-reference.json new file mode 100644 index 0000000..89d4be5 --- /dev/null +++ b/image-gen-workflow/imagen-api-reference.json @@ -0,0 +1,128 @@ +{ + "source": "https://ai.google.dev/gemini-api/docs/imagen", + "retrieved": "2026-05-13", + "sdk_package": "google-genai", + "sdk_import": "from google import genai\nfrom google.genai import types", + + "models": [ + { + "id": "imagen-4.0-generate-001", + "label": "Imagen 4 Standard", + "use_case": "Production — best balance of quality and speed", + "supports_image_size": true + }, + { + "id": "imagen-4.0-ultra-generate-001", + "label": "Imagen 4 Ultra", + "use_case": "Highest quality output, slower — hero images and print", + "supports_image_size": 
true + }, + { + "id": "imagen-4.0-fast-generate-001", + "label": "Imagen 4 Fast", + "use_case": "Drafts and rapid iteration — low latency", + "supports_image_size": false + } + ], + + "deprecated_models": [ + { "id": "imagen-3.0-generate-001", "status": "discontinued" } + ], + + "method": "client.models.generate_images", + "rest_endpoint": "https://generativelanguage.googleapis.com/v1beta/models/{model}:predict", + "auth_header": "x-goog-api-key", + + "parameters": { + "model": { + "type": "string", + "required": true, + "values": ["imagen-4.0-generate-001", "imagen-4.0-ultra-generate-001", "imagen-4.0-fast-generate-001"] + }, + "prompt": { + "type": "string", + "required": true, + "language": "English only", + "max_tokens": 480, + "notes": "Text overlays in images: keep under 25 characters for best results. Exact font replication not guaranteed." + }, + "config": { + "class": "types.GenerateImagesConfig", + "fields": { + "number_of_images": { + "type": "integer", + "min": 1, + "max": 4, + "default": 4 + }, + "aspect_ratio": { + "type": "string", + "default": "1:1", + "values": ["1:1", "3:4", "4:3", "9:16", "16:9"], + "notes": "Do NOT use '3:2' — not supported and will error" + }, + "image_size": { + "type": "string", + "default": "1K", + "values": ["1K", "2K"], + "applies_to": ["imagen-4.0-generate-001", "imagen-4.0-ultra-generate-001"], + "not_available_for": ["imagen-4.0-fast-generate-001"] + }, + "person_generation": { + "type": "string", + "default": "allow_adult", + "values": [ + { "value": "dont_allow", "description": "No people or faces in output — use for hardware, product, landscape" }, + { "value": "allow_adult", "description": "Adults only" }, + { "value": "allow_all", "description": "Adults and children — restricted in EU, UK, CH, MENA regions" } + ] + } + } + } + }, + + "output": { + "watermark": "SynthID — embedded in all generated images, not visible", + "format": "PIL Image object (SDK) / base64 bytes (REST)", + "access_sdk":
"response.generated_images[i].image" + }, + + "python_minimal_example": "from google import genai\nfrom google.genai import types\n\nclient = genai.Client()\nresponse = client.models.generate_images(\n model='imagen-4.0-generate-001',\n prompt='Your prompt here',\n config=types.GenerateImagesConfig(\n number_of_images=4,\n aspect_ratio='16:9',\n person_generation='dont_allow'\n )\n)\nfor img in response.generated_images:\n img.image.show()", + + "rest_minimal_example": "curl -X POST 'https://generativelanguage.googleapis.com/v1beta/models/imagen-4.0-generate-001:predict' -H \"x-goog-api-key: $GEMINI_API_KEY\" -H 'Content-Type: application/json' -d '{\"instances\":[{\"prompt\":\"Your prompt\"}],\"parameters\":{\"sampleCount\":4}}'", + + "arising_media_defaults": { + "draft_model": "imagen-4.0-fast-generate-001", + "production_model": "imagen-4.0-generate-001", + "hero_model": "imagen-4.0-ultra-generate-001", + "person_generation": "dont_allow", + "number_of_images": 4, + "aspect_ratio_web_hero": "16:9", + "aspect_ratio_square": "1:1", + "aspect_ratio_portrait": "3:4", + "file_naming": "{page}-{slot}.jpg", + "workflow": "draft with fast → select variant → regenerate with standard or ultra" + }, + + "prompt_engineering_notes": [ + "Describe subject, environment, lighting, and mood in one sentence", + "Photorealistic hardware/landscape: add 'photorealistic, 4K, professional photography'", + "Avoid people/faces: include 'no people, no humans' explicitly when dont_allow alone is not enough", + "Camera style modifiers: 'shot on Canon 5D', 'wide angle lens', 'golden hour lighting'", + "Art style: 'architectural render', 'flat illustration', 'watercolor wash'", + "Keep text overlays short: 25 chars max, specify position ('top left', 'centered')" + ], + + "known_errors": [ + { + "error": "aspect ratio X:X not supported", + "cause": "3:2 or other non-standard ratio passed", + "fix": "Use only: 1:1, 3:4, 4:3, 9:16, 16:9" + }, + { + "error": "imageSize not applicable", + "cause":
"imageSize passed to imagen-4.0-fast-generate-001", + "fix": "Remove imageSize parameter when using fast model" + } + ] +} diff --git a/local-image-generation/01-comfyui-setup.md b/local-image-generation/01-comfyui-setup.md new file mode 100644 index 0000000..16b3a73 --- /dev/null +++ b/local-image-generation/01-comfyui-setup.md @@ -0,0 +1,100 @@ +# 01 — ComfyUI Setup + +ComfyUI is installed at `~/ComfyUI/` on the Arising Media workstation. +Python venv is at `~/ComfyUI/venv/`. + +## Starting ComfyUI + +```bash +tmux new-session -d -s comfyui \ + "cd ~/ComfyUI && HSA_OVERRIDE_GFX_VERSION=10.3.0 venv/bin/python main.py --listen 0.0.0.0 --port 8188 2>&1 | tee ~/comfyui.log" +``` + +**Do NOT use `--cpu`.** The GPU is an AMD Ryzen 9 9950X integrated graphics +(gfx1036, RDNA 2 iGPU) with 30,942 MB unified VRAM (shares system RAM). +All models fit: FLUX (12GB), Wan 2.2 (3.2GB), T5-XXL (4.6GB). + +`HSA_OVERRIDE_GFX_VERSION=10.3.0` is required — gfx1036 (iGPU) is not in +the PyTorch ROCm kernel list, but gfx1030 (RDNA 2 dGPU) is compatible. +Without the override: `HIP error: invalid device function` on first compute op. + +Previous SOP said 2GB VRAM — that was wrong. It was reading the dedicated +VRAM pool, not the full unified memory PyTorch allocates via ROCm. 
+ +Verify it's up: +```bash +curl -s -o /dev/null -w "%{http_code}" http://localhost:8188/system_stats +# should return 200 within 30 seconds +``` + +Check the log for node load errors: +```bash +tmux attach -t comfyui +``` + +## Required custom nodes + +Both installed at `~/ComfyUI/custom_nodes/`: + +- `ComfyUI-GGUF` — loads GGUF quantized models (FLUX, Wan 2.2) +- `ComfyUI-Detail-Daemon` — optional, detail enhancement + +If `ComfyUI-GGUF` fails to load, check for missing Python packages: +```bash +~/ComfyUI/venv/bin/pip install gguf sqlalchemy +``` + +## Known dependency gaps (fix if ComfyUI fails to start) + +```bash +~/ComfyUI/venv/bin/pip install sqlalchemy gguf +``` + +Audio nodes (`nodes_audio.py`, `nodes_lt_audio.py`) will fail to import +because `torchaudio` is not installed. This is safe to ignore — audio +nodes are not used in this pipeline. + +## GPU note + +GPU: AMD Ryzen 9 9950X integrated graphics (gfx1036, RDNA 2 iGPU) +Unified memory: 30,942 MB available to PyTorch via ROCm (shares system RAM) + +```bash +# Verify ROCm sees the GPU +~/ComfyUI/venv/bin/python -c "import torch; print(torch.cuda.is_available()); print(torch.cuda.get_device_name(0))" +# returns True / AMD Ryzen 9 9950X 16-Core Processor + +# Verify arch override works +HSA_OVERRIDE_GFX_VERSION=10.3.0 ~/ComfyUI/venv/bin/python -c " +import torch; x=torch.tensor([1.0]).cuda(); print('GPU OK:', x.device) +" +``` + +gfx1036 requires `HSA_OVERRIDE_GFX_VERSION=10.3.0` — always set this env var +before starting ComfyUI or running any Python that loads GPU tensors. +Without it: `HIP error: invalid device function` immediately on first op. 
+ +## Model folder structure + +``` +~/ComfyUI/models/ +├── unet/ +│ └── flux1-schnell-Q8_0.gguf (12GB, FLUX image) +├── clip/ +│ ├── clip_l.safetensors (235MB, FLUX CLIP-L) +│ ├── t5xxl_fp8_e4m3fn.safetensors (4.6GB, FLUX T5-XXL) +│ └── umt5_xxl_fp8_e4m3fn_scaled.safetensors (6.3GB, Wan text encoder) +├── vae/ +│ ├── ae.safetensors (108MB, FLUX VAE) +│ └── wan_2.1_vae.safetensors (243MB, Wan VAE) +└── diffusion_models/ + └── Wan2.2-TI2V-5B-Q4_K_M.gguf (3.2GB, Wan 2.2 video) +``` + +## Stopping ComfyUI + +```bash +tmux send-keys -t comfyui C-c +# or kill the session: +tmux kill-session -t comfyui +``` diff --git a/local-image-generation/02-flux-images.md b/local-image-generation/02-flux-images.md new file mode 100644 index 0000000..5c1284f --- /dev/null +++ b/local-image-generation/02-flux-images.md @@ -0,0 +1,99 @@ +# 02 — FLUX.1 Schnell Image Pipeline + +## Why FLUX over SDXL + +FLUX is a 12B-parameter transformer model. SDXL (RealVisXL) is 3.5B. +FLUX has significantly better: +- Spatial depth and perspective (lens simulation) +- Scene geometry (vanishing points, depth-of-field) +- Prompt following (T5-XXL understands long, detailed prompts) + +SDXL was tested on lahrcarpetcleaning.com and rejected: flat angles, no depth, +poor spatial coherence. FLUX replaced it entirely. 
+ +## Model stack + +| File | Size | Notes | +|---|---|---| +| flux1-schnell-Q8_0.gguf | 12GB | GGUF Q8, needs ComfyUI-GGUF node | +| t5xxl_fp8_e4m3fn.safetensors | 4.6GB | T5-XXL text encoder, fp8 quantized | +| clip_l.safetensors | 235MB | CLIP-L, short prompt encoder | +| ae.safetensors | 108MB | Official FLUX VAE from Black Forest Labs | + +## Download (one-time) + +FLUX GGUF (public, no auth): +```bash +wget "https://huggingface.co/city96/FLUX.1-schnell-gguf/resolve/main/flux1-schnell-Q8_0.gguf" \ + -O ~/ComfyUI/models/unet/flux1-schnell-Q8_0.gguf + +wget "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp8_e4m3fn.safetensors" \ + -O ~/ComfyUI/models/clip/t5xxl_fp8_e4m3fn.safetensors + +wget "https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors" \ + -O ~/ComfyUI/models/clip/clip_l.safetensors +``` + +FLUX VAE (gated — requires HF login and license acceptance): +```bash +hf auth login # paste read token +HF_TOKEN=$(cat ~/.cache/huggingface/token) +wget --header="Authorization: Bearer $HF_TOKEN" \ + "https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/ae.safetensors" \ + -O ~/ComfyUI/models/vae/ae.safetensors +``` + +## ComfyUI workflow (what gen-images-flux.py sends) + +``` +UnetLoaderGGUF → flux1-schnell-Q8_0.gguf +DualCLIPLoader → t5xxl_fp8_e4m3fn + clip_l (type=flux) +VAELoader → ae.safetensors +CLIPTextEncode → prompt +EmptyLatentImage → 1024×576, batch=1 +KSampler → steps=4, cfg=1.0, euler, simple +VAEDecode +SaveImage +``` + +## Settings + +| Setting | Value | Why | +|---|---|---| +| Steps | 4 | Schnell is distilled — 4 steps is optimal | +| CFG | 1.0 | Distilled model, higher CFG degrades quality | +| Sampler | euler | Best for FLUX | +| Scheduler | simple | Matches FLUX training | +| Negative prompt | none | Distilled model ignores it | +| Resolution | 1024×576 | 16:9 hero format | + +## Running generation + +```bash +# ComfyUI must be running first (see 
01-comfyui-setup.md) +cd /home/sirdrez/arisingmedia-websites/{domain} +python3 tools/gen-images-flux.py 2>&1 | tee tools/flux-gen.log +``` + +Monitor: +```bash +tmux attach -t comfyui # step progress bars +tail -f tools/flux-gen.log # per-image OK/FAIL +``` + +Speed: ~4 min/image on CPU (2GB VRAM insufficient for GPU). 28 images = ~1h50m. + +## After generation + +```bash +python3 tools/convert-to-webp.py # resize + convert to WebP +rm assets/images/**/*.jpg # delete source JPGs +docker compose build --no-cache web # bake WebP into image +docker compose up -d +``` + +Verify: +```bash +curl -s -o /dev/null -w "%{http_code}" http://localhost:{port}/assets/images/hero/hero-carpet-cleaning.webp +# must return 200 +``` diff --git a/local-image-generation/03-wan-video.md b/local-image-generation/03-wan-video.md new file mode 100644 index 0000000..acbea0e --- /dev/null +++ b/local-image-generation/03-wan-video.md @@ -0,0 +1,159 @@ +# 03 — Wan 2.2 Video Pipeline (Image-to-Video) + +## Default policy: local generation + +Video generation is done locally with Wan 2.2 by default. Google Veo (via +Vertex AI / Gemini API) is NOT used unless the client has explicit budget +allocated for it. Reasons: + +- Google Veo costs money per second of video generated (billed per request) +- Local Wan 2.2 is free after one-time model download (~10GB total) +- Quality from Wan 2.2 at 832x480 is sufficient for hero reels +- No API key, no quota limits, no vendor dependency + +Use Google Veo only when: client approves a paid media budget, OR the local +workstation is unavailable and a deadline cannot wait for CPU generation time. + +## Purpose + +Takes FLUX-generated hero stills and animates each into a 3-5 second clip. +Clips are stitched with ffmpeg into a marketing reel for the hero section. 
+ +## Model stack + +| File | Size | Notes | +|---|---|---| +| Wan2.2-TI2V-5B-Q4_K_M.gguf | 3.2GB | Text+Image to Video, 5B Q4 GGUF | +| umt5_xxl_fp8_e4m3fn_scaled.safetensors | 6.3GB | UMT5-XXL text encoder, fp8 | +| wan_2.1_vae.safetensors | 243MB | Wan VAE (compatible with 2.2) | + +## Download (one-time, all public) + +```bash +# Wan 2.2 model +wget "https://huggingface.co/QuantStack/Wan2.2-TI2V-5B-GGUF/resolve/main/Wan2.2-TI2V-5B-Q4_K_M.gguf" \ + -O ~/ComfyUI/models/diffusion_models/Wan2.2-TI2V-5B-Q4_K_M.gguf + +# Text encoder +wget "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors" \ + -O ~/ComfyUI/models/clip/umt5_xxl_fp8_e4m3fn_scaled.safetensors + +# VAE +wget "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors" \ + -O ~/ComfyUI/models/vae/wan_2.1_vae.safetensors +``` + +## Critical: WanImageToVideo is a conditioning node, NOT a sampler + +This is the most important thing to understand about the Wan pipeline. The node +name is misleading. `WanImageToVideo` does NOT run diffusion — it sets up the +conditioning and empty latent. A separate `KSampler` runs the actual diffusion. 
+ +Wrong mental model (what most tutorials imply): +``` +LoadImage → WanImageToVideo → SaveAnimatedWEBP +``` + +Correct node graph: +``` +UnetLoaderGGUF ─────────────────────────────────────→ KSampler.model +CLIPLoader ──→ CLIPTextEncode (positive) ─→ WanImageToVideo.positive ──→ KSampler.positive + └→ CLIPTextEncode (negative) ─→ WanImageToVideo.negative ──→ KSampler.negative +VAELoader ──→ WanImageToVideo.vae WanImageToVideo.latent ──→ KSampler.latent_image +LoadImage ──→ WanImageToVideo.start_image (optional) + KSampler.samples ──→ VAEDecode ──→ SaveAnimatedWEBP +``` + +WanImageToVideo outputs three things (in order): +- output[0] = positive CONDITIONING (enhanced with image) +- output[1] = negative CONDITIONING +- output[2] = latent LATENT (sized for video: width × height × frames) + +The `start_image` input (optional IMAGE) anchors the first frame. Without it, +video starts from noise. Always pass it for image-to-video. + +## Workflow + +Correct ComfyUI API node graph (as sent by `gen-video-wan.py`): + +``` +node 1: UnetLoaderGGUF → Wan2.2-TI2V-5B-Q4_K_M.gguf +node 2: CLIPLoader → umt5_xxl_fp8_e4m3fn_scaled.safetensors (type=wan) +node 3: VAELoader → wan_2.1_vae.safetensors +node 4: LoadImage → FLUX hero still (.webp) +node 5: CLIPTextEncode → motion prompt text (positive) +node 6: CLIPTextEncode → negative prompt text +node 7: WanImageToVideo → positive=[5,0], negative=[6,0], vae=[3,0], + start_image=[4,0], width=832, height=480, + length=25 (or 49), batch_size=1 +node 8: KSampler → model=[1,0], positive=[7,0], negative=[7,1], + latent_image=[7,2], steps=20, cfg=6.0, + sampler_name=uni_pc, scheduler=simple, denoise=1.0 +node 9: VAEDecode → samples=[8,0], vae=[3,0] +node 10: SaveAnimatedWEBP → images=[9,0], fps=12 +``` + +## Settings + +| Setting | Value | +|---|---| +| Resolution | 832×480 (16:9 ~480p) | +| Frames | 49 (~4 seconds at 12fps) | +| Steps | 20 | +| CFG | 6.0 | +| Sampler | uni_pc | + +**Frame count constraint:** `length` must follow the pattern 
1, 5, 9, 13, 17, 21, 25, 29 ... (step of 4). +ComfyUI enforces this. 49 is valid (1 + 4×12). 50 is not. + +**CPU speed on Arising Media workstation (2GB VRAM, CPU inference):** +- ~415 seconds per diffusion step +- 20 steps × 415s = ~2.3 hours per clip +- 6 clips = ~14 hours total for a full reel +- Use 25 frames (not 49) for test runs to halve generation time +- Full reel generation: start before leaving for the day, check next morning + +**CLIPVision note:** No CLIPVision models are installed at `~/ComfyUI/models/clip_vision/`. +The `clip_vision_output` input on WanImageToVideo is optional and currently unused. +Image conditioning comes from `start_image` only (VAE-encoded first frame). +This is sufficient for smooth motion — CLIPVision would add semantic image +understanding but is not required. + +## Running video generation + +```bash +# ComfyUI must be running, FLUX images must be converted to WebP first +cd /home/sirdrez/arisingmedia-websites/{domain} +python3 tools/gen-video-wan.py 2>&1 | tee tools/wan-gen.log +``` + +Output goes to `assets/videos/clips/` as `.webp` animation files. 
+ +## Stitching the reel + +```bash +# Create file list +ls assets/videos/clips/*.webp | sort | while read f; do echo "file '$PWD/$f'"; done > tools/clip-list.txt + +# Convert webp animations to mp4 first (if needed) +for f in assets/videos/clips/*.webp; do + ffmpeg -i "$f" "${f%.webp}.mp4" -y +done + +# Stitch +ls assets/videos/clips/*.mp4 | sort | while read f; do echo "file '$PWD/$f'"; done > tools/clip-list.txt +ffmpeg -f concat -safe 0 -i tools/clip-list.txt -c copy assets/videos/hero/hero-reel-flux.mp4 +``` + +## Reel shot list (lahrcarpetcleaning.com) + +| Clip | Source still | Motion prompt | +|---|---|---| +| clip-01 | hero-carpet-cleaning | slow dolly forward across carpet | +| clip-02 | hero-stairs | slow pan upward along staircase | +| clip-03 | hero-upholstery | gentle push in toward sofa | +| clip-04 | hero-commercial | tracking shot down lobby | +| clip-05 | hero-floors | floor-level drift forward | +| clip-06 | hero-clean-result | rack focus across carpet fibers | + +6 clips × ~4s = ~24 seconds total reel. diff --git a/local-image-generation/04-prompt-guide.md b/local-image-generation/04-prompt-guide.md new file mode 100644 index 0000000..bf9a3c8 --- /dev/null +++ b/local-image-generation/04-prompt-guide.md @@ -0,0 +1,105 @@ +# 04 — Prompt Guide (Interior / Carpet Photography) + +## The core pattern + +All image prompts follow this structure: + +``` +{camera angle} {lens} {subject description}, +{foreground detail} sharp in foreground, {background} receding into bokeh, +{lighting description}, {style tag}, no people, ultra-realistic {type} photography +``` + +## Why this works + +FLUX.1 Schnell uses T5-XXL as the primary text encoder (6GB model) which +understands natural language photography concepts deeply. Specifying lens +focal length, depth of field, and spatial relationships produces images with +correct depth, perspective, and scene geometry. 
+ +SDXL models lack this — their text encoders (CLIP-L/CLIP-G) top out at +77 tokens and don't understand spatial concepts reliably. + +## Lens vocabulary + +| Lens | Effect | Use for | +|---|---|---| +| 24mm wide-angle | Strong perspective distortion, exaggerated depth | Corridors, lobbies, open spaces | +| 35mm | Natural perspective, slight depth emphasis | Most interior shots | +| 50mm prime | Near-natural perspective, shallow DoF | Close-ups, furniture, details | +| macro | Extreme close-up, very shallow DoF | Carpet fiber detail, texture | + +## Camera position vocabulary + +- `low-angle` / `low 35mm angle` — camera near floor level, looking across surface +- `floor level` — pressed to the floor, extreme low angle +- `corner angle` — shot from room corner, wide coverage +- `looking up` — camera below subject, looking upward +- `looking down` — camera above, bird's-eye (avoid for carpet — looks flat) + +## Depth of field vocabulary + +- `shallow depth of field` — subject sharp, background blurred +- `razor sharp in foreground ... 
receding into bokeh` — specific foreground/background split +- `raking light` — light hitting surface at low angle, reveals texture +- `vanishing point perspective` — strong linear convergence (corridors, offices) + +## Lighting vocabulary + +- `warm afternoon window light` — residential, golden hour feel +- `raking natural light` — reveals carpet texture and pile height +- `recessed ceiling lights creating depth` — commercial/corporate +- `warm wall sconces` — hotel corridors +- `crisp morning light` — bedrooms, bright and clean + +## Full prompt examples + +Carpet hero (residential): +``` +low-angle 35mm lens perspective looking across thick plush cream carpet +in an upstate New York living room, carpet fibers razor sharp in foreground, +couch and coffee table receding into shallow bokeh background, +warm afternoon window light raking across carpet texture, +Finger Lakes farmhouse interior, no people, +ultra-realistic architectural photography, 16:9 +``` + +Hotel corridor: +``` +low 24mm lens looking down a long hotel corridor from floor level, +patterned burgundy carpet runner sharp in extreme foreground receding to vanishing point, +warm wall sconces lining white walls, numbered doors converging in perspective, +no people, ultra-realistic hospitality photography, 16:9 +``` + +Hardwood floor: +``` +low 24mm angle pressed to gleaming light oak hardwood floor, +floor grain razor sharp in extreme foreground receding to hallway vanishing point, +white walls, natural light streaming in, shallow depth of field, +no people, ultra-realistic interior photography, 16:9 +``` + +## What NOT to include + +- No people, no faces, no hands, no feet, no shoes/boots +- No cleaning machines, vacuums, steam equipment, hoses +- No text, logos, watermarks in the scene +- No "before state" (dirty carpet, stains) — only clean result +- No "wide shot" without camera angle qualifier — produces flat frontal views + +## Video motion prompts (Wan 2.2) + +For animating stills, describe the 
camera motion, not the scene content: + +``` +slow dolly forward across {subject}, gentle camera push toward the far wall, +{lighting}, cinematic, smooth motion +``` + +Motion types: +- `slow dolly forward` — push toward subject +- `slow pan {direction}` — lateral camera rotation +- `tracking shot moving forward` — camera travels through space +- `rack focus` — lens focus shifts from foreground to background +- `gentle push in` — subtle zoom/move toward subject diff --git a/local-image-generation/05-quality-levers.md b/local-image-generation/05-quality-levers.md new file mode 100644 index 0000000..c599da8 --- /dev/null +++ b/local-image-generation/05-quality-levers.md @@ -0,0 +1,87 @@ +# 05 — Quality Improvement Levers + +Three levers control FLUX output quality, in order of impact: + +## 1. Prompt (highest impact, zero cost) + +Incoherent objects in the frame are almost always prompt bleed — the model fills +empty or ambiguous space with training-data defaults. Fix by naming every part of +the frame explicitly. 
+ +**Background** — name it, don't imply it: +- Bad: "living room" (model invents furniture, decor, wall art) +- Good: "plain cream painted wall with a single frosted sliding glass door" + +**Floor material** — always explicit: +- "plush cream berber carpet" or "light oak hardwood floor" +- Ambiguous floor → random floor type generated + +**Ceiling** — if visible, name it; if not wanted, push it out of frame with a +lower camera angle: +- "white drop ceiling with recessed can lights" +- Or: lower the angle until ceiling exits the frame entirely + +**Negative scene elements** — add inline, not as a separate negative prompt +(FLUX Schnell ignores negative prompts): +- "no furniture clutter, no decorative objects, no picture frames, no signage" +- "no cleaning equipment, no machines, no people" + +**What not to use:** +- "wide shot" without a camera angle qualifier — produces flat frontal views +- Vague room names ("office", "lobby") without specifying what fills the space + +## 2. Steps (marginal gain, 2x slower) + +FLUX Schnell is distilled to 4 steps. The distillation process compresses +a full diffusion model's quality into very few steps. + +| Steps | Quality change | Time impact | +|---|---|---| +| 4 (default) | Baseline | ~4 min/image | +| 6 | Slightly sharper edges, cleaner fine detail | ~6 min/image | +| 8 | Diminishing returns past 6 | ~8 min/image | + +Not recommended as a first fix. The distillation ceiling is the constraint, +not step count. Step increases help texture detail but will not fix scene +incoherence — that requires prompt changes. + +KSampler in `gen-images-flux.py`: +```python +"steps": 4, # increase to 6 for detail passes +``` + +## 3. 
Model size (real quality jump, 6x slower on CPU) + +| Model | Steps | Quality | CPU time/image | +|---|---|---|---| +| FLUX.1 Schnell (current) | 4 | Good depth, some coherence gaps | ~4 min | +| FLUX.1 Dev (full, non-distilled) | 20-30 | Better coherence, sharper geometry | ~20-30 min | + +FLUX Dev would fix most coherence issues. At current CPU-only speed (2GB VRAM +insufficient), a full 28-image batch would take 9+ hours. + +**Practical path to FLUX Dev:** +- Cloud GPU: RunPod or Vast.ai A100 runs FLUX Dev in ~90 seconds/image +- Same prompts, same ComfyUI workflow — only model file and step count change +- Switch `flux1-schnell-Q8_0.gguf` → FLUX Dev GGUF, set steps to 20, cfg to 3.5 + +## Decision matrix + +| Issue | Fix | +|---|---| +| Objects that shouldn't be in frame | Prompt: name every surface explicitly | +| Wrong floor/wall material | Prompt: be specific about material | +| Flat angle despite prompt | Prompt: add "low-angle", lens mm, "foreground sharp" | +| Soft edges on carpet fibers | Steps: increase 4 → 6 | +| Incoherent room geometry | Model: switch to FLUX Dev on cloud GPU | +| Overall composition wrong | Prompt: camera position + lens + foreground/bokeh split | + +## Re-running specific images + +To re-run only the problem frames without regenerating all 28: + +1. Edit `tools/gen-images-flux.py` +2. Change the `IMAGES` list to include only the failed image keys +3. Run: `python3 tools/gen-images-flux.py 2>&1 | tee tools/flux-gen.log` +4. Run: `python3 tools/convert-to-webp.py` (converts only new JPGs) +5. Rebuild: `docker compose build --no-cache web && docker compose up -d` diff --git a/local-image-generation/README.md b/local-image-generation/README.md new file mode 100644 index 0000000..fbe9bbb --- /dev/null +++ b/local-image-generation/README.md @@ -0,0 +1,46 @@ +# Local Image Generation — SOPs + +Complete reference for generating site images locally using ComfyUI. +No cloud API required. No per-image cost. 
Runs on the Arising Media workstation. + +## Index + +1. [01-comfyui-setup.md](01-comfyui-setup.md) — Installing ComfyUI, venv, GGUF node +2. [02-flux-images.md](02-flux-images.md) — FLUX.1 Schnell image generation pipeline +3. [03-wan-video.md](03-wan-video.md) — Wan 2.2 image-to-video pipeline +4. [04-prompt-guide.md](04-prompt-guide.md) — Prompt patterns for interior/carpet photography +5. [05-quality-levers.md](05-quality-levers.md) — Prompt, steps, model size: what to adjust and when + +## Quick start (images already set up) + +```bash +# 1. Start ComfyUI +tmux new-session -d -s comfyui \ + "cd ~/ComfyUI && venv/bin/python main.py --listen 0.0.0.0 --port 8188 --cpu 2>&1 | tee ~/comfyui.log" + +# 2. Wait ~30s, then generate images +cd /home/sirdrez/arisingmedia-websites/{domain} +python3 tools/gen-images-flux.py 2>&1 | tee tools/flux-gen.log + +# 3. Convert to WebP and deploy +python3 tools/convert-to-webp.py +find assets/images -name '*.jpg' -delete +docker compose build --no-cache web && docker compose up -d +``` + +## Model files (installed at ~/ComfyUI/models/) + +| Purpose | File | Size | Location | +|---|---|---|---| +| FLUX image UNet | flux1-schnell-Q8_0.gguf | 12GB | models/unet/ | +| FLUX T5 encoder | t5xxl_fp8_e4m3fn.safetensors | 4.6GB | models/clip/ | +| FLUX CLIP-L | clip_l.safetensors | 235MB | models/clip/ | +| FLUX VAE | ae.safetensors | 108MB | models/vae/ | +| Wan 2.2 video | Wan2.2-TI2V-5B-Q4_K_M.gguf | 3.2GB | models/diffusion_models/ | +| Wan UMT5 encoder | umt5_xxl_fp8_e4m3fn_scaled.safetensors | 6.3GB | models/clip/ | +| Wan VAE | wan_2.1_vae.safetensors | 243MB | models/vae/ | + +## Reference project + +`lahrcarpetcleaning.com` — first project using this full pipeline. 
+Scripts: `tools/gen-images-flux.py`, `tools/gen-video-wan.py`, `tools/convert-to-webp.py` diff --git a/sops.db b/sops.db new file mode 100644 index 0000000..6b4b2c9 Binary files /dev/null and b/sops.db differ diff --git a/stack-selector.json b/stack-selector.json new file mode 100644 index 0000000..ea6ea06 --- /dev/null +++ b/stack-selector.json @@ -0,0 +1,129 @@ +{ + "_doc": "Arising Media stack selector — agent reference. Choose the correct stack before starting any project.", + "_updated": "2026-05-21", + + "stacks": { + "static-html": { + "name": "Static HTML", + "reference_project": "lahrcarpetcleaning.com", + "use_when": [ + "Site has fewer than 50 pages", + "Content changes infrequently (monthly or less)", + "Client hosts on cPanel shared hosting with no server-side scripting", + "No database required" + ], + "do_not_use_when": [ + "Site has more than 50 pages", + "Content must be updated across many pages simultaneously", + "Lead capture forms require server-side validation", + "Site has location pages, service pages, or any programmatic content" + ], + "files": ["Dockerfile", "nginx.conf", "docker-compose.yml", ".htaccess", ".cpanel.yml"], + "sops": ["01-project-structure.md", "03-build-pipeline.md", "08-deployment-docker.md"] + }, + + "php-router-sqlite": { + "name": "PHP Router + SQLite", + "reference_project": "arisingmedia.us", + "use_when": [ + "Site has 50+ pages of any type", + "Multiple page classes share a common template (services, locations, blog)", + "Header/footer/nav updates must propagate instantly across all pages", + "Content is authored in a database or Airtable and pulled at render time", + "Site will grow — new pages added without new HTML files" + ], + "do_not_use_when": [ + "Client requires cPanel shared hosting with no PHP-FPM (rare)", + "Site is a pure landing page (1-3 pages)" + ], + "architecture": { + "router": "src/api/router.php", + "templates": "src/api/templates/ — one .php file per page class", + "components": 
"src/api/components/_sections.php, _header.php, _footer.php", + "database": "src/api/data/pages.sqlite (all page content)", + "tokens": "src/assets/css/tokens.css", + "styles": "src/assets/css/main.css", + "js": "src/assets/js/main.js" + }, + "page_classes": { + "service": "service.php — detailed service pages with value_prop, use_case_carousel, roi_band, lead_magnet, tiers", + "location": "location.php — city + service combination pages, map embed, local content", + "challenge": "challenge.php — problem definition + our approach + CTA", + "static": "static.php — about, contact, hub pages, case studies", + "blog": "blog.php — blog posts with author, date, related posts", + "category": "category.php — service hub pages" + }, + "sops": ["15-php-router-sqlite-standard.md"], + "design_reference": "arisingmedia.us/.planning/WEBSITE_BUILD_STANDARD.md", + "architecture_diagram": "arisingmedia.us/.planning/RENDER_ARCHITECTURE.html", + "approved_mockup": "arisingmedia.us/.planning/template-gallery-2026-05-20/mockup.html" + }, + + "php-app": { + "name": "PHP App Stack", + "reference_project": "quickconvert.us", + "use_when": [ + "File uploads and server-side processing required", + "At-rest encryption of user data", + "Payment processing (Stripe subscriptions)", + "User authentication" + ], + "sops": ["14-php-app-stack.md"] + } + }, + + "design_system": { + "fonts": ["Plus Jakarta Sans (display)", "Inter (body)"], + "approved_colors": { + "cobalt_deep": "#021a6a", + "cobalt": "#042fac", + "cobalt_light": "#5d78c9", + "navy": "#172034", + "footer": "#0f1626", + "slate": "#1c2c44", + "graphite": "#222f42", + "blue": "#1e6bd6", + "facet": "#6b82b2", + "stone": "#eef1f6", + "white": "#ffffff" + }, + "rejected_colors": { + "teal": "never use — not brand", + "mist": "#f8f9fb — eliminated from band rhythm 2026-05-21", + "angled_edges": "clip-path diagonals rejected — tech brands use flat horizontal lines" + }, + "band_rhythm": ["graphite", "light", "slate", "stone"], + 
"statement_type": "clamp(40px, 6.5vw, 96px)", + "section_padding_desktop": "120px", + "credibility_pattern": "proof numbers in dark grid band (IBM/Nvidia) — NOT generic icon logos" + }, + + "section_types": { + "available": ["text", "split", "pain", "process", "spotlight", "benefits", "faqs", "grid", "comparison", "testimonials", "stats", "cta", "pin_story", "tiers", "value_prop", "use_case_carousel", "roi_band", "lead_magnet"], + "v4_schema_columns": ["hero_value_proposition", "lead_magnet_json", "use_case_carousel_json", "roi_proof_json", "service_variant_strategy_json"], + "deprecated": ["grid — replaced by use_case_carousel when cases are populated", "mist band — eliminated from rhythm"] + }, + + "databases": { + "sqlite": { + "role": "Primary rendering database — exact slug lookups, structured content", + "suitable_up_to": "Millions of rows — 10,000 pages is tiny (5MB)", + "query_time": "< 1ms for slug lookup", + "file": "src/api/data/pages.sqlite" + }, + "chromadb": { + "role": "Future semantic layer — related content, site search, content generation grounding", + "not_suitable_for": "Primary rendering — no exact-match primary key lookup", + "status": "Planned — future phase after all content is in SQLite" + } + }, + + "deployment": { + "container": "am-web (Docker)", + "local_port": 8001, + "db_path_in_container": "/var/www/data/pages.sqlite", + "assets_path": "/var/www/html/assets/", + "hot_copy_db": "docker cp src/api/data/pages.sqlite am-web:/var/www/data/pages.sqlite && docker exec am-web sh -c 'rm -f /var/www/data/pages.sqlite-shm /var/www/data/pages.sqlite-wal && chown www-data:www-data /var/www/data/pages.sqlite'", + "hot_copy_assets": "docker cp src/assets/css/main.css am-web:/var/www/html/assets/css/main.css" + } +} diff --git a/stack.json b/stack.json new file mode 100644 index 0000000..d5f0d85 --- /dev/null +++ b/stack.json @@ -0,0 +1,239 @@ +{ + "meta": { + "author": "Andre Cobham / Arising Media", + "updated": "2026-06-09", + "version": "3.0" + 
}, + "stack": { + "base_image": "php:8.3-fpm-alpine", + "process_manager": "supervisord", + "web_server": "nginx", + "database": "SQLite (pdo_sqlite)", + "email": "SMTP via msmtp or Resend API", + "js": "vanilla (no frameworks)", + "css": "tokens.css + main.css", + "hosting": "Coolify (Docker) or cPanel (shared hosting)" + }, + "databases": { + "header.db": ["nav_items"], + "footer.db": ["footer_columns", "footer_links", "footer_legal"], + "pages.db": ["pages", "page_sections"], + "blog.db": ["posts", "post_images", "post_schema", "post_stats", "related_posts", "linkedin_drafts", "subscribers"], + "one_db_per_domain": true, + "never_monolithic": "Do not combine unrelated content domains in one database" + }, + "deployment": { + "docker": { + "build_command": "docker compose build", + "run_command": "docker compose up -d", + "process_manager": "supervisord", + "platforms": ["VPS", "DigitalOcean", "Linode", "Coolify", "custom servers"] + }, + "cpanel": { + "requires": [".htaccess", ".cpanel.yml"], + "repo_path": "/home/{username}/repositories/{domain}/ (empty)", + "webroot": "/home/{username}/public_html/{domain}/", + "platforms": ["cPanel", "Bluehost", "HostGator", "SiteGround"] + } + }, + "content_update": { + "new_page": [ + "Insert row in pages.db: slug, title, hero fields, sections_json", + "Re-seed: python3 build/seed_databases.py", + "Rebuild Docker: docker build -t {domain}:local .", + "For new URL patterns: add location block to infra/nginx.conf" + ], + "edit_content": [ + "All body copy lives in sections_json column of pages.db", + "Never put body copy in PHP template files", + "Edit CSV source or build/seed_databases.py and re-run" + ], + "build_scripts": "Use JSON + template + Python for 4+ similar pages (location pages, service pages)" + }, + "security": { + "required_headers": [ + "X-Frame-Options: SAMEORIGIN", + "X-Content-Type-Options: nosniff", + "Referrer-Policy: strict-origin-when-cross-origin", + "Permissions-Policy: camera=(), microphone=(), 
geolocation=()", + "Strict-Transport-Security: max-age=31536000; includeSubDomains", + "Cross-Origin-Opener-Policy: same-origin", + "Cross-Origin-Resource-Policy: same-origin", + "Content-Security-Policy (tight, project-specific)" + ], + "php_hardening": [ + "expose_php = Off", + "display_errors = Off", + "open_basedir = /var/www/html:/var/www/data", + "disable_functions = exec,passthru,shell_exec,system,proc_open,popen,pcntl_exec" + ], + "php_fpm": "clear_env = no (CRITICAL: required for getenv())", + "blocking_sensitive_paths": [ + "location ~ /\\. { deny all; return 404; }", + "location ~* \\.(env|conf|yml|md|sh|py|sql|bak|log|dockerfile)$ { deny all; return 404; }" + ] + }, + "forms": { + "spam_protection": "Altcha (self-hosted, proof-of-work SHA-256, no third-party)", + "altcha_key_generation": "openssl rand -hex 32", + "altcha_csp_requirement": "worker-src 'self' blob: (CRITICAL)", + "rate_limiting": { + "nginx": "5 requests/min per IP, burst 3", + "php": "5/10min per IP, file-backed (/tmp/form-rate-limit/)" + }, + "security_layers": [ + "nginx rate limit: 5r/min per IP", + "PHP rate limit: 5/10min per IP", + "honeypot field: hidden 'website' input", + "time-on-page check: <3s = [REVIEW] in subject", + "Altcha proof-of-work: SHA-256 + HMAC verification", + "server-side validation: all fields checked, HTML-escaped", + "32KB body cap: reject oversized payloads" + ], + "email_providers": { + "resend": "REST API (transactional, PHP curl)", + "msmtp": "SMTP relay (for cPanel + existing SMTP)" + } + }, + "seo": { + "required_meta_tags": [ + "charset UTF-8", + "viewport width=device-width, initial-scale=1.0", + "title (under 60 chars)", + "description (150-160 chars)", + "canonical link", + "Open Graph: og:type, og:url, og:title, og:description, og:image, og:site_name", + "Twitter: card, url, title, description, image", + "robots index, follow", + "theme-color (mobile)", + "favicon.svg, favicon-32.png, apple-touch-icon" + ], + "required_files": [ + "/robots.txt 
(Disallow /api/, include Sitemap)", + "/sitemap.xml (one <url> per page with <lastmod>)", + "/llms.txt (llmstxt.org standard)" + ], + "schema_required": [ + "LocalBusiness (home page, location pages)", + "Service (service detail pages)", + "BreadcrumbList (every page)", + "FAQPage (FAQ pages only)" + ], + "og_image": "1200x630px, brand colors, under 200KB", + "title_rules": "{Service} | {Brand} | {City}, {State}", + "description_rules": "150-160 chars, action-oriented, include city + service" + }, + "images": { + "format": "AVIF + JPG fallback", + "picture_element_required": true, + "conversion_command": "convert input.jpg -resize 1920x -quality 80 -define avif:speed=6 output.avif", + "size_targets": { + "portrait": "original width, 80 quality, 50-120KB", + "hero": "1920px max, 80 quality, 150-350KB", + "og_social": "1200px, 85 quality, under 150KB" + }, + "hero_naming": "hero-{page-slug}.avif + .jpg fallback", + "attributes_required": ["alt text or alt=\"\"", "loading=\"lazy\" (except hero)", "width and height"] + }, + "mobile": { + "breakpoints": { + "320": "mobile-first base", + "360": "common Android portrait width", + "480": "small phones", + "600": "phones", + "768": "tablets (main mobile breakpoint)", + "900": "small laptops, tablet landscape", + "1023": "IMPORTANT: switch to mobile menu here", + "1024": "sub-desktop" + }, + "header_nav_switch_at": "max-width: 1023px (not 768px)", + "touch_targets_minimum": "44x44px (Apple HIG, WCAG)", + "grid_mobile_override": "grid-template-columns: 1fr !important at 900px", + "overflow_protection": "overflow-x: clip; max-width: 100%" + }, + "cookie_consent": { + "option1": { + "name": "orestbida/cookieconsent", + "stars": "4600+", + "size": "23KB UMD + 32KB CSS", + "license": "MIT", + "recommendation": "default choice" + }, + "option2": { + "name": "Osano Cookie Consent", + "stars": "3500+", + "size": "30KB bundle", + "license": "MIT" + }, + "when_not_needed": "ePrivacy Directive Article 5(3): strictly necessary cookies (session, CSRF, form 
state) exempt from consent" + }, + "testing": { + "build_verification": "grep -rn '{{' site/ -- result: empty", + "container_health": "docker compose ps, docker logs, curl / returns 200", + "url_surface": "public paths 200, sensitive paths 404", + "mobile_responsive": "zero horizontal overflow at 320, 360, 390, 768, 900, 1023, 1024, 1200", + "form_e2e": "submit real form, verify email arrives", + "rate_limit": "request 6 returns 429 Too Many Requests", + "seo_surface": "all pages have title, canonical, og:, schema JSON-LD", + "cache_busting": "main.css?v=<unix-timestamp> changes on deploy", + "pre_launch_gates": [ + "All public URLs 200", + "All sensitive URLs 404", + "No sensitive files in container", + "Zero mobile overflow", + "Form submits, email arrives", + "Rate limit triggers", + "All pages have required meta tags", + "robots.txt and sitemap.xml exist", + "Zero em-dashes in HTML/JSON", + "Resend domain fully verified", + "Test email lands in primary inbox", + "Tested on real iPhone and Android", + "Lighthouse scores 90+ on all categories" + ] + }, + "never_use": [ + "Node.js / npm on frontend", + "WordPress for new builds", + "CSS frameworks (Bootstrap, Tailwind, Bulma)", + "JS frameworks (React, Vue, Angular, Svelte)", + "jQuery, Lodash, Moment, axios, utility libraries", + "CSS-in-JS, styled-components", + "Build tools requiring node_modules", + "Tracking pixels (except client-explicitly-requested)", + "Single monolithic database", + "Hardcoded copy in PHP templates", + "Docker Compose for arisingmedia.us stack", + "Google Drive / Sheets for content (evaluated and reverted 2026-06)" + ], + "secure_app_features": { + "when_to_use": "File conversion, encryption, payment processing, user authentication, rate-limited APIs", + "php_hardening": [ + "CSRF token on every POST endpoint", + "Rate limiting (nginx + PHP file-backed)", + "Altcha proof-of-work verification", + "At-rest encryption (sodium_crypto_secretbox + secretstream)", + "Signed download tokens 
(never expose file paths)", + "Server-side validation + HTML-escaped output", + "Session httponly + secure flags", + "storage/ directory access denied via .htaccess" + ], + "database": "SQLite with pdo_sqlite, schema migrations in try/catch, BEGIN IMMEDIATE transactions", + "environment": { + "clear_env": "no (php-fpm-pool.conf)", + "secrets": ".env (never hardcoded, never committed)", + "trust_proxy": "1 when behind reverse proxy", + "encryption_key": "32-byte hex QC_ENCRYPTION_KEY", + "altcha_key": "32-byte hex ALTCHA_HMAC_KEY" + }, + "reference": "quickconvert.us" + }, + "directory_structure": { + "src/api/": "PHP router, contact handler, templates, components, data (SQLite)", + "src/assets/": "CSS (tokens.css + main.css), JS (vanilla only), images (AVIF+JPG), altcha, cookieconsent", + "build/": "seed_databases.py", + "infra/": "nginx.conf, supervisord.conf, php-fpm-pool.conf, entrypoint.sh", + ".planning/": "not served, not in Docker image", + "root_files": "Dockerfile, docker-compose.yml, .dockerignore, .htaccess, .cpanel.yml, .gitignore, .env (ignored)" + } +} diff --git a/tools/verify-protection.sh b/tools/verify-protection.sh new file mode 100644 index 0000000..52983aa --- /dev/null +++ b/tools/verify-protection.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# verify-protection.sh — confirm a deployed Arising Media site is not leaking +# build artifacts, server config, or dotfiles to the public web. +# +# Usage: +# ./verify-protection.sh <base-url> +# ./verify-protection.sh http://localhost:8010 +# ./verify-protection.sh https://cobhamtech.com +# +# Exit 0 if every check passes, 1 otherwise. Designed to be run after every +# deploy and in CI. + +set -euo pipefail + +BASE="${1:-}" +if [[ -z "$BASE" ]]; then + echo "usage: $0 <base-url>" >&2 + exit 2 +fi +BASE="${BASE%/}" + +# Required paths — site must serve these; missing = audit FAIL. +REQUIRED=( + "/" +) + +# Public paths — should be reachable; missing = WARN (content gap, not a leak). 
+PUBLIC=( + "/robots.txt" + "/sitemap.xml" +) + +# Sensitive paths — must NOT return 200. 404 or 403 is acceptable. +# This list mirrors the deny patterns in SOP 08 nginx.conf. +SENSITIVE=( + "/Dockerfile" + "/dockerfile" + "/docker-compose.yml" + "/nginx.conf" + "/.dockerignore" + "/.gitignore" + "/.git/config" + "/.git/HEAD" + "/.env" + "/.env.example" + "/api/.env" + "/.planning/" + "/.planning/build_locations.py" + "/.planning/build_services.py" + "/.planning/regen_images.py" + "/.planning/playwright_audit.py" + "/__pycache__/" + "/build_locations.py" + "/build_services.py" + "/regen_images.py" + "/playwright_audit.py" + "/server.py" + "/main.py" + "/README.md" + "/package.json" + "/composer.json" +) + +fail=0 # set to 1 by probe on a LEAK or an unreachable required path + +warn=0 # set to 1 by probe when a public path is unreachable + +probe() { # probe PATH EXPECT: fetch ${BASE}PATH once and grade its HTTP status; updates fail/warn + local path="$1" # URL path appended to BASE + local expect="$2" # public | required | sensitive + local code # HTTP status as printed by curl -w + code=$(curl -k -s -o /dev/null -w '%{http_code}' --max-time 5 "${BASE}${path}" || true); code="${code:-000}" # curl prints the status (000 on connect failure) even when it exits non-zero, so do not append a second code; default to 000 only if nothing was printed + case "$expect" in + required) + if [[ "$code" =~ ^(200|301|302|304)$ ]]; then + printf ' OK %-3s %s\n' "$code" "$path" + else + printf ' FAIL %-3s %s (required public path unreachable)\n' "$code" "$path" + fail=1 + fi + ;; + public) + if [[ "$code" =~ ^(200|301|302|304)$ ]]; then + printf ' OK %-3s %s\n' "$code" "$path" + else + printf ' WARN %-3s %s (public path unreachable — content gap, not a leak)\n' "$code" "$path" + warn=1 + fi + ;; + sensitive) + if [[ "$code" == "200" ]]; then + printf ' LEAK %-3s %s (must not return 200)\n' "$code" "$path" + fail=1 + else + printf ' OK %-3s %s\n' "$code" "$path" + fi + ;; + esac +} + +echo "Verifying ${BASE}" +echo +echo "Required paths (site must serve these):" +for p in "${REQUIRED[@]}"; do probe "$p" required; done +echo +echo "Public paths (should be reachable):" +for p in "${PUBLIC[@]}"; do probe "$p" public; done +echo +echo "Sensitive paths (must not be reachable):" +for p in "${SENSITIVE[@]}"; do probe "$p" sensitive; done +echo + +if [[ $fail -ne 0 ]]; then + echo "FAIL — exposure or required-path 
failure at ${BASE}" >&2 + exit 1 +elif [[ $warn -ne 0 ]]; then + echo "PASS (with warnings) — no exposure at ${BASE}, but missing public content" + exit 0 +else + echo "PASS — no exposure detected at ${BASE}" + exit 0 +fi diff --git a/wp-divi-pipeline-to-am-stack/00-overview.md b/wp-divi-pipeline-to-am-stack/00-overview.md new file mode 100644 index 0000000..a6bd9cd --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/00-overview.md @@ -0,0 +1,94 @@ +# 00 — WP + Divi to AM Stack A Pipeline — Overview + +Converts a .wpress archive (All-in-One WP Migration) into a Stack A deployment: +PHP router + SQLite databases + vanilla JS/CSS. Never a 1:1 Divi copy. +Every migration is a content extraction and redesign, not a port. + +## Stack A output (what this pipeline produces) + +``` +src/api/router.php URL dispatcher +src/api/contact.php form handler (Resend via curl) +src/api/templates/*.php home | static | classes | schedule | glossary | blog +src/api/components/_header.php nav from nav.sqlite +src/api/components/_footer.php +src/api/data/*.sqlite one DB per content domain (see 09-stack-a-output.md) +build/seed_databases.py creates + seeds all SQLite DBs — THE source of truth +assets/ vanilla CSS/JS/images +infra/nginx.conf, supervisord.conf, php-fpm-pool.conf +Dockerfile (php:8.3-fpm-alpine) +docker-compose.yml +``` + +## Why NOT static HTML + +Any site with a glossary, blog, schedule, or recurring content model gets Stack A. +Editing content = edit seed_databases.py → reseed → rebuild. No PHP file edits. 
+ +## Divi is the data source, not the design target + +Extract from Divi: +- Page content (headings, body copy, CTAs) +- Navigation menus (wp_terms + wp_term_taxonomy for menus; wp_posts nav_menu_item + wp_postmeta for items) +- Header logo + tagline (wp_options: blogname, blogdescription, et_divi) +- Media (uploads/ → WebP → assets/images/) +- Design tokens (colors, fonts → tokens.css) +- SEO (Yoast wp_postmeta → pages.sqlite meta_description) +- Blog posts (wp_posts where post_type=post) +- Custom post types (testimonials, FAQs, glossary terms if present) + +Do NOT replicate: +- Divi section/row/column grid structure +- Divi module types (blurbs, toggles, CTAs, pricing tables) +- WordPress page slugs (map to clean slugs per nginx.conf pattern) +- WordPress menu item IDs + +## Pipeline phases + +``` +Phase 0 Setup Point pipeline at .wpress file; create working dirs +Phase 1 Extract Unpack .wpress → wpress-extract/ +Phase 2 DB Analysis Parse SQL dump; detect Divi version; inventory pages, posts, menus +Phase 3 Content Extract page sections + nav menus + blog posts from Divi +Phase 4 Design Pull colors + fonts → tokens.css draft +Phase 5 Media Catalog uploads/; convert to WebP; build media-manifest.json +Phase 6 Staging Map extracted JSON → seed_databases.py skeleton (content on standby) +Phase 7 Fill Agent fills each SQLite table row by row from staged JSON +Phase 8 Templates Scaffold PHP templates + components from AM reference +Phase 9 SEO Port titles, metas, canonicals, schema.org, redirect map +Phase 10 Build docker compose build && docker compose up -d +Phase 11 QA Lighthouse, protection check, grep for Divi residue +``` + +## CLI launcher + +``` +python3 scripts/migrate.py --wpress /path/to/backup.wpress --domain example.com +``` + +Runs phases 0-6 automatically, then prints agent breadcrumbs for phases 7-11. + +## Key missed items from prior migrations (REQUIRED fixes) + +1. **NAV MENUS**: Must extract menus from wp_terms + wp_term_taxonomy (taxonomy=nav_menu) and their items from wp_posts (post_type=nav_menu_item) + wp_postmeta for label/URL/order. 
+ Output: nav.json → seeded into nav.sqlite (label, href, display_order, is_cta). + +2. **DIVI HEADER**: Must extract et_divi options from wp_options for logo, header layout, colors. + The _header.php must be written from scratch using AM design tokens, not copied from Divi. + +3. **MEDIA**: All uploads/ files must be: cataloged → copied to assets/images/ → converted to WebP. + Every image reference in content JSON must be updated to /assets/images/{filename}.webp. + +4. **SECTION REMAPPING**: Divi modules must be remapped to AM section types. + - blurb_module → feature_cards item + - toggle_module → accordion item + - cta_module → cta_band section + - pricing_module → booking_options section + - testimonial_mod → testimonials.sqlite row + - text_module → text_block section + +## Related SOPs + +- **09-stack-a-output.md** — SQLite schema + sections_json spec +- **10-agent-breadcrumbs.md** — Step-by-step ordered checklist for agent execution +- **00-stack-philosophy.md** — Stack A vs Stack B decision rationale diff --git a/wp-divi-pipeline-to-am-stack/01-wpress-extraction.md b/wp-divi-pipeline-to-am-stack/01-wpress-extraction.md new file mode 100644 index 0000000..b26905e --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/01-wpress-extraction.md @@ -0,0 +1,120 @@ +# 01 — .wpress Extraction + +Unpack the All-in-One WP Migration `.wpress` archive into the project's +`.planning/wpress-extract/` directory. + +## .wpress binary format + +NOT a standard zip or tar. Custom sequential binary format: + +``` +[HEADER 4377 bytes] [FILE DATA n bytes] [HEADER] [FILE DATA] ... +``` + +Header breakdown: +``` +Offset Length Field +0 255 Filename (null-padded) +255 14 File size in bytes (ASCII decimal, null-padded) +269 12 mtime unix timestamp (ASCII decimal, null-padded) +281 4096 Relative path (null-padded) +4377 n Raw file bytes (size from header) +``` + +The archive ends when a header of all null bytes is encountered, or EOF. 
+ +## Extraction script + +Script: `.am-webdesign-sops/wp-divi-pipeline/scripts/extract_wpress.py` + +```bash +python3 ~/.am-webdesign-sops-path/scripts/extract_wpress.py \ + .planning/vibrantyou-yoga-YYYYMMDD-*.wpress \ + .planning/wpress-extract/ +``` + +Or from the SOP scripts directory directly: + +```bash +python3 /home/sirdrez/arisingmedia-websites/.am-webdesign-sops/wp-divi-pipeline/scripts/extract_wpress.py \ + /home/sirdrez/arisingmedia-websites/{domain}/.planning/{file}.wpress \ + /home/sirdrez/arisingmedia-websites/{domain}/.planning/wpress-extract/ +``` + +Progress prints every 200 files. A 300-400MB archive typically extracts in +2-5 minutes and produces 1,000-5,000 files. + +## Expected archive contents + +After extraction, `wpress-extract/` contains (flat, with no `wp-content/` wrapper): + +``` +wpress-extract/ +├── package.json ← archive metadata (domain, WP version, plugin list) +├── database.sql ← full MySQL dump (the most important file) +├── uploads/ ← all media (images, PDFs, videos) +│ └── YYYY/MM/ ← WordPress date-organized subdirs +├── themes/ +│ ├── Divi/ ← Divi 4 theme files (if Divi 4) +│ └── divi-5/ ← Divi 5 theme files (if Divi 5) +├── plugins/ ← installed plugins (useful for form schema) +│ ├── gravityforms/ +│ └── contact-form-7/ +└── et-cache/ +``` + +## Verify extraction + +After the script completes, confirm the key files exist: + +```bash +# Database dump present? +ls -lh .planning/wpress-extract/database.sql + +# Uploads present? +find .planning/wpress-extract/uploads -name "*.jpg" | wc -l +find .planning/wpress-extract/uploads -name "*.png" | wc -l + +# Archive metadata +cat .planning/wpress-extract/package.json +``` + +`package.json` contains the site URL, WordPress version, Divi version, and +plugin list — read it before proceeding to Phase 2. + +## Common issues + +**"Not a zip file" error** — Expected. The .wpress format is not zip. +The `extract_wpress.py` script handles it correctly.
+ +**Missing database.sql** — The archive may name it differently. Check: +```bash +find .planning/wpress-extract -name "*.sql" 2>/dev/null +``` + +**Partial extraction** — If the script stops early, check disk space: +```bash +df -h .planning/wpress-extract/ +``` +A 378MB .wpress typically expands to 1-3GB uncompressed. + +**Path traversal in filenames** — The script strips leading `/` and `.` from +paths. If files land in unexpected locations, check the raw path field with: +```bash +python3 -c " +import sys +HEADER_SIZE=4377; NAME_LEN=255; SIZE_LEN=14; MTIME_LEN=12; PATH_LEN=4096 +with open(sys.argv[1],'rb') as f: + for i in range(5): + h = f.read(HEADER_SIZE) + name = h[:NAME_LEN].split(b'\x00',1)[0].decode(errors='replace') + size = int(h[NAME_LEN:NAME_LEN+SIZE_LEN].split(b'\x00',1)[0] or 0) + path = h[NAME_LEN+SIZE_LEN+MTIME_LEN:].split(b'\x00',1)[0].decode(errors='replace') + print(f' [{i}] path={repr(path)} name={repr(name)} size={size}') + f.seek(size, 1) +" .planning/file.wpress +``` + +## Next step + +Proceed to `02-database-analysis.md` to inventory pages and detect Divi version. diff --git a/wp-divi-pipeline-to-am-stack/02-database-analysis.md b/wp-divi-pipeline-to-am-stack/02-database-analysis.md new file mode 100644 index 0000000..482ac92 --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/02-database-analysis.md @@ -0,0 +1,151 @@ +# 02 — Database Analysis + +Parse the WordPress MySQL dump to inventory pages, detect Divi version, +extract design settings, and build the data JSON files that drive the AM build. 
+ +## Script + +```bash +python3 /home/sirdrez/arisingmedia-websites/.am-webdesign-sops/wp-divi-pipeline/scripts/analyze_db.py \ + {domain}/.planning/wpress-extract/ \ + {domain}/.planning/data/ +``` + +Outputs three files into `.planning/data/`: +- `pages.json` — all published pages/posts with content and SEO meta +- `design-system.json` — colors, fonts, Divi settings +- `site-info.json` — domain, plugin list, WP version, Divi version + +## Divi version detection + +The script auto-detects Divi version by scanning `database.sql`: + +| Signal in SQL | Divi version | +|---------------|-------------| +| `wp:divi/` in post_content | Divi 5 | +| `[et_pb_section` in post_content | Divi 4 | + +**This determines the content extraction path.** Divi 4 → use `extract_divi4.py`. +Divi 5 → use `extract_divi5.py`. See `03-divi-content-extraction.md`. + +## Key WordPress tables + +| Table | Contents | Used for | +|-------|----------|---------| +| `wp_posts` | All pages, posts, attachments, layouts | Page inventory, content | +| `wp_postmeta` | Per-post metadata | ACF fields, Rank Math SEO, Divi layout JSON | +| `wp_options` | Site-wide settings | Divi theme settings, colors, fonts | +| `wp_gf_forms` | Gravity Forms definitions | Form field schema | +| `wp_gf_entries` | Gravity Form submissions | Not needed for migration | +| `wp_rank_math_seo_meta` | Rank Math SEO per page | SEO titles, descriptions | + +## Reading pages.json + +Each entry in `pages.json`: + +```json +{ + "id": "42", + "post_type": "page", + "slug": "about", + "title": "About VibrantYou Yoga", + "status": "publish", + "date": "2026-03-15", + "modified": "2026-04-10", + "content_raw": "<!-- wp:divi/section ... -->...", + "excerpt": "", + "parent_id": "0", + "menu_order": "3", + "seo_title": "About VibrantYou Yoga | Mindful Movement in [City]", + "seo_description": "...", + "seo_keywords": "yoga studio, mindful movement", + "acf": { + "vyy_hero_headline": "Move With Intention", + "vyy_hero_subhead": "..." 
+ } +} +``` + +`content_raw` holds the raw Divi block markup. Pass it to the extractor scripts. +`acf` holds Advanced Custom Fields values — often cleaner than block content. + +## Reading design-system.json + +Contains extracted Divi theme settings. Key fields: + +```json +{ + "primary_color": "#1a8a7a", + "body_font": "DM Sans", + "header_font": "DM Serif Display", + "body_font_size": "16", + "body_line_height": "1.7", + "divi_version": "5", + "wp_version": "6.9.4", + "site_url": "https://vibrantyou.yoga", + "site_name": "VibrantYou Yoga" +} +``` + +Use these values to seed the AM `main.css` CSS custom properties block. + +## Manual inspection (when script output is sparse) + +Sometimes the Divi theme options are stored as PHP-serialized data. +Use grep to find and eyeball the raw values: + +```bash +DB=.planning/wpress-extract/database.sql + +# Divi global colors +grep -o "'et_divi[^']*','[^']*'" $DB | head -30 + +# Site name + URL +grep -E "'(siteurl|blogname|admin_email)','[^']*'" $DB + +# Rank Math SEO meta for a specific post +grep "rank_math_title\|rank_math_description" $DB | head -20 + +# All published page slugs +grep -o "post_name','[^']*'" $DB | grep -v "revision\|auto-draft" | sort | uniq +``` + +## Gravity Forms schema (for form replacement) + +Find form field definitions: + +```bash +grep "INSERT INTO \`wp_gf_forms\`" .planning/wpress-extract/database.sql | \ + python3 -c " +import sys, json, re +for line in sys.stdin: + m = re.search(r\"'([^']+)'\s*\)\s*;\", line) + if m: + try: print(json.dumps(json.loads(m.group(1).replace('\\\\\"','\"')), indent=2)[:2000]) + except: pass +" 2>/dev/null | head -100 +``` + +Field types seen in Gravity Forms: text, email, phone, textarea, select, checkbox, radio, name, address, fileupload. Map each to a plain HTML input equivalent. 
+ +## Archive directory layout note + +The All-in-One WP Migration .wpress format extracts flat — no `wp-content/` wrapper: + +``` +wpress-extract/ +├── database.sql ← NOT in wp-content/ +├── package.json +├── uploads/ ← NOT wp-content/uploads/ +├── themes/ ← NOT wp-content/themes/ +├── plugins/ ← NOT wp-content/plugins/ +└── et-cache/ +``` + +Scripts must reference `uploads/`, `themes/`, `plugins/` directly under +`wpress-extract/`, not `wpress-extract/wp-content/`. + +## Next step + +Once `pages.json` is written, proceed to `03-divi-content-extraction.md` +to parse `content_raw` for each page into structured AM-ready HTML. diff --git a/wp-divi-pipeline-to-am-stack/03-divi-content-extraction.md b/wp-divi-pipeline-to-am-stack/03-divi-content-extraction.md new file mode 100644 index 0000000..c8d3991 --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/03-divi-content-extraction.md @@ -0,0 +1,157 @@ +# 03 — Divi Content Extraction + +Parse raw Divi page content from `pages.json` into clean, structured HTML +sections ready to map into AM templates. + +## Divi 4 vs Divi 5 — critical difference + +### Divi 4 (shortcode-based) + +Content is stored as shortcodes in `wp_posts.post_content`: + +``` +[et_pb_section fb_built="1" admin_label="Hero" _builder_version="4.27.4" + background_color="#0f5f53" ...] + [et_pb_row ...] + [et_pb_column type="4_4" ...] + [et_pb_text ...]<h1>Move With Intention</h1>[/et_pb_text] + [et_pb_button button_url="/contact" button_text="Book a Class" /] + [/et_pb_column] + [/et_pb_row] +[/et_pb_section] +``` + +Use `extract_divi4.py` → parses shortcode tree into section/row/module JSON. + +### Divi 5 (block-based) + +Content is stored as Gutenberg-style block comments: + +```html +<!-- wp:divi/section {"id":"section-abc123","attrs":{"backgroundColor":{"value":"#0f5f53"}}} --> +<div class="et_pb_section ..."> + <!-- wp:divi/row ... --> + <!-- wp:divi/column ... --> + <!-- wp:divi/text ...
--> + <div class="et_pb_text_inner"><h1>Move With Intention</h1></div> + <!-- /wp:divi/text --> + <!-- /wp:divi/column --> + <!-- /wp:divi/row --> +</div> +<!-- /wp:divi/section --> +``` + +Use `extract_divi5.py` → strips block wrapper, extracts inner HTML per module. + +## Divi 5 extraction script + +```bash +python3 /home/sirdrez/arisingmedia-websites/.am-webdesign-sops/wp-divi-pipeline/scripts/extract_divi5.py \ + {domain}/.planning/data/pages.json \ + {domain}/.planning/data/content/ +``` + +Produces one JSON file per page: `content/{slug}.json` + +```json +{ + "slug": "about", + "title": "About VibrantYou Yoga", + "seo_title": "About VibrantYou Yoga | ...", + "seo_description": "...", + "sections": [ + { + "type": "hero", + "background_color": "#0f5f53", + "modules": [ + { "module": "text", "html": "<h1>Move With Intention</h1>" }, + { "module": "button", "text": "Book a Class", "url": "/contact/" } + ] + }, + { + "type": "standard", + "modules": [ + { "module": "text", "html": "<h2>Our Story</h2><p>...</p>" }, + { "module": "image", "src": "/assets/images/studio.webp", "alt": "..." } + ] + } + ] +} +``` + +## ACF fields take priority + +If a page has ACF fields (in `pages.json[].acf`), use those over block content. +ACF fields are typically cleaner, pre-authored copy without Divi wrapper noise. + +Convention for VYY-specific ACF keys: +- `vyy_hero_headline` → `<h1>` in hero section +- `vyy_hero_subhead` → `<p class="hero-lead">` in hero +- `vyy_hero_cta_text` → primary CTA button label +- `vyy_hero_cta_url` → primary CTA button href + +Always check `acf` keys before parsing `content_raw`. 
+ +## Stripping Divi class/attribute noise + +After extraction, run every HTML snippet through the `clean_divi_html()` +function from `divi_to_html.py`: + +```python +from divi_to_html import clean_divi_html, rewrite_internal_links + +cleaned = clean_divi_html(raw_html) +cleaned = rewrite_internal_links(cleaned, staging_hosts=("vibrantyou.yoga",)) +``` + +This removes: +- `<!-- wp:divi/... -->` block comments +- `data-et-*`, `data-builder-*` attributes +- `et_pb_*`, `divi-builder-*`, `d5_*` class tokens +- Empty `class=""` attributes + +## What to extract per section type + +| Divi module | Extract | Map to AM element | +|-------------|---------|-------------------| +| `divi/text` | inner HTML | `<section>`, `<p>`, headings as-is | +| `divi/button` | `text`, `url` | `<a class="btn-primary">` | +| `divi/image` | `src`, `alt`, `title` | `<img>` → rewrite to WebP path | +| `divi/blurb` | icon, title, body | `.am-card` component | +| `divi/testimonial` | quote, author, company | `.am-testimonial` component | +| `divi/video` | `src`, poster | `<video>` or YouTube embed | +| `divi/contact_form` | field list | → replace with AM form, see `08` | +| `divi/accordion` | Q+A pairs | `<details><summary>` | +| `divi/fullwidth_header` | title, subhead, CTA | hero section | + +## Section background colors → AM section modifiers + +Divi 5 stores `backgroundColor` in the block `attrs` JSON. +Map to AM CSS modifier classes: + +| Divi background | AM class modifier | +|----------------|------------------| +| `#0f5f53` (dark teal) | `.section--dark` | +| `#1a8a7a` (mid teal) | `.section--brand` | +| `#f5f5f5` / `#fafafa` | `.section--light` | +| `#ffffff` / none | `.section--white` | + +## Content quality pass (required before HTML build) + +After extraction, review every page's content for: + +1. **Cut bloated copy** — WordPress sites often have 3x more text than needed. + Target 30-50% reduction. One clear idea per paragraph. +2. 
**Remove stale metrics** — "Over 500 students" only stays if it's verifiable. + Otherwise remove or mark `DRAFT NEEDED`. +3. **Remove plugin artifacts** — Gravity Forms shortcodes `[gravityforms id="1"]`, + Events Manager tags, Divi shortcode residue that survived extraction. +4. **Improve CTAs** — Replace generic "Learn More" with action-specific text: + "Book a Free Class", "View the Schedule", "Start Your Practice". +5. **Flag images** — Note every `<img>` that needs a real photo vs stock. + +## Next step + +Proceed to `04-design-system-extraction.md` to convert Divi theme settings +into AM CSS custom properties, then `05-content-migration.md` to build the +HTML templates. diff --git a/wp-divi-pipeline-to-am-stack/04-design-system-extraction.md b/wp-divi-pipeline-to-am-stack/04-design-system-extraction.md new file mode 100644 index 0000000..36ff95f --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/04-design-system-extraction.md @@ -0,0 +1,172 @@ +# 04 — Design System Extraction + +Convert Divi theme settings into AM CSS custom properties. +The goal is to ENHANCE the design — cleaner, more modern — not replicate it. + +## Input + +`design-system.json` produced by `analyze_db.py`. Key fields: + +```json +{ + "primary_color": "#1a8a7a", + "body_font": "DM Sans", + "header_font": "DM Serif Display", + "body_font_size": "16", + "body_line_height": "1.7", + "site_name": "VibrantYou Yoga" +} +``` + +## Color palette strategy + +Never lift the Divi palette 1:1. 
Use extracted colors as the base and build a +full 5-step scale around the primary hue: + +| Token | Derived from | Role | +|-------|-------------|------| +| `--color-primary` | Divi accent_color | Buttons, links, active states | +| `--color-primary-dark` | Darken primary 15% | Hover states, section backgrounds | +| `--color-primary-light` | Lighten primary 40% | Subtle tints, borders | +| `--color-surface` | Always `#fafafa` | Page background | +| `--color-surface-alt` | `#f3f3f3` | Alternating sections | +| `--color-text` | Always `#1a1a1a` | Body copy | +| `--color-text-muted` | `#666` | Subheadings, captions | +| `--color-border` | 10% primary or `#e0e0e0` | Dividers, inputs | +| `--color-white` | `#ffffff` | Card backgrounds, hero text | + +For VibrantYou Yoga (primary `#1a8a7a`, dark `#0f5f53`): + +```css +:root { + --color-primary: #1a8a7a; + --color-primary-dark: #0f5f53; + --color-primary-light: #d4f0eb; + --color-surface: #fafafa; + --color-surface-alt: #f0f7f6; + --color-text: #1a1a1a; + --color-text-muted: #5a6e6b; + --color-border: #c8dedd; + --color-white: #ffffff; +} +``` + +## Typography strategy + +Use the extracted fonts but upgrade the type scale. +Divi's default type scale is too small and too flat. Aim for 1.25–1.333 modular ratio. 
+ +```css +:root { + /* Fonts from design-system.json */ + --font-body: 'DM Sans', system-ui, sans-serif; + --font-heading: 'DM Serif Display', Georgia, serif; + + /* Modular scale (1.25 ratio from 16px base) */ + --text-xs: 0.75rem; /* 12px */ + --text-sm: 0.875rem; /* 14px */ + --text-base: 1rem; /* 16px */ + --text-lg: 1.125rem; /* 18px */ + --text-xl: 1.25rem; /* 20px */ + --text-2xl: 1.5rem; /* 24px */ + --text-3xl: 1.875rem; /* 30px */ + --text-4xl: 2.25rem; /* 36px */ + --text-5xl: 3rem; /* 48px */ + --text-6xl: 3.75rem; /* 60px */ + + /* Line heights */ + --leading-tight: 1.2; + --leading-normal: 1.6; + --leading-loose: 1.8; + + /* Font weights */ + --weight-normal: 400; + --weight-medium: 500; + --weight-semibold: 600; + --weight-bold: 700; +} +``` + +## Spacing and layout + +Divi uses pixel-based margins/paddings that must be converted to a consistent +rem-based spacing scale: + +```css +:root { + --space-1: 0.25rem; /* 4px */ + --space-2: 0.5rem; /* 8px */ + --space-3: 0.75rem; /* 12px */ + --space-4: 1rem; /* 16px */ + --space-5: 1.25rem; /* 20px */ + --space-6: 1.5rem; /* 24px */ + --space-8: 2rem; /* 32px */ + --space-10: 2.5rem; /* 40px */ + --space-12: 3rem; /* 48px */ + --space-16: 4rem; /* 64px */ + --space-20: 5rem; /* 80px */ + --space-24: 6rem; /* 96px */ + --space-32: 8rem; /* 128px */ + + /* Section vertical padding */ + --section-py: var(--space-20); /* 80px default */ + --section-py-sm: var(--space-12); /* 48px mobile */ + + /* Container */ + --container-max: 1200px; + --container-px: var(--space-6); + + /* Border radius */ + --radius-sm: 4px; + --radius-md: 8px; + --radius-lg: 12px; + --radius-xl: 20px; + --radius-full: 9999px; + + /* Shadows */ + --shadow-sm: 0 1px 3px rgba(0,0,0,.08); + --shadow-md: 0 4px 16px rgba(0,0,0,.1); + --shadow-lg: 0 12px 40px rgba(0,0,0,.12); +} +``` + +## Google Fonts import + +For DM Sans + DM Serif Display: + +```html +<link rel="preconnect" href="https://fonts.googleapis.com"> +<link rel="preconnect" 
href="https://fonts.gstatic.com" crossorigin> +<link href="https://fonts.googleapis.com/css2?family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;0,9..40,600;0,9..40,700;1,9..40,400&family=DM+Serif+Display:ital@0;1&display=swap" rel="stylesheet"> +``` + +## Enhancement rules (required) + +These upgrades apply to every AM migration regardless of source: + +1. **Increase contrast** — body text must be #1a1a1a on white (WCAG AA minimum). + Never use the grey-on-grey color schemes that Divi themes commonly use. + +2. **Whitespace is content** — section padding must be at minimum 80px vertical + on desktop. Divi often uses 40-60px which feels cramped. + +3. **One weight per heading level** — h1 at 700, h2 at 600, h3 at 500. + Divi often leaves all headings at the same weight. + +4. **Max-width prose** — body copy containers max 680px wide. Divi stretches + copy to full column width on 1200px screens, which is unreadable. + +5. **Brand color is a highlight, not a wallpaper** — primary color should + appear on buttons, links, and 1-2 hero sections only. Divi sites often + paint every other section in the primary color. + +## Output: main.css variables block + +Write the complete `:root {}` block into `src/assets/css/main.css` as the +first section. All other CSS rules reference only `var(--token-name)`. +Never hard-code a color, font, or spacing value outside of `:root`. + +## Next step + +Proceed to `05-content-migration.md` to map extracted content into AM HTML +templates using this design system. diff --git a/wp-divi-pipeline-to-am-stack/05-content-migration.md b/wp-divi-pipeline-to-am-stack/05-content-migration.md new file mode 100644 index 0000000..5fab6b4 --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/05-content-migration.md @@ -0,0 +1,246 @@ +# 05 — Content Migration + +Map extracted Divi content into AM HTML templates. This is the build phase. 
+Follow `01-project-structure.md` for directory layout and `03-build-pipeline.md` +for JSON + template stamping. + +## Source files + +After running Phase 2-4 scripts, `.planning/data/` contains: + +``` +.planning/data/ +├── pages.json ← all published pages (from analyze_db.py) +├── site-info.json ← domain, plugin list, Divi version +├── design-system.json ← colors, fonts, spacing tokens +└── content/ + ├── home.json ← parsed sections for home page + ├── about.json ← parsed sections for about page + ├── services.json + └── ... ← one file per published page +``` + +## Information architecture for yoga sites + +Standard AM structure for a yoga studio / wellness site: + +``` +/ home (hero, classes preview, testimonials, CTA) +/about/ about / story / instructors +/classes/ class schedule index +/classes/{slug}.html one page per class type (hatha, vinyasa, yin, etc.) +/private-sessions/ 1:1 session offerings +/workshops/ workshops + retreats index +/contact/ contact + booking form +/blog/ optional blog index +/blog/{slug}.html individual blog posts +/404.html +/500.html +/robots.txt +/sitemap.xml +``` + +Map every WP page slug to this structure first. Some WP slugs may need to be +consolidated, renamed, or dropped. Document the redirect map in +`.planning/redirect-map.txt` (old slug → new path). + +## Build order + +Build in this sequence. Each page uses the previous as a reference: + +1. `src/assets/css/main.css` — design tokens, reset, typography, layout grid +2. `src/assets/css/components.css` — header, footer, hero, cards, forms, nav +3. `src/components/header.html` — navigation +4. `src/components/footer.html` — footer links, contact info +5. `src/assets/js/components.js` — fetch + inject header/footer +6. `src/assets/js/main.js` — scroll animations, intersection observer +7. `src/index.html` — home page (this IS the design system in working form) +8. `src/about/index.html` +9. `src/classes/index.html` + individual class pages (from JSON template if 4+) +10. 
`src/contact/index.html` + AM form +11. `src/blog/index.html` + individual posts +12. `src/robots.txt`, `src/sitemap.xml`, `src/404.html`, `src/500.html` + +## HTML page skeleton + +Every page uses the same skeleton. Copy from 06-seo-meta.md for the full +`<head>` requirements. Shell: + +```html +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <meta name="site-root" content="/"> + <title>{{seo_title}} + + + + + + + +
+ +
+ +
+ + + + + + +``` + +## Section HTML patterns + +Map each `content/{slug}.json` section to one of these AM patterns: + +### Hero (role: "hero") + +```html +
+
+
+

Move With Intention

+

Discover yoga classes for all levels in [City].

+ +
+
+
+``` + +### Feature grid (4-col blurb modules) + +```html +
+
+

Why VibrantYou Yoga

+
+
+
+

All Levels Welcome

+

From first-timers to advanced practitioners.

+
+ +
+
+
+``` + +### Testimonials (3-col) + +```html +
+
+

What Students Say

+
+
+

"..."

+
+ Jane D. + Student since 2024 +
+
+
+
+
+``` + +### CTA section + +```html +
+
+

Ready to Begin?

+

Your first class is on us.

+ Book a Free Class +
+
+``` + +## Class pages — JSON template build + +If there are 4+ class types (Hatha, Vinyasa, Yin, Meditation, etc.), use the +build pipeline: + +``` +src/classes/ +├── _template.html ← class detail page template +├── hatha.html ← generated from classes.json +├── vinyasa.html +├── yin.html +└── meditation.html + +.planning/data/ +└── classes.json ← array of class objects +``` + +`classes.json` schema: +```json +[ + { + "slug": "hatha", + "name": "Hatha Yoga", + "title": "Hatha Yoga Classes | VibrantYou Yoga", + "meta_description": "...", + "canonical": "https://vibrantyou.yoga/classes/hatha.html", + "hero_h1": "Hatha Yoga", + "hero_lead": "A grounding practice for all experience levels.", + "description": "

...

", + "duration": "60 min", + "level": "All levels", + "schedule": "Mon, Wed, Fri — 9:00 AM", + "instructor": "Sarah M.", + "faqs": [ + { "q": "Do I need prior experience?", "a": "No." } + ] + } +] +``` + +## Events Manager → static schedule + +The site uses Events Manager plugin. For static migration: +- Extract recurring class schedule from the database (`wp_em_events` table) +- Convert to a static schedule table / cards in `src/classes/index.html` +- Do NOT recreate a dynamic booking system unless explicitly requested +- Link the "Book" button to the contact form or an external booking URL + +## Image remapping + +Every `` extracted from Divi content will have a WordPress +upload URL like `/wp-content/uploads/2026/03/image.jpg`. + +Remap to AM path: +- Source: `wpress-extract/uploads/2026/03/image.jpg` +- AM dest: `src/assets/images/image.webp` (after WebP conversion) +- HTML: `...` + +Always include `width`, `height`, `loading="lazy"`, and `alt` on every ``. + +## After build — verify + +```bash +# Zero unreplaced template placeholders +grep -rn "{{" src/**/*.html + +# All pages have canonical +grep -rL 'rel="canonical"' src/**/*.html + +# All images have alt text +grep -rn ' .planning/data/media-raw-list.txt + +wc -l .planning/data/media-raw-list.txt +``` + +## Step 2 — Skip WordPress-generated size variants + +WordPress auto-generates resized variants: `-150x150`, `-300x200`, `-768x512`, etc. +Skip these — they are redundant once we have the originals. + +```bash +grep -v -E "\-[0-9]+x[0-9]+\.(jpg|jpeg|png|webp)$" \ + .planning/data/media-raw-list.txt > .planning/data/media-originals.txt + +echo "Originals: $(wc -l < .planning/data/media-originals.txt)" +``` + +## Step 3 — Copy originals to src/assets/images/ + +Flatten the date-organized subdirs into a single flat directory. +Preserve filenames exactly (except extension will change to .webp). 
+ +```bash +mkdir -p src/assets/images/ + +while IFS= read -r src_path; do + filename=$(basename "$src_path") + cp "$src_path" "src/assets/images/$filename" +done < .planning/data/media-originals.txt + +echo "Copied: $(ls src/assets/images/ | wc -l) files" +``` + +## Step 4 — Convert to WebP + +Use the project's standard WebP conversion script (see `12-image-assets.md`). +If cwebp is available: + +```bash +cd src/assets/images/ +for img in *.jpg *.jpeg *.png; do + [ -f "$img" ] || continue + base="${img%.*}" + cwebp -q 82 "$img" -o "${base}.webp" 2>/dev/null && rm "$img" +done +echo "WebP conversion done. Count: $(ls *.webp | wc -l)" +``` + +Or use the Python Pillow batch script if cwebp is not installed: + +```bash +python3 /home/sirdrez/arisingmedia-websites/.am-webdesign-sops/wp-divi-pipeline/scripts/convert_images.py \ + src/assets/images/ +``` + +## Step 5 — Generate media manifest + +After conversion, build the URL remap table used during HTML build: + +```bash +python3 -c " +import os, json +from pathlib import Path + +uploads_dir = Path('.planning/wpress-extract/uploads') +site_url = 'https://vibrantyou.yoga' +am_path = '/assets/images' + +manifest = [] +for root, dirs, files in os.walk(uploads_dir): + for f in files: + full = Path(root) / f + rel = full.relative_to(uploads_dir) + # WordPress URL for this file + wp_url = f'{site_url}/wp-content/uploads/{rel}' + # Strip size variants from slug + stem = Path(f).stem + import re + stem_clean = re.sub(r'-\d+x\d+$', '', stem) + am_url = f'{am_path}/{stem_clean}.webp' + manifest.append({'wp_url': wp_url, 'am_url': am_url, 'original': f}) + +Path('.planning/data/media-manifest.json').write_text( + json.dumps(manifest, indent=2)) +print(f'Manifest: {len(manifest)} entries') +" +``` + +## Step 6 — Apply manifest during HTML build + +When writing HTML from extracted content, use the manifest to rewrite +every WordPress upload URL: + +```python +import json, re + +manifest = 
json.loads(open('.planning/data/media-manifest.json').read()) +url_map = {m['wp_url']: m['am_url'] for m in manifest} + +def rewrite_media_urls(html: str) -> str: + for wp_url, am_url in url_map.items(): + html = html.replace(wp_url, am_url) + # Also rewrite relative /wp-content/uploads/ paths (dropping -WxH size suffixes, as the manifest does) + html = re.sub( + r'/wp-content/uploads/\d{4}/\d{2}/([^"\'>\s]+)', + lambda m: "/assets/images/" + re.sub(r'-\d+x\d+$', '', m.group(1).split('/')[-1].rsplit('.',1)[0]) + ".webp", + html + ) + return html +``` + +## Files to skip + +Do not migrate these WordPress system images to `src/assets/images/`: +- `woocommerce-placeholder.png` and variants +- `wp-includes/` images (WordPress core UI) +- Plugin admin icons (anything from `plugins/` in uploads) +- Files in `wc-logs/`, `ithemes-security/`, `amcu-chunks/` subdirs + +## Logo handling + +The logo is typically at: +``` +uploads/YYYY/MM/VibrantYouYogaLogo.png +``` + +Place the logo at: +- `src/assets/images/logo.webp` — standard WebP version +- `src/assets/svg/logo.svg` — if an SVG version exists (preferred) +- `src/assets/images/logo.png` — keep PNG fallback for email/OG use + +Reference in header.html: +```html + +``` + +## OG image + +Generate one 1200×630px OG image per `06-seo-meta.md` requirements. +Place at: `src/assets/images/og-default.jpg` + +## Next step + +Proceed to `07-seo-preservation.md` to build the redirect map and audit +every page's title, description, and canonical before the HTML build. diff --git a/wp-divi-pipeline-to-am-stack/07-seo-preservation.md b/wp-divi-pipeline-to-am-stack/07-seo-preservation.md new file mode 100644 index 0000000..ac0f757 --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/07-seo-preservation.md @@ -0,0 +1,182 @@ +# 07 — SEO Preservation + +Before building HTML, map every WordPress page URL to its new AM URL and +ensure title, description, canonical, and schema.org are preserved or improved.
+ +## Step 1 — Inventory all WP URLs + +Extract every published page slug from `pages.json`: + +```bash +python3 -c " +import json +pages = json.load(open('.planning/data/pages.json')) +for p in pages: + slug = p['slug'] + ptype = p['post_type'] + print(f'/{slug}/ ({ptype}) title={p[\"title\"]!r}') +" | tee .planning/data/wp-url-inventory.txt +``` + +## Step 2 — Build redirect map + +Map each WP URL to the new AM URL. Write to `.planning/data/redirect-map.txt`: + +Format: `OLD_PATH -> NEW_PATH` + +Common mapping patterns for yoga sites: + +| Old WP URL | New AM URL | Action | +|-----------|-----------|--------| +| `/` | `/` | Same | +| `/about/` | `/about/` | Same | +| `/classes/` | `/classes/` | Same | +| `/yoga-class-name/` | `/classes/yoga-class-name.html` | Restructure | +| `/private-yoga-sessions/` | `/private-sessions/` | Rename | +| `/contact-us/` | `/contact/` | Simplify | +| `/?page_id=42` | `/about/` | WP ID → slug | +| `/blog/post-title/` | `/blog/post-title.html` | Flatten | +| `/events/event-name/` | `/classes/` | Consolidate into schedule | + +Redirects go into `infra/nginx.conf`: + +```nginx +# Exact-match redirects +location = /contact-us/ { return 301 /contact/; } +location = /private-yoga-sessions/ { return 301 /private-sessions/; } + +# WP page ID redirects +location = / { + if ($arg_page_id = "42") { return 301 /about/; } + if ($arg_p) { return 301 /blog/; } +} + +# WP upload image URLs → AM asset paths (drops date dirs and -WxH suffix, serves .webp) +location ~* ^/wp-content/uploads/(?:.+/)?([^/]+?)(?:-\d+x\d+)?\.(?:jpe?g|png|webp)$ { + return 301 /assets/images/$1.webp; +} + +# Block all WP URLs +location ~ ^/wp-(admin|login|json|cron|includes|content/plugins|content/themes) { + return 410; +} +``` + +## Step 3 — Rank Math SEO extraction + +Rank Math stores titles and descriptions in `wp_postmeta`. +`analyze_db.py` already extracts these into `pages.json` as `seo_title` and `seo_description`. + +For each page, the priority order for SEO fields: +1.
`seo_title` from Rank Math (if not empty and not a template like `%title% - %sitename%`) +2. `post_title` with AM format appended: `{Title} | VibrantYou Yoga` +3. Never leave title as the raw WP default + +Rank Math title templates use `%` tokens — strip them and rebuild: +```python +import re + +def clean_rm_title(rm_title: str, post_title: str, site_name: str) -> str: + if not rm_title or "%" in rm_title: + return f"{post_title} | {site_name}" + return rm_title + +def clean_rm_desc(rm_desc: str) -> str: + # Strip %token% placeholders + return re.sub(r"%[a-z_]+%", "", rm_desc).strip(" -|") +``` + +## Step 4 — Per-page SEO checklist + +For every page in `pages.json`, fill in this record before writing HTML: + +```json +{ + "slug": "about", + "new_path": "/about/", + "canonical": "https://vibrantyou.yoga/about/", + "title": "About VibrantYou Yoga | Mindful Movement in [City], [State]", + "description": "Meet the instructors and story behind VibrantYou Yoga. [150-160 chars, include city]", + "keywords": "yoga studio [city], yoga instructor, mindful movement", + "og_image": "/assets/images/about-studio.webp", + "schema_type": "AboutPage", + "h1": "Our Story" +} +``` + +Write to `.planning/data/seo-map.json`. The HTML build reads this file to +stamp `<head>` tags. 
+ +## Step 5 — Schema.org per page type + +| Page | Schema type | Required fields | +|------|------------|----------------| +| Home | `LocalBusiness` | name, url, telephone, address, areaServed, openingHours | +| About | `AboutPage` + `Organization` | name, description, founders | +| Classes index | `ItemList` of `Course` | name, url, description per class | +| Class detail | `Course` | name, description, provider, educationalLevel | +| Contact | `ContactPage` | name, url, telephone, email, address | +| Blog post | `Article` | headline, datePublished, author, image | +| 404 | none | — | + +LocalBusiness schema for vibrantyou.yoga (seed from `site-info.json`): +```json +{ + "@context": "https://schema.org", + "@type": ["LocalBusiness", "HealthAndBeautyBusiness"], + "@id": "https://vibrantyou.yoga/#business", + "name": "VibrantYou Yoga", + "url": "https://vibrantyou.yoga", + "telephone": "", + "priceRange": "$$", + "servesCuisine": null, + "currenciesAccepted": "USD", + "paymentAccepted": "Cash, Credit Card", + "address": { + "@type": "PostalAddress", + "streetAddress": "", + "addressLocality": "", + "addressRegion": "", + "postalCode": "", + "addressCountry": "US" + } +} +``` +Mark address fields `DRAFT NEEDED` — do not fabricate. Pull from `wp_options` +(`admin_email`, Events Manager location settings) or ask client. 
+ +## Step 6 — Pre-launch SEO audit commands + +Run these before declaring the build complete: + +```bash +SITE=src + +# Every page has a <title> +find $SITE -name "*.html" | xargs grep -L '<title>' | grep -v "_template" + +# Every page has meta description +find $SITE -name "*.html" | xargs grep -L 'name="description"' | grep -v "_template" + +# Every page has canonical +find $SITE -name "*.html" | xargs grep -L 'rel="canonical"' | grep -v "_template" + +# Every page has JSON-LD +find $SITE -name "*.html" | xargs grep -L 'application/ld+json' | grep -v "_template" + +# No WP URLs leaked into HTML +grep -r "wp-content\|wp-admin\|wordpress\|?p=\|?page_id=" $SITE --include="*.html" + +# No unreplaced template placeholders +grep -r "{{" $SITE --include="*.html" + +# No Divi class residue +grep -r "et_pb_\|divi-builder" $SITE --include="*.html" +``` + +All seven commands must return zero results before launch. + +## Next step + +Proceed to `08-run-order.md` for the complete execution sequence, +then `02-wordpress-to-html-migration.md` Phase 7 for DNS cutover. diff --git a/wp-divi-pipeline-to-am-stack/08-run-order.md b/wp-divi-pipeline-to-am-stack/08-run-order.md new file mode 100644 index 0000000..20a8594 --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/08-run-order.md @@ -0,0 +1,230 @@ +# 08 — Run Order (DEPRECATED) + +> **Superseded by `10-agent-breadcrumbs.md`.** +> This file described the WP → static HTML (Stack B) run order. +> The pipeline now targets Stack A (PHP router + SQLite). +> Use `10-agent-breadcrumbs.md` for the current ordered execution checklist. + +--- + +Step-by-step execution sequence for a complete .wpress → AM HTML migration. +Run each command, verify the output, then proceed to the next. 
+ +## Prerequisites + +```bash +# Python 3.8+ required +python3 --version + +# cwebp for image conversion (optional — Python fallback available) +which cwebp || echo "cwebp not installed — will use Python Pillow fallback" + +# Set project domain variable (use throughout) +export DOMAIN="vibrantyou.yoga" +export PROJECT="/home/sirdrez/arisingmedia-websites/$DOMAIN" +export SOPS="/home/sirdrez/arisingmedia-websites/.am-webdesign-sops" +export WPRESS=$(ls $PROJECT/.planning/*.wpress | head -1) + +echo "Domain: $DOMAIN" +echo "Project: $PROJECT" +echo "Archive: $WPRESS" +``` + +--- + +## Phase 0 — Setup + +```bash +# Create directory structure +mkdir -p $PROJECT/{src/{about,services,contact,blog,classes,components,assets/{css,js,images,svg,fonts}},build,infra,api,.planning/{data/{content},scripts,wpress-extract}} + +# Verify archive +ls -lh $WPRESS +file $WPRESS +``` + +--- + +## Phase 1 — Extract archive + +```bash +python3 $SOPS/wp-divi-pipeline/scripts/extract_wpress.py \ + "$WPRESS" \ + "$PROJECT/.planning/wpress-extract/" + +# Verify +ls $PROJECT/.planning/wpress-extract/ +cat $PROJECT/.planning/wpress-extract/package.json | python3 -m json.tool | head -20 +ls -lh $PROJECT/.planning/wpress-extract/database.sql +``` + +Expected output: `DONE: N files | X MB` + +--- + +## Phase 2 — Database analysis + +```bash +python3 $SOPS/wp-divi-pipeline/scripts/analyze_db.py \ + "$PROJECT/.planning/wpress-extract/" \ + "$PROJECT/.planning/data/" + +# Verify +cat $PROJECT/.planning/data/site-info.json +echo "Pages: $(python3 -c "import json; print(len(json.load(open('$PROJECT/.planning/data/pages.json'))))")" +cat $PROJECT/.planning/data/design-system.json +``` + +Expected output: `pages.json (N pages/posts)` +If pages = 0, check the SQL prefix detection in the script output. 
+ +--- + +## Phase 3 — Content extraction + +### Divi 5 (most common — check design-system.json divi_version first) + +```bash +python3 $SOPS/wp-divi-pipeline/scripts/extract_divi5.py \ + "$PROJECT/.planning/data/pages.json" \ + "$PROJECT/.planning/data/content/" + +# Verify +ls $PROJECT/.planning/data/content/ +cat $PROJECT/.planning/data/content/home.json | python3 -m json.tool | head -40 +``` + +--- + +## Phase 4 — Design system + +Read `$PROJECT/.planning/data/design-system.json` and seed `main.css`: + +```bash +cat $PROJECT/.planning/data/design-system.json +``` + +Manually translate to CSS custom properties per `04-design-system-extraction.md`. +Write to: `$PROJECT/src/assets/css/main.css` + +Key values for vibrantyou.yoga: +- Primary: #1a8a7a Dark: #0f5f53 +- Body font: DM Sans Heading font: DM Serif Display + +--- + +## Phase 5 — Media migration + +```bash +# Catalog originals (skip WP-generated size variants) +find $PROJECT/.planning/wpress-extract/uploads -type f \ + \( -name "*.jpg" -o -name "*.jpeg" -o -name "*.png" -o -name "*.webp" \) | \ + grep -v -E "\-[0-9]+x[0-9]+\.(jpg|jpeg|png|webp)$" | \ + sort > $PROJECT/.planning/data/media-originals.txt + +echo "Original images: $(wc -l < $PROJECT/.planning/data/media-originals.txt)" + +# Copy to src/assets/images/ +while IFS= read -r src; do + cp "$src" "$PROJECT/src/assets/images/$(basename $src)" +done < $PROJECT/.planning/data/media-originals.txt + +# Convert to WebP (cwebp path) +cd $PROJECT/src/assets/images/ +for img in *.jpg *.jpeg *.png; do + [ -f "$img" ] || continue + base="${img%.*}" + cwebp -q 82 "$img" -o "${base}.webp" 2>/dev/null && rm "$img" +done +echo "WebP count: $(ls *.webp 2>/dev/null | wc -l)" +cd $PROJECT +``` + +--- + +## Phase 6 — Build HTML + +Per `05-content-migration.md`, build pages in this order: + +```bash +# 1. Write src/assets/css/main.css (design tokens — manual) +# 2. Write src/assets/css/components.css (manual) +# 3. Write src/components/header.html (manual) +# 4. 
Write src/components/footer.html (manual) +# 5. Write src/assets/js/components.js (fetch + inject) +# 6. Write src/assets/js/main.js (scroll, animations) +# 7. Write src/index.html (home page — first, establishes design) +# 8. Write remaining pages + +# After build, verify zero unreplaced placeholders +grep -r "{{" $PROJECT/src --include="*.html" && echo "FAIL: placeholders found" || echo "OK" + +# Verify no Divi residue +grep -rn "et_pb_\|wp:divi\|\[et_pb" $PROJECT/src --include="*.html" && echo "FAIL: Divi residue" || echo "OK" +``` + +--- + +## Phase 7 — SEO audit + +```bash +cd $PROJECT/src + +# All pages have title +find . -name "*.html" | grep -v "_template" | xargs grep -L '<title>' | head + +# All pages have canonical +find . -name "*.html" | grep -v "_template" | xargs grep -L 'rel="canonical"' | head + +# All pages have JSON-LD +find . -name "*.html" | grep -v "_template" | xargs grep -L 'ld+json' | head + +cd $PROJECT +``` + +All commands must return empty output. + +--- + +## Phase 8 — Infra (Docker) + +```bash +# Copy infra from reference project +cp /home/sirdrez/arisingmedia-websites/vibrantyoucoaching.com/Dockerfile $PROJECT/ +cp /home/sirdrez/arisingmedia-websites/vibrantyoucoaching.com/docker-compose.yml $PROJECT/ +cp -r /home/sirdrez/arisingmedia-websites/vibrantyoucoaching.com/infra/ $PROJECT/infra/ + +# Update nginx.conf: set server_name to $DOMAIN, add redirects from 07-seo-preservation.md +# Update docker-compose.yml: set container_name and port + +# Test build +docker compose -f $PROJECT/docker-compose.yml build 2>&1 | tail -5 +docker compose -f $PROJECT/docker-compose.yml up -d +curl -I http://localhost:PORT/ 2>&1 | head -5 +``` + +--- + +## Phase 9 — Protection check + +```bash +# Run after deploy +bash $SOPS/tools/verify-protection.sh https://$DOMAIN + +# Must return exit 0 with no FAIL lines +``` + +--- + +## Checklist summary + +- [ ] Phase 0: Directories created +- [ ] Phase 1: .wpress extracted, database.sql present +- [ ] Phase 2: 
pages.json > 0 entries, design-system.json has colors + fonts +- [ ] Phase 3: content/ dir has one JSON per page +- [ ] Phase 4: main.css written with full :root{} token block +- [ ] Phase 5: WebP images in src/assets/images/ +- [ ] Phase 6: All HTML pages built, zero {{ placeholders, zero Divi residue +- [ ] Phase 7: All SEO audit commands return empty +- [ ] Phase 8: Docker container up, curl returns 200 +- [ ] Phase 9: verify-protection.sh exits 0 diff --git a/wp-divi-pipeline-to-am-stack/09-stack-a-output.md b/wp-divi-pipeline-to-am-stack/09-stack-a-output.md new file mode 100644 index 0000000..f8d3631 --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/09-stack-a-output.md @@ -0,0 +1,370 @@ +# 09 — Stack A Output Spec (SQLite Schema + sections_json) + +## SQLite databases produced by seed_databases.py + +### pages.sqlite + +```sql +CREATE TABLE pages ( + id INTEGER PRIMARY KEY, + slug TEXT UNIQUE NOT NULL, + template TEXT NOT NULL, -- home | static | classes | schedule | glossary | blog + title TEXT NOT NULL, + meta_description TEXT, + canonical_url TEXT, + og_image TEXT, + schema_json TEXT, + hero_eyebrow TEXT, + hero_h1 TEXT, + hero_lead TEXT, + sections_json TEXT, -- JSON array of section objects + updated_at TEXT +); +``` + +### nav.sqlite + +```sql +CREATE TABLE nav_items ( + id INTEGER PRIMARY KEY, + label TEXT NOT NULL, + href TEXT NOT NULL, + display_order INTEGER DEFAULT 0, + is_cta INTEGER DEFAULT 0 -- 1 = render as button +); +``` + +### blog.sqlite + +```sql +CREATE TABLE posts ( + id INTEGER PRIMARY KEY, + slug TEXT UNIQUE NOT NULL, + title TEXT NOT NULL, + excerpt TEXT, + body_html TEXT, + author TEXT DEFAULT 'Admin', + published_at TEXT, + og_image TEXT, + tags TEXT +); +``` + +### testimonials.sqlite + +```sql +CREATE TABLE testimonials ( + id INTEGER PRIMARY KEY, + quote TEXT NOT NULL, + author_name TEXT NOT NULL, + author_role TEXT, + is_featured INTEGER DEFAULT 0, + display_order INTEGER DEFAULT 0 +); +``` + +### glossary.sqlite (if site has a 
glossary) + +```sql +CREATE TABLE terms ( + id INTEGER PRIMARY KEY, + slug TEXT UNIQUE NOT NULL, + term TEXT NOT NULL, + pronunciation TEXT, + definition TEXT NOT NULL, + category TEXT NOT NULL, + level TEXT NOT NULL, + display_order INTEGER DEFAULT 0 +); +``` + +### faq.sqlite (if site has FAQs) + +```sql +CREATE TABLE faqs ( + id INTEGER PRIMARY KEY, + question TEXT NOT NULL, + answer TEXT NOT NULL, + category TEXT NOT NULL, + display_order INTEGER DEFAULT 0 +); +``` + +## sections_json section types + +Each page row's sections_json is a JSON array. Each element is a typed object: + +### text_split + +Two-column: text on one side, image on the other. CTAs optional. + +```json +{ + "type": "text_split", + "eyebrow": "", + "h2": "", + "body": "", + "img": "/assets/images/x.webp", + "img_alt": "", + "cta_label": "", + "cta_href": "", + "reverse": false +} +``` + +### feature_cards + +Grid of 3-4 cards, each with icon + title + body. + +```json +{ + "type": "feature_cards", + "eyebrow": "", + "h2": "", + "lead": "", + "cards": [ + {"icon": "", "title": "", "body": ""} + ] +} +``` + +### accordion + +Collapsible question/answer pairs. + +```json +{ + "type": "accordion", + "eyebrow": "", + "h2": "", + "items": [ + {"q": "", "a": ""} + ] +} +``` + +### cta_band + +Full-width call-to-action with headline + button. + +```json +{ + "type": "cta_band", + "eyebrow": "", + "h2": "", + "lead": "", + "btn_label": "", + "btn_href": "", + "variant": "forest" +} +``` + +### text_block + +Simple text heading + body. + +```json +{ + "type": "text_block", + "eyebrow": "", + "h2": "", + "body": "" +} +``` + +### stats_strip + +Grid of stat + label pairs. + +```json +{ + "type": "stats_strip", + "stats": [ + {"value": "", "label": ""} + ] +} +``` + +### topic_pills + +Row of clickable topic/tag items. + +```json +{ + "type": "topic_pills", + "eyebrow": "", + "h2": "", + "items": [ + {"label": "", "href": ""} + ] +} +``` + +### form_contact + +Embedded contact form. 
+ +```json +{ + "type": "form_contact", + "h2": "", + "lead": "" +} +``` + +### booking_options + +Pricing table or service options grid. + +```json +{ + "type": "booking_options", + "eyebrow": "", + "h2": "", + "options": [ + {"name": "", "price": "", "features": [], "cta_label": "", "cta_href": ""} + ] +} +``` + +## Divi module → section type mapping + +| Divi Module | AM Section Type | Notes | +|---|---|---| +| et_pb_blurb | feature_cards item | Extract icon, title, body | +| et_pb_toggle | accordion item | Extract q/a pairs | +| et_pb_cta | cta_band | Extract headline, button text, href | +| et_pb_pricing_table | booking_options | Extract plan names, prices, features | +| et_pb_testimonial | testimonials.sqlite row | Extract quote, author, role | +| et_pb_text | text_block | Extract body copy | +| et_pb_code | text_block (sanitized) | Extract HTML, remove script tags | +| et_pb_number_counter | stats_strip item | Extract number, label | +| et_pb_button | cta_band (minimal) | Extract button text, href | +| et_pb_menu / header | nav.sqlite rows | Extract label, URL, menu order | + +## seed_databases.py structure + +Every migration generates a seed_databases.py at `build/seed_databases.py`. 
+ +Template structure: + +```python +import sqlite3 +import json +from pathlib import Path + +pages_path = Path('src/api/data/pages.sqlite') +nav_path = Path('src/api/data/nav.sqlite') +blog_path = Path('src/api/data/blog.sqlite') +testimonials_path = Path('src/api/data/testimonials.sqlite') + +def seed_pages(conn): + """INSERT all pages with sections_json and hero data.""" + cursor = conn.cursor() + cursor.execute(''' + CREATE TABLE IF NOT EXISTS pages ( + id INTEGER PRIMARY KEY, + slug TEXT UNIQUE NOT NULL, + template TEXT NOT NULL, + title TEXT NOT NULL, + meta_description TEXT, + canonical_url TEXT, + og_image TEXT, + schema_json TEXT, + hero_eyebrow TEXT, + hero_h1 TEXT, + hero_lead TEXT, + sections_json TEXT, + updated_at TEXT + ) + ''') + + pages = [ + ('home', 'home', 'Home', 'Home meta', '/home', '', '{}', + '', 'Welcome', 'Lead text', json.dumps([...])), + # ... more rows + ] + for page in pages: + cursor.execute( + 'INSERT INTO pages (slug, template, title, meta_description, canonical_url, og_image, schema_json, hero_eyebrow, hero_h1, hero_lead, sections_json, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime("now"))', + page + ) + +def seed_nav(conn): + """INSERT navigation items from nav.json.""" + cursor = conn.cursor() + cursor.execute(''' + CREATE TABLE IF NOT EXISTS nav_items ( + id INTEGER PRIMARY KEY, + label TEXT NOT NULL, + href TEXT NOT NULL, + display_order INTEGER DEFAULT 0, + is_cta INTEGER DEFAULT 0 + ) + ''') + + items = [ + ('Home', '/', 0, 0), + ('About', '/about', 1, 0), + ('Contact', '/contact', 2, 1), + # ... 
more rows + ] + for item in items: + cursor.execute( + 'INSERT INTO nav_items (label, href, display_order, is_cta) VALUES (?, ?, ?, ?)', + item + ) + +def seed_blog(conn): + """INSERT blog posts if site has a blog.""" + cursor = conn.cursor() + cursor.execute(''' + CREATE TABLE IF NOT EXISTS posts ( + id INTEGER PRIMARY KEY, + slug TEXT UNIQUE NOT NULL, + title TEXT NOT NULL, + excerpt TEXT, + body_html TEXT, + author TEXT DEFAULT 'Admin', + published_at TEXT, + og_image TEXT, + tags TEXT + ) + ''') + # ... INSERT rows + +def seed_testimonials(conn): + """INSERT testimonials if present.""" + # ... CREATE TABLE + INSERT rows + +if __name__ == '__main__': + for db_path, seeder_fn in [ + (pages_path, seed_pages), + (nav_path, seed_nav), + (blog_path, seed_blog), + (testimonials_path, seed_testimonials), + ]: + if db_path.exists(): + db_path.unlink() # clear if re-running + conn = sqlite3.connect(db_path) + seeder_fn(conn) + conn.commit() + conn.close() + print(f"seeded: {db_path.name}") + + print("All databases seeded successfully.") +``` + +## Content validation checklist + +After staging seed_databases.py and before running it: + +- [ ] No raw Divi shortcode residue: `[et_pb_`, `[vc_`, etc. +- [ ] No em-dashes (—): replace with commas, periods, or spaces +- [ ] No "Netherlands" or other location-specific copy (unless intentional) +- [ ] hero_h1 is 5-10 words (brand voice, not generic) +- [ ] Each section type matches the spec above (no custom types) +- [ ] All images are `/assets/images/{name}.webp` (not absolute URLs) +- [ ] All CTAs point to correct slugs (`/about`, `/contact`, etc.) 
+- [ ] Nav items include at least 3 menu links +- [ ] At least one nav item has `is_cta=1` (usually Contact or Book) diff --git a/wp-divi-pipeline-to-am-stack/10-agent-breadcrumbs.md b/wp-divi-pipeline-to-am-stack/10-agent-breadcrumbs.md new file mode 100644 index 0000000..1e577d3 --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/10-agent-breadcrumbs.md @@ -0,0 +1,249 @@ +# 10 — Agent Execution Breadcrumbs + +Step-by-step ordered checklist for an agent executing a .wpress migration to Stack A. +Each step has: input, command, expected output, verification. Complete each before next. + +## Pre-flight + +- [ ] .wpress file confirmed at `$PROJECT/.planning/*.wpress` +- [ ] python3 --version >= 3.8 +- [ ] docker compose version confirmed +- [ ] DOMAIN and PROJECT env vars set + +## Step 1 — Extract archive + +**INPUT:** `$WPRESS` (path to .wpress file) + +**CMD:** +```bash +python3 $SOPS/wp-divi-pipeline-to-am-stack/scripts/extract_wpress.py "$WPRESS" "$PROJECT/.planning/wpress-extract/" +``` + +**VERIFY:** +```bash +ls $PROJECT/.planning/wpress-extract/ +``` + +Expected: `database.sql` and `wp-content/` present + +**BLOCK:** If database.sql missing, .wpress format differs — check extract_wpress.py logs. + +--- + +## Step 2 — Analyze database + +**INPUT:** `$PROJECT/.planning/wpress-extract/database.sql` + +**CMD:** +```bash +python3 $SOPS/wp-divi-pipeline-to-am-stack/scripts/analyze_db.py "$PROJECT/.planning/wpress-extract/" "$PROJECT/.planning/data/" +``` + +**VERIFY:** +```bash +cat $PROJECT/.planning/data/pages.json | python3 -m json.tool | head -20 +cat $PROJECT/.planning/data/site-info.json +``` + +Expected: page objects with slug + title visible; divi_version: 4 or 5 + +**BLOCK:** If pages.json empty, check table prefix detection in analyze_db.py output. 
+ +--- + +## Step 3 — Extract nav menus + +**INPUT:** `$PROJECT/.planning/wpress-extract/database.sql` + +**CMD:** +```bash +python3 $SOPS/wp-divi-pipeline-to-am-stack/scripts/extract_nav.py "$PROJECT/.planning/wpress-extract/" "$PROJECT/.planning/data/" +``` + +**VERIFY:** +```bash +cat $PROJECT/.planning/data/nav.json | python3 -m json.tool +``` + +Expected: array of `{label, href, display_order, is_cta}` objects. At least 3 items. + +**NOTE:** `is_cta=1` for "Book", "Get Started", "Contact", "Sign Up" type items. + +--- + +## Step 4 — Extract page content + +**INPUT:** `$PROJECT/.planning/data/pages.json` + `wpress-extract/` + +**CMD:** (choose based on Divi version from Step 2) + +Divi 5: +```bash +python3 $SOPS/wp-divi-pipeline-to-am-stack/scripts/extract_divi5.py "$PROJECT/.planning/data/pages.json" "$PROJECT/.planning/data/content/" +``` + +Divi 4: +```bash +python3 $SOPS/wp-divi-pipeline-to-am-stack/scripts/extract_divi4.py "$PROJECT/.planning/data/pages.json" "$PROJECT/.planning/data/content/" +``` + +**VERIFY:** +```bash +ls $PROJECT/.planning/data/content/ +cat $PROJECT/.planning/data/content/home.json | python3 -m json.tool | head -40 +``` + +Expected: one .json file per page (home.json, about.json, etc.); sections array with type fields visible. + +--- + +## Step 5 — Extract media + +**INPUT:** `$PROJECT/.planning/wpress-extract/wp-content/uploads/` + +**CMD:** +```bash +python3 $SOPS/wp-divi-pipeline-to-am-stack/scripts/extract_media.py "$PROJECT/.planning/wpress-extract/" "$PROJECT/.planning/data/" "$PROJECT/assets/images/" +``` + +**VERIFY:** +```bash +ls $PROJECT/assets/images/ | head -10 +cat $PROJECT/.planning/data/media-manifest.json | python3 -m json.tool | head -20 +``` + +Expected: .webp files present; media-manifest.json shows `original_url → /assets/images/x.webp` mapping. 
+ +--- + +## Step 6 — Stage seed_databases.py skeleton + +**INPUT:** All .json files in `$PROJECT/.planning/data/content/` + `nav.json` + `media-manifest.json` + +**CMD:** +```bash +python3 $SOPS/wp-divi-pipeline-to-am-stack/scripts/stage_seed.py "$PROJECT/.planning/data/" "$PROJECT/build/seed_databases.py" --domain "$DOMAIN" +``` + +**VERIFY:** +```bash +python3 -c "import ast; ast.parse(open('$PROJECT/build/seed_databases.py').read()); print('syntax OK')" +grep "def seed_pages" $PROJECT/build/seed_databases.py +``` + +Expected: seed_databases.py is valid Python; contains seed_pages, seed_nav functions. + +**NOTE:** Content stubs are in place. Human/agent reviews + fills in prose before running. + +--- + +## Step 7 — Review and fill content + +**MANUAL:** Open `$PROJECT/build/seed_databases.py` + +For each page's `sections_json`: +- [ ] Confirm `hero_h1` and `hero_lead` match the brand (not raw Divi copy-paste) +- [ ] Confirm each section has correct type (see 09-stack-a-output.md mapping) +- [ ] Replace any em-dashes (—) with commas or periods +- [ ] Remove any Divi shortcode residue (`[et_pb_`, `[vc_`, etc.) +- [ ] Ensure no "Netherlands" or location-specific copy if site is global +- [ ] Confirm nav items in `seed_nav()` match final site IA +- [ ] Verify all image paths are `/assets/images/{name}.webp` +- [ ] Verify all CTAs point to correct slugs (`/about`, `/contact`, etc.) + +--- + +## Step 8 — Run seed_databases.py + +**CMD:** +```bash +cd $PROJECT && python3 build/seed_databases.py +``` + +**VERIFY:** +```bash +ls -lh src/api/data/ +``` + +Expected: Output line shows counts > 0: `seeded: pages=N nav=N blog=N ...`. Database files exist. + +**BLOCK:** Any count=0 means that seeder function has an error — fix before continuing. 
+ +--- + +## Step 9 — Scaffold PHP templates + +**CMD:** Copy reference templates from vibrantyou.yoga as starting point: + +```bash +VYOGA="/home/sirdrez/arisingmedia-websites/vibrantyou.yoga" +cp $VYOGA/src/api/router.php $PROJECT/src/api/router.php +cp $VYOGA/src/api/contact.php $PROJECT/src/api/contact.php +cp $VYOGA/src/api/templates/static.php $PROJECT/src/api/templates/static.php +cp $VYOGA/src/api/templates/home.php $PROJECT/src/api/templates/home.php +cp $VYOGA/src/api/components/_header.php $PROJECT/src/api/components/_header.php +cp $VYOGA/src/api/components/_footer.php $PROJECT/src/api/components/_footer.php +cp -r $VYOGA/assets/css $PROJECT/assets/ +cp -r $VYOGA/assets/js $PROJECT/assets/ +cp $VYOGA/Dockerfile $PROJECT/ +cp $VYOGA/docker-compose.yml $PROJECT/ +cp -r $VYOGA/infra $PROJECT/ +``` + +**VERIFY:** +```bash +php -l $PROJECT/src/api/router.php +``` + +Expected: `No syntax errors detected` + +**NOTE:** Update brand name, colors, and any site-specific logic in templates. + +**NOTE:** `_header.php` reads from nav.sqlite — no hardcoded nav needed. + +--- + +## Step 10 — Build and test + +**CMD:** +```bash +cd $PROJECT && docker compose build --no-cache && docker compose up -d +``` + +**VERIFY:** +```bash +sleep 5 +curl -I http://localhost:8000/ +curl -s http://localhost:8000/ | grep -i "title\|h1" | head -3 +``` + +Expected: HTTP 200; site name visible in page. 
+ +--- + +## Step 11 — Protection + SEO check + +**CMD:** +```bash +bash /home/sirdrez/arisingmedia-websites/.am-webdesign-sops/tools/verify-protection.sh http://localhost:8000 +``` + +**VERIFY:** Exit 0, no FAIL lines + +--- + +## Step 12 — Lighthouse + cleanup + +**MANUAL:** +- Open Firefox: `firefox http://localhost:8000/` +- Run Lighthouse (DevTools > Lighthouse) + +**TARGET:** +- Performance >= 90 +- SEO >= 95 +- Accessibility >= 90 + +**CLEANUP:** +```bash +cd $PROJECT && docker compose down +``` diff --git a/wp-divi-pipeline-to-am-stack/README.md b/wp-divi-pipeline-to-am-stack/README.md new file mode 100644 index 0000000..876781c --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/README.md @@ -0,0 +1,81 @@ +# WP + Divi to AM Stack A Pipeline — SOP Index + +End-to-end playbook for converting any WordPress / Divi site backup (.wpress) +into an Arising Media Stack A deployment: PHP router + SQLite + vanilla JS/CSS. + +## Quick start (CLI launcher) + +```bash +python3 scripts/migrate.py --wpress /path/to/backup.wpress --domain example.com +``` + +Runs phases 0-6 automatically (extract, analyze, nav, content, media, stage seed). +Prints agent breadcrumbs for phases 7-11. See `10-agent-breadcrumbs.md` for the +complete ordered execution checklist. 
+ +## SOPs in this folder + +| File | Phase | Description | +|------|-------|-------------| +| `00-overview.md` | — | Pipeline overview, philosophy, what to extract vs not replicate | +| `01-wpress-extraction.md` | 1 | .wpress binary format, extraction script, verification | +| `02-database-analysis.md` | 2 | MySQL dump parsing, page inventory, Divi version detection | +| `03-divi-content-extraction.md` | 3 | Divi 4 shortcodes vs Divi 5 blocks, extraction scripts | +| `04-design-system-extraction.md` | 4 | Colors, fonts, spacing → tokens.css | +| `05-content-migration.md` | 5-6 | Section remapping, content staging, seed_databases.py | +| `06-media-assets.md` | 5 | Upload migration, WebP conversion, media manifest | +| `07-seo-preservation.md` | 7 | Redirect map, Rank Math extraction, schema.org | +| `08-run-order.md` | — | DEPRECATED — superseded by `10-agent-breadcrumbs.md` | +| `09-stack-a-output.md` | — | SQLite schemas, sections_json spec, Divi→AM module mapping | +| `10-agent-breadcrumbs.md` | 0-11 | Ordered agent execution checklist (.wpress → live Docker) | + +## Scripts in scripts/ + +| Script | Purpose | +|--------|---------| +| `migrate.py` | CLI launcher — runs phases 0-6, prints breadcrumbs for 7-11 | +| `run_pipeline.sh` | Legacy shell wrapper (pre-migrate.py) | +| `extract_wpress.py` | Unpack .wpress binary archive | +| `analyze_db.py` | Parse SQL dump → pages.json + design-system.json | +| `extract_divi5.py` | Parse Divi 5 blocks → per-page content JSON | +| `extract_nav.py` | Extract WordPress nav menus → nav.json | +| `stage_seed.py` | Map extracted JSON → seed_databases.py skeleton (Phase 6) | + +## Key facts about .wpress archives + +- Format: Custom sequential binary (NOT zip/tar) — 4377-byte headers +- Table prefix in SQL dump: `SERVMASK_PREFIX_` (placeholder, NOT `wp_`) +- Directory layout: flat — `uploads/`, `themes/`, `plugins/` at archive root (no `wp-content/` wrapper) +- Divi 5 stores theme settings in `et_divi` option as PHP-serialized 
array + +## vibrantyou.yoga — extracted data reference + +Site: Vibrant You Yoga (instructor: Meghan) +Domain: https://vibrantyou.yoga +Divi version: 5.0.3 +WP version: 6.9.4 + +Design system: +- Primary: #1a8a7a Dark: #0f5f53 Secondary: #2ea3f2 +- Body: #5a6b68 Headings: #2d2d2d +- Body font: DM Sans 17px / 1.6 lh +- Heading font: DM Serif Display 600 / 36px / 1.2 lh + +Pages to migrate (22 published): +- home, about, classes, schedule, instructors, contact, blog, faq +- book (private sessions), online-yoga, donate +- Drop: video-category, video-tag, search-videos, user-videos, player-embed, + categories, tags, my-bookings (all plugin-generated archive pages) + +Plugins requiring AM replacements: +- Gravity Forms + Stripe → AM HTML form + Python API + Resend +- Events Manager → static schedule table in /schedule/ +- All-in-One Video Gallery → embed YouTube/Vimeo directly or drop + +## Related SOPs + +- `../01-project-structure.md` — AM deployment directory layout +- `../02-wordpress-to-html-migration.md` — Original 8-phase WP migration playbook +- `../03-build-pipeline.md` — JSON + template stamping for repeated pages +- `../06-seo-meta.md` — Full `<head>` requirements, schema.org per page type +- `../tools/verify-protection.sh` — Post-deploy security audit diff --git a/wp-divi-pipeline-to-am-stack/scripts/__pycache__/stage_seed.cpython-313.pyc b/wp-divi-pipeline-to-am-stack/scripts/__pycache__/stage_seed.cpython-313.pyc new file mode 100644 index 0000000..5ae9b2f Binary files /dev/null and b/wp-divi-pipeline-to-am-stack/scripts/__pycache__/stage_seed.cpython-313.pyc differ diff --git a/wp-divi-pipeline-to-am-stack/scripts/analyze_db.py b/wp-divi-pipeline-to-am-stack/scripts/analyze_db.py new file mode 100644 index 0000000..3ba303c --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/scripts/analyze_db.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python3 +"""Analyze WordPress MySQL dump from a .wpress extract. 
# Backslash escapes that decode to a control character; quotes and the
# backslash itself are handled directly in _unescape_sql.
_SQL_CONTROL_ESCAPES = {"n": "\n", "r": "\r", "t": "\t", "0": "\0"}
_SQL_ESCAPE_RE = re.compile(r"\\(.)", re.DOTALL)
# Characters that can change scanner state while looking for a statement end.
_SQL_SPECIAL_RE = re.compile(r"""[;'"\\]""")


def _unescape_sql(s: str) -> str:
    """Undo MySQL string escaping in one left-to-right pass.

    A single pass is required: chained ``str.replace`` calls decode the
    two-character sequence "escaped backslash" first and then re-interpret the
    resulting backslash together with the following character (so a literal
    backslash followed by ``n`` wrongly became a newline).

    Unknown escape sequences are left untouched.
    """
    def _decode(match: re.Match) -> str:
        ch = match.group(1)
        if ch in ("'", '"', "\\"):
            return ch
        return _SQL_CONTROL_ESCAPES.get(ch, match.group(0))

    return _SQL_ESCAPE_RE.sub(_decode, s)


def _parse_values_block(sql_block: str) -> list[list[str]]:
    """Extract rows from a multi-row INSERT VALUES block.

    Handles commas and parentheses inside quoted strings via a small state
    machine. Backslash escapes are kept verbatim (``_unescape_sql`` decodes
    them later); an SQL doubled quote (``''``) inside a string is reduced to a
    single quote character.

    Returns a list of rows; each row is a list of raw string values. An
    unquoted NULL is returned as the sentinel ``"\\x00NULL\\x00"``.
    """
    rows: list[list[str]] = []
    m = re.search(r"VALUES\s*", sql_block, re.IGNORECASE)
    if not m:
        return rows
    rest = sql_block[m.end():]

    i = 0
    n = len(rest)
    while i < n:
        # Skip to the '(' that opens the next tuple.
        while i < n and rest[i] != '(':
            i += 1
        if i >= n:
            break
        i += 1  # skip '('

        row: list[str] = []
        field: list[str] = []
        quote_char = ''  # empty string means "not inside a quoted value"

        while i < n:
            c = rest[i]
            if quote_char:
                if c == '\\' and i + 1 < n:
                    # Keep the escape pair intact so the escaped character can
                    # never be mistaken for the closing quote.
                    field.append(c)
                    field.append(rest[i + 1])
                    i += 2
                    continue
                if c == quote_char:
                    if rest[i + 1:i + 2] == quote_char:
                        # Doubled quote = one literal quote character.
                        field.append(c)
                        i += 2
                        continue
                    quote_char = ''
                else:
                    field.append(c)
            elif c in ("'", '"'):
                quote_char = c
            elif c == ',':
                row.append("".join(field))
                field = []
            elif c == ')':
                row.append("".join(field))
                rows.append(row)
                i += 1
                break
            elif c == 'N' and rest[i:i + 4] == 'NULL':
                field.append('\x00NULL\x00')
                i += 4
                continue
            else:
                field.append(c)
            i += 1

    return rows


def _statement_end(sql_text: str, start: int) -> int:
    """Return the index just past the ';' that terminates the statement.

    Scanning starts at ``start`` and ignores semicolons inside quoted strings
    (honouring backslash escapes and doubled quotes). Returns ``len(sql_text)``
    when no terminator is found.
    """
    quote = ""
    pos = start
    while True:
        m = _SQL_SPECIAL_RE.search(sql_text, pos)
        if m is None:
            return len(sql_text)
        ch = m.group()
        pos = m.end()
        if quote:
            if ch == "\\":
                pos += 1  # skip the escaped character
            elif ch == quote:
                if sql_text[pos:pos + 1] == quote:
                    pos += 1  # doubled quote stays inside the string
                else:
                    quote = ""
        elif ch == ";":
            return pos
        elif ch != "\\":
            quote = ch


def _column_names(col_block: str) -> list[str]:
    """Return column names from the body of a CREATE TABLE statement.

    The body is split on top-level commas (outside parentheses and quotes);
    every part that starts with a backticked identifier followed by a type
    word is a column. Index/constraint clauses start with a keyword and are
    therefore skipped, whatever the column type is called.
    """
    parts: list[str] = []
    depth = 0
    quote = ""
    start = 0
    i = 0
    n = len(col_block)
    while i < n:
        ch = col_block[i]
        if quote:
            if ch == "\\":
                i += 1
            elif ch == quote:
                quote = ""
        elif ch in ("'", '"'):
            quote = ch
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
        elif ch == "," and depth == 0:
            parts.append(col_block[start:i])
            start = i + 1
        i += 1
    parts.append(col_block[start:])

    names: list[str] = []
    for part in parts:
        m = re.match(r"\s*`([^`]+)`\s+\w", part)
        if m:
            names.append(m.group(1))
    return names


def load_table(sql_text: str, table_name: str) -> list[dict]:
    """Return all rows inserted into ``table_name`` as a list of dicts.

    Column names come from the table's CREATE TABLE statement, or from the
    INSERT's own column list when it has one. Values are unescaped strings;
    SQL NULL becomes ``None``. Returns ``[]`` when the table has no
    CREATE TABLE statement in the dump.
    """
    table = re.escape(table_name)
    create = re.search(
        rf"CREATE TABLE `{table}`\s*\((.*?)\)\s*ENGINE",
        sql_text,
        re.DOTALL | re.IGNORECASE,
    )
    if not create:
        return []
    cols = _column_names(create.group(1))

    insert_re = re.compile(
        rf"INSERT INTO `{table}`\s*(\([^)]*\))?\s*(VALUES)",
        re.IGNORECASE,
    )
    rows_out: list[dict] = []
    pos = 0
    while True:
        m = insert_re.search(sql_text, pos)
        if m is None:
            break
        # Find the real end of the statement; a regex such as `\(.+?\);`
        # stops at the first ");" inside string data and truncates the rows.
        pos = _statement_end(sql_text, m.end())
        names = re.findall(r"`([^`]+)`", m.group(1)) if m.group(1) else cols
        for row in _parse_values_block(sql_text[m.start(2):pos]):
            record: dict[str, Any] = {}
            for idx, col in enumerate(names):
                val = row[idx] if idx < len(row) else ""
                record[col] = None if val == "\x00NULL\x00" else _unescape_sql(val)
            rows_out.append(record)
    return rows_out
def extract_pages(sql_text: str, prefix: str = "wp_") -> list[dict]:
    """Return all published pages, posts and events with their SEO meta.

    Rows come from ``{prefix}posts``; post meta from ``{prefix}postmeta`` is
    attached per post. NULL column values are normalised to empty strings so a
    sparse dump cannot crash the extraction, and meta rows without a key are
    ignored. The result is sorted by ``menu_order`` (non-numeric values sort
    as 0).
    """
    posts = load_table(sql_text, f"{prefix}posts")
    postmeta = load_table(sql_text, f"{prefix}postmeta")

    # Build postmeta lookup: post_id -> {meta_key: meta_value}
    meta_map: dict[str, dict[str, str]] = {}
    for row in postmeta:
        key = row.get("meta_key")
        if not key:
            # meta_key may be NULL in the dump; such rows cannot be addressed
            # and would break the str.startswith() filtering below.
            continue
        pid = str(row.get("post_id") or "")
        meta_map.setdefault(pid, {})[key] = row.get("meta_value") or ""

    def _col(row: dict, name: str, default: str = "") -> str:
        """Column value with NULL/missing replaced by ``default``."""
        value = row.get(name)
        return default if value is None else value

    def _menu_order(entry: dict) -> int:
        try:
            return int(entry["menu_order"] or 0)
        except (TypeError, ValueError):
            return 0

    hidden_prefixes = ("_", "rank_math", "et_")

    pages = []
    for p in posts:
        if p.get("post_status") != "publish":
            continue
        post_type = _col(p, "post_type")
        if post_type not in ("page", "post", "event"):
            continue

        pid = str(p.get("ID") or "")
        meta = meta_map.get(pid, {})

        pages.append({
            "id": pid,
            "post_type": post_type,
            "slug": _col(p, "post_name"),
            "title": _col(p, "post_title"),
            "status": _col(p, "post_status"),
            "date": _col(p, "post_date")[:10],
            "modified": _col(p, "post_modified")[:10],
            "content_raw": _col(p, "post_content"),
            "excerpt": _col(p, "post_excerpt"),
            "parent_id": _col(p, "post_parent", "0"),
            "menu_order": _col(p, "menu_order", "0"),
            # Rank Math SEO fields
            "seo_title": meta.get("rank_math_title", ""),
            "seo_description": meta.get("rank_math_description", ""),
            "seo_keywords": meta.get("rank_math_focus_keyword", ""),
            # Everything that is not private, Rank Math or Divi meta.
            "acf": {k: v for k, v in meta.items() if not k.startswith(hidden_prefixes)},
        })

    pages.sort(key=_menu_order)
    return pages
Search for it + found = list(extract_dir.rglob("*.sql")) + if not found: + print(f"ERROR: No .sql file found under {extract_dir}") + sys.exit(1) + sql_file = found[0] + print(f"Found SQL at: {sql_file}") + + print(f"Loading {sql_file} ({sql_file.stat().st_size / 1024 / 1024:.1f} MB)...") + sql_text = sql_file.read_text(encoding="utf-8", errors="replace") + + # Detect Divi version + divi_version = detect_divi_version(sql_text) + print(f"Divi version detected: {divi_version}") + + # Load wp_options + pkg = {} + pkg_file = extract_dir / "package.json" + if pkg_file.exists(): + pkg = json.loads(pkg_file.read_text()) + + # AIOIM dumps use SERVMASK_PREFIX_ as a placeholder in the SQL file. + # Detect which prefix the dump actually uses. + if "SERVMASK_PREFIX_" in sql_text: + sql_prefix = "SERVMASK_PREFIX_" + else: + sql_prefix = pkg.get("Database", {}).get("Prefix", "wp_") + runtime_prefix = pkg.get("Database", {}).get("Prefix", "wp_") + print(f"SQL prefix: {sql_prefix!r} (runtime prefix: {runtime_prefix!r})") + + options = load_options(sql_text, sql_prefix) + print(f"Loaded {len(options)} options") + + # Design system + design = extract_design_system(options) + design["divi_version"] = divi_version + design["wp_version"] = pkg.get("WordPress", {}).get("Version", "") + design["plugins"] = pkg.get("Plugins", []) + (out_dir / "design-system.json").write_text(json.dumps(design, indent=2, ensure_ascii=False)) + print(f"Wrote design-system.json ({len(design)} keys)") + + # Pages + pages = extract_pages(sql_text, sql_prefix) + (out_dir / "pages.json").write_text(json.dumps(pages, indent=2, ensure_ascii=False)) + print(f"Wrote pages.json ({len(pages)} pages/posts)") + + # Site info summary + site_info = { + "domain": pkg.get("SiteURL", options.get("siteurl", "")), + "name": options.get("blogname", ""), + "tagline": options.get("blogdescription", ""), + "admin_email": options.get("admin_email", ""), + "wp_version": pkg.get("WordPress", {}).get("Version", ""), + "divi_version": 
divi_version, + "plugins": pkg.get("Plugins", []), + "prefix": runtime_prefix, + "total_pages": len([p for p in pages if p["post_type"] == "page"]), + "total_posts": len([p for p in pages if p["post_type"] == "post"]), + } + (out_dir / "site-info.json").write_text(json.dumps(site_info, indent=2, ensure_ascii=False)) + print(f"Wrote site-info.json") + + print(f"\nDone. Output in: {out_dir}") + print(f" pages.json : {len(pages)} entries") + print(f" design-system.json: {len(design)} keys") + print(f" site-info.json : done") + + +if __name__ == "__main__": + main() diff --git a/wp-divi-pipeline-to-am-stack/scripts/extract_divi5.py b/wp-divi-pipeline-to-am-stack/scripts/extract_divi5.py new file mode 100644 index 0000000..5369f3f --- /dev/null +++ b/wp-divi-pipeline-to-am-stack/scripts/extract_divi5.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +"""Extract content from Divi 5 block markup in pages.json. + +Reads .planning/data/pages.json (produced by analyze_db.py) and for each page +parses the `content_raw` Divi 5 block structure into a clean per-page JSON +under .planning/data/content/{slug}.json. 
+ +Usage: + python3 extract_divi5.py <pages_json> <output_dir> + + pages_json : path to .planning/data/pages.json + output_dir : directory to write {slug}.json files (created if missing) +""" +from __future__ import annotations + +import json +import re +import sys +from pathlib import Path +from html.parser import HTMLParser + + +# --------------------------------------------------------------------------- +# HTML inner-text extractor +# --------------------------------------------------------------------------- + +class _TextExtractor(HTMLParser): + def __init__(self): + super().__init__() + self.parts: list[str] = [] + + def handle_data(self, data: str): + self.parts.append(data) + + def get_text(self) -> str: + return " ".join(self.parts).strip() + + +def _text(html: str) -> str: + p = _TextExtractor() + p.feed(html) + return p.get_text() + + +# --------------------------------------------------------------------------- +# Divi block parsing +# --------------------------------------------------------------------------- + +# Matches opening block comment: <!-- wp:divi/MODULE {JSON} --> +_BLOCK_OPEN = re.compile(r"<!--\s*wp:(divi/[a-z0-9_-]+)\s*(.*?)--?>", re.DOTALL) +# Matches closing block comment: <!-- /wp:divi/MODULE --> +_BLOCK_CLOSE = re.compile(r"<!--\s*/wp:(divi/[a-z0-9_-]+)\s*-->") + +# Strip et_pb_* class tokens and data-et-* attributes +_ET_CLASS = re.compile(r"\b(et_pb_[a-z0-9_-]+|divi-[a-z0-9_-]+-[a-z0-9_-]+|d5_[a-z0-9_-]+)\b", re.IGNORECASE) +_ET_ATTR = re.compile(r'\s+data-(?:et|builder|module-id|module-class|d5)-[a-z0-9_-]+\s*=\s*"[^"]*"', re.IGNORECASE) +_EMPTY_CL = re.compile(r'\s+class="\s*"') + + +def _clean(html: str) -> str: + """Strip Divi noise from an HTML fragment.""" + out = _BLOCK_OPEN.sub("", html) + out = _BLOCK_CLOSE.sub("", out) + out = _ET_ATTR.sub("", out) + out = _ET_CLASS.sub("", out) + out = _EMPTY_CL.sub("", out) + out = re.sub(r"\n{3,}", "\n\n", out) + return out.strip() + + +def _parse_attrs(raw_json: str) -> dict: + 
"""Parse the JSON attrs blob from a block comment (may be empty).""" + raw_json = raw_json.strip() + if not raw_json: + return {} + try: + return json.loads(raw_json) + except Exception: + return {} + + +def _extract_inner(content: str, block_type: str) -> str: + """Return the raw inner HTML of the first matching block.""" + open_pat = re.compile(rf"<!--\s*wp:{re.escape(block_type)}[^>]*-->", re.DOTALL) + close_pat = re.compile(rf"<!--\s*/wp:{re.escape(block_type)}\s*-->") + m = open_pat.search(content) + if not m: + return "" + start = m.end() + m2 = close_pat.search(content, start) + end = m2.start() if m2 else len(content) + return content[start:end] + + +def _bg_color(attrs: dict) -> str: + """Extract background colour from Divi 5 attrs dict.""" + bg = attrs.get("backgroundColor", {}) + if isinstance(bg, dict): + return bg.get("value", bg.get("color", "")) + return str(bg) if bg else "" + + +def _section_type(bg: str) -> str: + """Classify section by background colour.""" + dark_colors = {"#0f5f53", "#1a3a34", "#0d4d42"} + brand_colors = {"#1a8a7a", "#20a090"} + light_colors = {"#f5f5f5", "#fafafa", "#f0f0f0", "#efefef"} + bg_lower = bg.lower().strip() + if bg_lower in dark_colors: + return "dark" + if bg_lower in brand_colors: + return "brand" + if bg_lower in light_colors: + return "light" + if bg_lower in ("#ffffff", "#fff", ""): + return "white" + return "custom" + + +# --------------------------------------------------------------------------- +# Section/module extraction +# --------------------------------------------------------------------------- + +def _extract_modules(section_html: str) -> list[dict]: + """Walk block comments inside a section and extract module data.""" + modules: list[dict] = [] + pos = 0 + content = section_html + + for m in _BLOCK_OPEN.finditer(content): + block_type = m.group(1) # e.g. 
"divi/text" + attrs = _parse_attrs(m.group(2)) + inner_start = m.end() + + # Find matching close tag + close_pat = re.compile(rf"<!--\s*/wp:{re.escape(block_type)}\s*-->") + close_m = close_pat.search(content, inner_start) + inner_html = content[inner_start : close_m.start() if close_m else len(content)] + clean_inner = _clean(inner_html) + + module_type = block_type.split("/")[-1] # "text", "button", "image", etc. + + mod: dict = {"module": module_type} + + if module_type == "text": + mod["html"] = clean_inner + mod["text"] = _text(clean_inner) + + elif module_type in ("button", "cta"): + mod["text"] = attrs.get("buttonText", _text(clean_inner)) + mod["url"] = attrs.get("buttonUrl", attrs.get("url", "#")) + + elif module_type == "image": + src = attrs.get("src", attrs.get("url", "")) + mod["src"] = src + mod["alt"] = attrs.get("altText", attrs.get("alt", "")) + mod["caption"] = attrs.get("caption", "") + + elif module_type == "blurb": + mod["title"] = attrs.get("title", "") + mod["icon"] = attrs.get("iconName", "") + mod["html"] = clean_inner + mod["text"] = _text(clean_inner) + + elif module_type == "testimonial": + mod["quote"] = attrs.get("content", _text(clean_inner)) + mod["author"] = attrs.get("authorName", "") + mod["company"] = attrs.get("authorJobTitle", "") + + elif module_type == "video": + mod["src"] = attrs.get("src", "") + mod["poster"] = attrs.get("poster", attrs.get("image", "")) + + elif module_type in ("accordion", "toggle"): + items = re.findall(r"<dt[^>]*>(.*?)</dt>\s*<dd[^>]*>(.*?)</dd>", clean_inner, re.DOTALL) + mod["items"] = [{"q": q.strip(), "a": a.strip()} for q, a in items] + + elif module_type == "contact_form": + mod["form_id"] = attrs.get("formId", "") + mod["note"] = "REPLACE with AM vanilla form — see 08-forms.md" + + else: + mod["html"] = clean_inner + mod["attrs"] = attrs + + modules.append(mod) + + return modules + + +def parse_page_content(content_raw: str) -> list[dict]: + """Parse Divi 5 block content into a list of section 
dicts.""" + sections: list[dict] = [] + + section_pat = re.compile(r"<!--\s*wp:divi/section(.*?)-->", re.DOTALL) + section_close = re.compile(r"<!--\s*/wp:divi/section\s*-->") + + for sm in section_pat.finditer(content_raw): + attrs = _parse_attrs(sm.group(1).strip()) + start = sm.end() + close_m = section_close.search(content_raw, start) + sec_html = content_raw[start : close_m.start() if close_m else len(content_raw)] + + bg = _bg_color(attrs) + sec_type = _section_type(bg) + modules = _extract_modules(sec_html) + + # Determine semantic role from first module + role = "content" + if modules and modules[0]["module"] in ("fullwidth_header", "text"): + first_html = modules[0].get("html", "") + if "<h1" in first_html: + role = "hero" + + sections.append({ + "role": role, + "section_type": sec_type, + "background_color": bg, + "attrs": attrs, + "modules": modules, + }) + + return sections + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main(): + if len(sys.argv) < 3: + print(f"Usage: {sys.argv[0]} <pages_json> <output_dir>") + sys.exit(1) + + pages_path = Path(sys.argv[1]) + out_dir = Path(sys.argv[2]) + out_dir.mkdir(parents=True, exist_ok=True) + + pages = json.loads(pages_path.read_text(encoding="utf-8")) + print(f"Processing {len(pages)} pages...") + + for page in pages: + slug = page.get("slug") or f"page-{page['id']}" + content = page.get("content_raw", "") + + sections = parse_page_content(content) if content.strip() else [] + + output = { + "id": page["id"], + "slug": slug, + "title": page["title"], + "post_type": page["post_type"], + "seo_title": page.get("seo_title", ""), + "seo_description": page.get("seo_description", ""), + "seo_keywords": page.get("seo_keywords", ""), + "acf": page.get("acf", {}), + "date": page.get("date", ""), + "modified": page.get("modified", ""), + "sections": sections, + "section_count": len(sections), + 
CTA_KEYWORDS = {'book', 'get started', 'contact', 'sign up', 'register', 'join', 'buy', 'shop'}

# Backslash escapes inside SQL strings that decode to a control character.
_NAV_SQL_ESCAPES = {'n': '\n', 'r': '\r', 't': '\t', '0': '\0'}


def _iter_insert_rows(sql: str, table: str):
    """Yield every VALUES tuple inserted into `table` as a list of str/None.

    The scanner is quote-aware, so commas, parentheses and semicolons inside
    string values do not split or truncate a row. Quoted values are unescaped;
    an unquoted NULL is yielded as None.
    """
    head = re.compile(
        r"INSERT INTO `%s`\s*(?:\([^)]*\)\s*)?VALUES" % re.escape(table),
        re.IGNORECASE,
    )
    n = len(sql)
    pos = 0
    while True:
        m = head.search(sql, pos)
        if m is None:
            return
        i = m.end()
        row = None      # None while between tuples
        buf = []
        quoted = False  # current field contained a quoted string
        quote = ''
        while i < n:
            c = sql[i]
            if quote:
                if c == '\\' and i + 1 < n:
                    nxt = sql[i + 1]
                    buf.append(_NAV_SQL_ESCAPES.get(nxt, nxt))
                    i += 2
                    continue
                if c == quote:
                    if sql[i + 1:i + 2] == quote:
                        buf.append(c)  # doubled quote = literal quote
                        i += 2
                        continue
                    quote = ''
                else:
                    buf.append(c)
            elif c in ('"', "'"):
                quote = c
                quoted = True
            elif row is None:
                if c == '(':
                    row, buf, quoted = [], [], False
                elif c == ';':
                    i += 1
                    break
            elif c in (',', ')'):
                text = ''.join(buf)
                if not quoted:
                    text = text.strip()
                    text = None if text.upper() == 'NULL' else text
                row.append(text)
                buf, quoted = [], False
                if c == ')':
                    yield row
                    row = None
            else:
                buf.append(c)
            i += 1
        pos = i


def _normalize_href(href, site_host):
    """Turn a menu URL into the href written to nav.json.

    Same-site absolute URLs become root-relative. External URLs and non-HTTP
    schemes (mailto:, tel:, ...) are returned unchanged. When the site host is
    unknown every absolute http(s) URL is made relative.
    """
    def bare(host):
        return host[4:] if host.startswith('www.') else host

    href = (href or '').strip()
    if not href:
        return '/'
    absolute = re.match(r'https?://([^/?#]+)(.*)$', href, re.IGNORECASE | re.DOTALL)
    if absolute:
        if site_host and bare(absolute.group(1).lower()) != bare(site_host):
            return href
        href = absolute.group(2) or '/'
    elif re.match(r'[a-z][a-z0-9+.-]*:', href, re.IGNORECASE):
        return href
    if not href.startswith('/'):
        href = '/' + href
    return href


def extract_nav(extract_dir: str, data_dir: str):
    """Extract published navigation menu items from database.sql into nav.json.

    Reads `<extract_dir>/database.sql` and writes `<data_dir>/nav.json` as a
    list of {label, href, display_order, is_cta}, ordered by the items'
    menu_order. Exits with status 1 when the SQL file is missing.

    Rows are read by column position and assume the standard wp_posts layout
    (ID, ..., post_title at 5, post_status at 7, post_name at 11, menu_order
    at 19, post_type at 20) and wp_postmeta layout (meta_id, post_id,
    meta_key, meta_value) -- TODO confirm for dumps with custom schemas.

    Items that link to a post/page take their label (when the item has none)
    and a best-effort href ("/" + slug) from the linked post; review these
    hrefs before publishing. Items of all menus are merged into one list.
    """
    sql_path = os.path.join(extract_dir, 'database.sql')
    if not os.path.exists(sql_path):
        print(f"ERROR: {sql_path} not found", file=sys.stderr)
        sys.exit(1)

    with open(sql_path, encoding='utf-8', errors='replace') as f:
        sql = f.read()

    # Detect table prefix. Plugin tables can also end in "options"; the core
    # table yields the shortest candidate.
    prefixes = re.findall(r"INSERT INTO `(\w+)options`", sql)
    prefix = min(prefixes, key=len) if prefixes else 'wp_'

    # Site host, used to tell internal links from external ones.
    host_match = re.search(r"'(?:home|siteurl)',\s*'https?://([^'/]+)", sql)
    site_host = host_match.group(1).lower() if host_match else None

    posts = {}      # post id -> {'title', 'slug'} for resolving linked objects
    nav_posts = {}  # nav item id -> working record
    for row in _iter_insert_rows(sql, prefix + 'posts'):
        if len(row) < 21 or row[0] is None:
            continue
        post_id = row[0]
        title = row[5] or ''
        posts[post_id] = {'title': title, 'slug': row[11] or ''}
        if row[20] == 'nav_menu_item' and row[7] == 'publish':
            order = row[19] or ''
            nav_posts[post_id] = {
                'label': title,
                'href': '',
                'menu_order': int(order) if order.isdigit() else 0,
                'type': None,
                'object_id': None,
            }

    # Menu item target data lives in postmeta.
    for row in _iter_insert_rows(sql, prefix + 'postmeta'):
        if len(row) < 4:
            continue
        item = nav_posts.get(row[1])
        if item is None:
            continue
        key, value = row[2], row[3]
        if key == '_menu_item_url' and value:
            item['href'] = value
        elif key == '_menu_item_object_id':
            item['object_id'] = value
        elif key == '_menu_item_type':
            item['type'] = value

    # Keyword must start a word, so "Facebook" is not a "book" CTA.
    cta_re = re.compile(
        r"\b(?:%s)" % "|".join(re.escape(k) for k in sorted(CTA_KEYWORDS, key=len, reverse=True))
    )

    def sort_key(entry):
        post_id, item = entry
        return (item['menu_order'], int(post_id) if post_id.isdigit() else 0)

    items = []
    for post_id, item in sorted(nav_posts.items(), key=sort_key):
        target = posts.get(item['object_id']) if item['type'] == 'post_type' else None
        label = item['label'].strip()
        href = item['href']
        if target:
            label = label or target['title'].strip()
            if not href and target['slug']:
                href = '/' + target['slug']
        if not label:
            continue
        items.append({
            'label': label,
            'href': _normalize_href(href, site_host),
            'display_order': len(items) + 1,
            'is_cta': 1 if cta_re.search(label.lower()) else 0,
        })

    os.makedirs(data_dir, exist_ok=True)
    out_path = os.path.join(data_dir, 'nav.json')
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(items, f, indent=2, ensure_ascii=False)

    print(f"nav.json: {len(items)} items → {out_path}")
    for item in items:
        print(f"  {'[CTA]' if item['is_cta'] else '     '} {item['label']} → {item['href']}")
# Layout of one .wpress entry header (all fields null-padded).
HEADER_SIZE = 4377
NAME_LEN = 255
SIZE_LEN = 14
MTIME_LEN = 12
PATH_LEN = 4096


def _parse_int(b: bytes) -> int:
    """Decode a null-padded ASCII integer field; an empty field is 0."""
    s = b.split(b"\x00", 1)[0].decode(errors="replace").strip()
    return int(s) if s else 0


def _parse_str(b: bytes) -> str:
    """Decode a null-padded string field up to the first NUL byte."""
    return b.split(b"\x00", 1)[0].decode(errors="replace")


def _safe_destination(root: Path, rel_dir: str, name: str):
    """Return the target path for an archive entry, or None if it is unsafe.

    ``root`` must already be resolved. An entry is unsafe when its name is
    empty or contains a path separator, when its directory contains a ``..``
    component, or when the resolved target does not lie inside ``root``.
    """
    if not name or name in (".", "..") or "/" in name or "\\" in name:
        return None
    parts = [p for p in rel_dir.replace("\\", "/").split("/") if p not in ("", ".")]
    if ".." in parts:
        return None
    dest = root.joinpath(*parts, name)
    # Final guard: catches drive-letter components and symlinked directories.
    if root not in dest.resolve().parents:
        return None
    return dest


def extract(wpress_path: str, out_dir: str, verbose: bool = True) -> dict:
    """Extract a .wpress archive into ``out_dir``.

    Entries whose name is empty or whose path would escape ``out_dir`` are
    skipped (and counted in ``skipped``) instead of being written. If the
    archive ends in the middle of a file, the partial file is kept, a warning
    is printed to stderr and extraction stops.

    Returns a dict with ``files``, ``bytes`` (bytes actually written), ``mb``,
    ``skipped`` and ``out_dir``.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    root = out.resolve()
    count = 0
    total_bytes = 0
    skipped = 0

    with open(wpress_path, "rb") as f:
        while True:
            header = f.read(HEADER_SIZE)
            if len(header) < HEADER_SIZE or header == b"\x00" * HEADER_SIZE:
                break  # short read or all-zero end-of-archive marker

            offset = 0
            name = _parse_str(header[offset:offset + NAME_LEN])
            offset += NAME_LEN
            size = _parse_int(header[offset:offset + SIZE_LEN])
            offset += SIZE_LEN
            mtime = _parse_int(header[offset:offset + MTIME_LEN])
            offset += MTIME_LEN
            path = _parse_str(header[offset:offset + PATH_LEN])

            dest_file = _safe_destination(root, path, name)
            if dest_file is None:
                if name:
                    print(f"WARNING: skipping unsafe entry {path!r}/{name!r}", file=sys.stderr)
                skipped += 1
                f.seek(size, 1)  # jump over the entry's data
                continue

            # Only create directories for entries that are actually written.
            dest_file.parent.mkdir(parents=True, exist_ok=True)

            written = 0
            with open(dest_file, "wb") as o:
                while written < size:
                    chunk = f.read(min(65536, size - written))
                    if not chunk:
                        break
                    o.write(chunk)
                    written += len(chunk)

            # Restoring the timestamp is best effort only.
            try:
                if mtime > 0:
                    os.utime(dest_file, (mtime, mtime))
            except (OSError, OverflowError, ValueError):
                pass

            count += 1
            total_bytes += written

            if written < size:
                print(
                    f"WARNING: archive truncated inside {name!r} "
                    f"({written} of {size} bytes)",
                    file=sys.stderr,
                )
                break

            if verbose and count % 200 == 0:
                print(f"  [{count} files | {total_bytes / 1024 / 1024:.1f} MB extracted]", flush=True)

    result = {
        "files": count,
        "bytes": total_bytes,
        "mb": round(total_bytes / 1024 / 1024, 1),
        "skipped": skipped,
        "out_dir": str(out),
    }
    print(f"DONE: {count} files | {result['mb']} MB -> {out_dir} (skipped {skipped})")
    return result
def main():
    """Run migration phases 0-6 for one .wpress backup, then print next steps.

    Phases 1 and 2 are mandatory and abort the run on failure. Navigation,
    content, media and seed staging are best effort: a missing helper script
    or a failing step prints a warning and the run continues.
    """
    parser = argparse.ArgumentParser(description='AM Stack A migration launcher')
    parser.add_argument('--wpress', required=True, help='Path to .wpress backup file')
    parser.add_argument('--domain', required=True, help='Target domain (e.g. example.com)')
    parser.add_argument('--project', help='Project directory (default: ~/arisingmedia-websites/{domain})')
    args = parser.parse_args()

    wpress = os.path.abspath(args.wpress)
    domain = args.domain
    project = args.project or os.path.expanduser(f'~/arisingmedia-websites/{domain}')
    extract_dir = os.path.join(project, '.planning', 'wpress-extract')
    data_dir = os.path.join(project, '.planning', 'data')
    content_dir = os.path.join(data_dir, 'content')
    images_dir = os.path.join(project, 'assets', 'images')

    # Run helpers with the interpreter that runs this launcher.
    python = sys.executable or 'python3'

    def script(name: str) -> str:
        return os.path.join(SCRIPTS, name)

    if not os.path.exists(wpress):
        print(f"ERROR: .wpress file not found: {wpress}")
        sys.exit(1)

    print(f"\nAM Stack A Migration Pipeline")
    print(f"  Domain:  {domain}")
    print(f"  Project: {project}")
    print(f"  Archive: {wpress}")

    # Phase 0 — Setup
    phase_header(0, 'Setup')
    for d in [extract_dir, data_dir, content_dir,
              images_dir,
              os.path.join(project, 'build'),
              os.path.join(project, 'src', 'api', 'data'),
              os.path.join(project, 'src', 'api', 'templates'),
              os.path.join(project, 'src', 'api', 'components')]:
        os.makedirs(d, exist_ok=True)
        print(f"  mkdir {d}")

    # Phase 1 — Extract
    phase_header(1, 'Extract .wpress archive')
    if not run([python, script('extract_wpress.py'), wpress, extract_dir], 'Phase 1'):
        sys.exit(1)

    # Phase 2 — DB Analysis
    phase_header(2, 'Database analysis')
    if not run([python, script('analyze_db.py'), extract_dir, data_dir], 'Phase 2'):
        sys.exit(1)

    # Detect Divi version. site-info.json stores it as a string that may also
    # be "4" or "unknown", so it cannot be used as a script name unchecked.
    site_info_path = os.path.join(data_dir, 'site-info.json')
    divi_version = '5'
    if os.path.exists(site_info_path):
        with open(site_info_path) as f:
            info = json.load(f)
        divi_version = str(info.get('divi_version') or '5')
        print(f"  Divi version detected: {divi_version}")

    # Phase 3 — Nav extraction
    phase_header(3, 'Extract navigation menus')
    run([python, script('extract_nav.py'), extract_dir, data_dir], 'Phase 3 (nav)')

    # Phase 3 — Content extraction
    extract_script = script(f'extract_divi{divi_version}.py')
    pages_json = os.path.join(data_dir, 'pages.json')
    if not os.path.exists(extract_script):
        print(f"  WARNING: no content extractor for Divi version {divi_version!r} "
              f"({extract_script} not found) — extract content manually into {content_dir}")
    elif not run([python, extract_script, pages_json, content_dir], f'Phase 3 (divi{divi_version})'):
        print(f"  WARNING: content extraction had errors — review {content_dir}")

    # Phase 5 — Media
    phase_header(5, 'Extract and convert media')
    media_script = script('extract_media.py')
    if os.path.exists(media_script):
        run([python, media_script, extract_dir, data_dir, images_dir], 'Phase 5')
    else:
        print(f"  WARNING: extract_media.py not found — media must be migrated manually")

    # Phase 6 — Stage seed_databases.py
    phase_header(6, 'Stage seed_databases.py skeleton')
    seed_path = os.path.join(project, 'build', 'seed_databases.py')
    stage_script = script('stage_seed.py')
    if os.path.exists(stage_script):
        run([python, stage_script, data_dir, seed_path, '--domain', domain], 'Phase 6')
    else:
        print(f"  WARNING: stage_seed.py not found — seed_databases.py must be written manually")
        print(f"  Reference: /home/sirdrez/arisingmedia-websites/vibrantyou.yoga/build/seed_databases.py")

    # Print agent breadcrumbs for remaining phases
    print(f"\n{'='*60}")
    print("  EXTRACTION COMPLETE — Manual/Agent phases follow")
    print(f"{'='*60}")
    # SOPS is the pipeline directory itself (parent of scripts/), so the
    # breadcrumbs document sits directly inside it.
    print(f"""
Phases 0-6 complete. Staged content is at:
  {data_dir}/content/            ← extracted page sections (JSON)
  {data_dir}/nav.json            ← navigation items
  {data_dir}/media-manifest.json ← image URL mappings
  {seed_path}                    ← seed_databases.py skeleton

Next steps (see 10-agent-breadcrumbs.md for full detail):

  Phase 7 — REVIEW seed_databases.py
    Open: {seed_path}
    For each page: verify sections_json has correct section types
    Replace em-dashes. Remove Divi shortcode residue. Review nav items.

  Phase 8 — RUN seed_databases.py
    cd {project} && python3 build/seed_databases.py
    Verify: output shows all counts > 0

  Phase 9 — SCAFFOLD PHP templates
    Copy from reference: vibrantyou.yoga/src/api/
    Update brand name and colors in _header.php + _footer.php

  Phase 10 — BUILD
    cd {project} && docker compose build --no-cache && docker compose up -d
    Verify: curl -I http://localhost:PORT/

  Phase 11 — QA
    bash {SOPS}/../tools/verify-protection.sh http://localhost:PORT
    Lighthouse in Firefox

Reference: {SOPS}/10-agent-breadcrumbs.md
""")
echo "================================================"
echo "  AM WP+Divi Pipeline"
echo "  Domain:  $DOMAIN"
# Quote the path so an archive name containing spaces is not word-split.
echo "  Archive: $(basename "$WPRESS")"
echo "================================================"
echo ""

# ---------------------------------------------------------------------------
# Phase 0 — Directory structure
# ---------------------------------------------------------------------------
echo "[Phase 0] Creating directory structure..."
mkdir -p "$PROJECT"/{src/{about,services,contact,blog,classes,components,assets/{css,js,images,svg,fonts}},build,infra,api}
# A brace group with a single element ("{content}") is not expanded by Bash
# and would create a directory literally named "{content}".
mkdir -p "$PROJECT/.planning"/{data/content,scripts,wpress-extract}
echo "  OK: directories created"
echo ""

# ---------------------------------------------------------------------------
# Phase 1 — Extract .wpress archive
# ---------------------------------------------------------------------------
EXTRACT_DIR="$PROJECT/.planning/wpress-extract"

# Re-runs skip extraction when the database dump is already present.
if [ -f "$EXTRACT_DIR/database.sql" ]; then
  echo "[Phase 1] Archive already extracted — skipping"
  echo "  Found: $EXTRACT_DIR/database.sql"
else
  echo "[Phase 1] Extracting archive (this may take a few minutes)..."
  python3 "$SCRIPTS/extract_wpress.py" "$WPRESS" "$EXTRACT_DIR"
  echo "  OK: extraction complete"
fi
echo ""

# ---------------------------------------------------------------------------
# Phase 2 — Database analysis
# ---------------------------------------------------------------------------
DATA_DIR="$PROJECT/.planning/data"
echo "[Phase 2] Analyzing database..."
# ---------------------------------------------------------------------------
# Phase 5 — Media migration
# ---------------------------------------------------------------------------
UPLOADS_DIR="$EXTRACT_DIR/uploads"
IMAGES_DIR="$PROJECT/src/assets/images"

if [ -d "$UPLOADS_DIR" ]; then
  echo "[Phase 5] Migrating media..."
  mkdir -p "$IMAGES_DIR"

  # Catalog originals (skip WP-generated size variants).
  # grep exits 1 when it selects no line; under "set -o pipefail" that would
  # abort the whole script, so only exit codes above 1 count as errors.
  find "$UPLOADS_DIR" -type f \( -name "*.jpg" -o -name "*.jpeg" -o -name "*.png" -o -name "*.gif" -o -name "*.webp" \) \
    | { grep -v -E "\-[0-9]+x[0-9]+\.(jpg|jpeg|png|webp|gif)$" || [ "$?" -eq 1 ]; } \
    | sort > "$DATA_DIR/media-originals.txt"

  MEDIA_COUNT=$(wc -l < "$DATA_DIR/media-originals.txt")
  echo "  Found: $MEDIA_COUNT original images"

  # Copy to src/assets/images/ (flat). Uploads in different year/month
  # folders can share a basename; keep the first one and report the clash.
  while IFS= read -r src_img; do
    fname=$(basename "$src_img")
    dest="$IMAGES_DIR/$fname"
    if [ -e "$dest" ] && ! cmp -s "$src_img" "$dest"; then
      echo "  WARN: name collision, kept existing $fname (skipped $src_img)"
      continue
    fi
    cp "$src_img" "$dest"
  done < "$DATA_DIR/media-originals.txt"

  # Convert to WebP if cwebp available
  if command -v cwebp &>/dev/null; then
    echo "  Converting to WebP..."
    for img in "$IMAGES_DIR"/*.jpg "$IMAGES_DIR"/*.jpeg "$IMAGES_DIR"/*.png; do
      [ -f "$img" ] || continue
      # Remove the source only after a successful conversion.
      if cwebp -q 82 "$img" -o "${img%.*}.webp" 2>/dev/null; then
        rm "$img"
      else
        echo "  WARN: cwebp failed for $(basename "$img") — original kept"
      fi
    done
    # find exits 0 when nothing matches, unlike "ls *.webp".
    WEBP_COUNT=$(find "$IMAGES_DIR" -maxdepth 1 -type f -name "*.webp" | wc -l)
    echo "  WebP files: $WEBP_COUNT"
  else
    echo "  WARN: cwebp not found — images copied as-is (convert manually)"
  fi
  echo "  OK: media migrated to $IMAGES_DIR"
else
  echo "[Phase 5] No uploads/ directory found — skipping media migration"
fi
echo ""
Remaining pages"
+echo ""
+
+# ---------------------------------------------------------------------------
+# Phase 7 — SEO audit
+# ---------------------------------------------------------------------------
+echo "[Phase 7] SEO audit (run after HTML build):"
+echo " grep -rL '<title>' $PROJECT/src --include='*.html' | grep -v _template"
+echo " grep -rL 'canonical' $PROJECT/src --include='*.html' | grep -v _template"
+echo " grep -rL 'ld+json' $PROJECT/src --include='*.html' | grep -v _template"
+echo " grep -r '{{' $PROJECT/src --include='*.html'"
+echo ""
+
+# ---------------------------------------------------------------------------
+# Phase 8 — Infra
+# ---------------------------------------------------------------------------
+echo "[Phase 8] Infra setup:"
+echo " Copy Dockerfile + docker-compose.yml from vibrantyoucoaching.com"
+echo " Update server_name in infra/nginx.conf to: $DOMAIN"
+echo " Run: docker compose up -d --build"
+echo ""
+
+# ---------------------------------------------------------------------------
+# Phase 9 — Protection check
+# ---------------------------------------------------------------------------
+echo "[Phase 9] After deploy, run:"
+echo " bash $SOPS/tools/verify-protection.sh https://$DOMAIN"
+echo ""
+
+echo "================================================"
+echo " Pipeline setup complete."
+echo " Phases 0-3 + 5 executed automatically."
+echo " Phases 4, 6, 7, 8, 9 require manual steps."
+echo " See $SOPS/wp-divi-pipeline/ for all SOPs."
+echo "================================================"
diff --git a/wp-divi-pipeline-to-am-stack/scripts/stage_seed.py b/wp-divi-pipeline-to-am-stack/scripts/stage_seed.py
new file mode 100644
index 0000000..74d1521
--- /dev/null
+++ b/wp-divi-pipeline-to-am-stack/scripts/stage_seed.py
@@ -0,0 +1,597 @@
+#!/usr/bin/env python3
+"""
+stage_seed.py — Phase 6 of WP/Divi → Stack A migration pipeline.
+
+Reads extracted JSON from prior pipeline run and generates a seed_databases.py
+skeleton for the target project. Human/agent reviews [FILL] markers and fills
+gaps before running the seeder.
+
+Usage:
+    python3 stage_seed.py <data_dir> <seed_path> --domain <domain> [--force]
+
+Example:
+    python3 stage_seed.py /path/to/.planning/data build/seed_databases.py --domain example.com
+"""
+
+import argparse
+import json
+import os
+import pprint
+import re
+from datetime import datetime
+
+
+def slugify(text):
+    """Convert text to URL-safe slug."""
+    return re.sub(r'[^a-z0-9]+', '-', text.lower()).strip('-')
+
+
+def infer_template(slug):
+    """Infer template type from page slug."""
+    slug_lower = slug.lower()
+    if slug_lower == 'home':
+        return 'home'
+    elif slug_lower in ('classes', 'class'):
+        return 'classes'
+    elif slug_lower == 'schedule':
+        return 'schedule'
+    elif slug_lower == 'glossary':
+        return 'glossary'
+    elif slug_lower in ('blog', 'posts', 'articles'):
+        return 'blog'
+    else:
+        return 'static'
+
+
+def load_json_file(path):
+    """Load a JSON file; return None if it is missing or cannot be read/parsed."""
+    if not os.path.exists(path):
+        return None
+    try:
+        with open(path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    except Exception as e:
+        print(f"Warning: Failed to load {path}: {e}")
+        return None
+
+
+def to_python_literal(obj):
+    """Render JSON-derived data as Python source text.
+
+    json.dumps() output is not safe to paste into Python code: it writes
+    true/false/null, and it escapes characters outside the BMP as surrogate
+    pairs that a Python string literal does not recombine. pprint emits
+    genuine Python literals instead.
+    """
+    return pprint.pformat(obj, width=100, sort_dicts=False)
+
+
+def generate_seed_script(data_dir, domain, design_system, pages, glossary, nav):
+    """Generate the seed_databases.py script content.
+
+    Args:
+        data_dir: Source data directory; only echoed into the generated header.
+        domain: Site domain used to build each page's canonical URL.
+        design_system: Parsed design-system.json; accepted but currently unused.
+        pages: List of page dicts; only published entries of post_type 'page' are seeded.
+        glossary: Parsed glossary.json; falsy emits an empty glossary table.
+        nav: Parsed nav.json; falsy emits a [FILL] navigation stub.
+
+    Returns:
+        The complete source text of the seeder script.
+    """
+    now = datetime.now().isoformat()
+
+    # Build pages_data list in outer scope
+    pages_list = []
+    for page in pages:
+        if page.get('status') != 'publish' or page.get('post_type') != 'page':
+            continue
+
+        slug = page.get('slug') or ''
+        title = page.get('title') or '[FILL] Title needed'
+        meta_desc = page.get('seo_description', '')
+        if not meta_desc:
+            meta_desc = f"[FILL] Meta description for {slug}"
+
+        canonical = f"https://{domain}/{slug}/" if slug != 'home' else f"https://{domain}/"
+        date_str = page.get('date', datetime.now().isoformat())
+
+        # Infer template via the shared helper so slug aliases (class, posts, articles) map too
+        template = infer_template(slug)
+
+        pages_list.append({
+            'slug': slug,
+            'template': template,
+            'title': title,
+            'meta_description': meta_desc,
+            'canonical_url': canonical,
+            'hero_h1': f"[FILL] {title}",
+            'sections_json': '[]',
+            'updated_at': date_str
+        })
+
+    # Render pages_data as a Python literal for embedding in the generated source
+    pages_literal = to_python_literal(pages_list)
+
+    script = f'''#!/usr/bin/env python3
+"""
+seed_databases.py — generated by stage_seed.py on {now}
+Source: {data_dir}
+Domain: {domain}
+
+EDIT THIS FILE then run: python3 build/seed_databases.py
+Content marked [FILL] needs human/agent review before seeding.
+"""
+import sqlite3
+import json
+import os
+from datetime import datetime
+
+DB_DIR = os.path.join(os.path.dirname(__file__), '..', 'src', 'api', 'data')
+os.makedirs(DB_DIR, exist_ok=True)
+
+
+def slugify(text):
+    """Convert text to URL-safe slug."""
+    import re
+    return re.sub(r'[^a-z0-9]+', '-', text.lower()).strip('-')
+
+
+def seed_pages():
+    """Create pages.sqlite and populate with published pages."""
+    db_path = os.path.join(DB_DIR, 'pages.sqlite')
+    conn = sqlite3.connect(db_path)
+    c = conn.cursor()
+
+    c.execute("""
+        CREATE TABLE IF NOT EXISTS pages (
+            id INTEGER PRIMARY KEY,
+            slug TEXT UNIQUE NOT NULL,
+            template TEXT NOT NULL,
+            title TEXT NOT NULL,
+            meta_description TEXT,
+            canonical_url TEXT,
+            og_image TEXT,
+            schema_json TEXT,
+            hero_eyebrow TEXT,
+            hero_h1 TEXT,
+            hero_lead TEXT,
+            sections_json TEXT,
+            updated_at TEXT
+        )
+    """)
+
+    pages_data = {pages_literal}
+
+    for page in pages_data:
+        c.execute("""
+            INSERT OR REPLACE INTO pages
+            (slug, template, title, meta_description, canonical_url, hero_h1, sections_json, updated_at)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+        """, (
+            page['slug'],
+            page['template'],
+            page['title'],
+            page['meta_description'],
+            page['canonical_url'],
+            page['hero_h1'],
+            page['sections_json'],
+            page['updated_at']
+        ))
+
+    conn.commit()
+    conn.close()
+    print(f"✓ pages.sqlite created with {{len(pages_data)}} pages")
+
+
+def seed_nav():
+    """Create nav.sqlite and populate navigation items."""
+    db_path = os.path.join(DB_DIR, 'nav.sqlite')
+    conn = sqlite3.connect(db_path)
+    c = conn.cursor()
+
+    c.execute("""
+        CREATE TABLE IF NOT EXISTS nav_items (
+            id INTEGER PRIMARY KEY,
+            label TEXT NOT NULL,
+            href TEXT NOT NULL,
+            display_order INTEGER DEFAULT 0,
+            is_cta INTEGER DEFAULT 0
+        )
+    """)
+'''
+
+    if nav:
+        script += f'''
+    nav_items = {to_python_literal(nav)}
+
+    # nav_items has no unique key, so clear it first; otherwise every re-run
+    # of the seeder would append a duplicate set of rows.
+    c.execute("DELETE FROM nav_items")
+    for item in nav_items:
+        c.execute("""
+            INSERT INTO nav_items (label, href, display_order, is_cta)
+            VALUES (?, ?, ?, ?)
+        """, (item['label'], item['href'], item.get('display_order', 0), item.get('is_cta', 0)))
+
+    conn.commit()
+    conn.close()
+    print(f"✓ nav.sqlite created with {{len(nav_items)}} nav items")
+'''
+    else:
+        script += '''
+    # [FILL] nav.json not found — add navigation items manually
+    # Example:
+    # nav_items = [
+    #     {"label": "Home", "href": "/", "display_order": 1, "is_cta": 0},
+    #     {"label": "Classes", "href": "/classes", "display_order": 2, "is_cta": 0},
+    #     {"label": "Schedule", "href": "/schedule", "display_order": 3, "is_cta": 0},
+    #     {"label": "Get Started", "href": "/contact", "display_order": 4, "is_cta": 1},
+    # ]
+    # Then uncomment and insert rows
+
+    conn.commit()
+    conn.close()
+    print("✓ nav.sqlite created (empty — [FILL] navigation items)")
+'''
+
+    # Seed glossary
+    if glossary:
+        script += f'''
+
+
+def seed_glossary():
+    """Create glossary.sqlite and populate terms."""
+    db_path = os.path.join(DB_DIR, 'glossary.sqlite')
+    conn = sqlite3.connect(db_path)
+    c = conn.cursor()
+
+    c.execute("""
+        CREATE TABLE IF NOT EXISTS terms (
+            id INTEGER PRIMARY KEY,
+            slug TEXT UNIQUE NOT NULL,
+            term TEXT NOT NULL,
+            pronunciation TEXT,
+            definition TEXT NOT NULL,
+            category TEXT NOT NULL,
+            level TEXT NOT NULL,
+            display_order INTEGER DEFAULT 0
+        )
+    """)
+
+    glossary_items = {to_python_literal(glossary)}
+
+    for idx, item in enumerate(glossary_items):
+        fields = item.get('fields', {{}})
+        term = fields.get('sanskrit_name', '[FILL] Term needed')
+        slug = slugify(term)
+        pronunciation = fields.get('pronunciation', '')
+        definition = fields.get('definition', '[FILL] Definition needed')
+        category = fields.get('category', 'yoga')
+        level = fields.get('level', 'beginner')
+
+        c.execute("""
+            INSERT OR REPLACE INTO terms
+            (slug, term, pronunciation, definition, category, level, display_order)
+            VALUES (?, ?, ?, ?, ?, ?, ?)
+        """, (slug, term, pronunciation, definition, category, level, idx))
+
+    conn.commit()
+    conn.close()
+    print(f"✓ glossary.sqlite created with {{len(glossary_items)}} terms")
+'''
+    else:
+        script += '''
+
+
+def seed_glossary():
+    """Create glossary.sqlite (empty — no glossary.json found)."""
+    db_path = os.path.join(DB_DIR, 'glossary.sqlite')
+    conn = sqlite3.connect(db_path)
+    c = conn.cursor()
+
+    c.execute("""
+        CREATE TABLE IF NOT EXISTS terms (
+            id INTEGER PRIMARY KEY,
+            slug TEXT UNIQUE NOT NULL,
+            term TEXT NOT NULL,
+            pronunciation TEXT,
+            definition TEXT NOT NULL,
+            category TEXT NOT NULL,
+            level TEXT NOT NULL,
+            display_order INTEGER DEFAULT 0
+        )
+    """)
+
+    conn.commit()
+    conn.close()
+    print("✓ glossary.sqlite created (empty)")
+'''
+
+    script += '''
+
+
+def seed_testimonials():
+    """Create testimonials.sqlite (empty stub)."""
+    db_path = os.path.join(DB_DIR, 'testimonials.sqlite')
+    conn = sqlite3.connect(db_path)
+    c = conn.cursor()
+
+    c.execute("""
+        CREATE TABLE IF NOT EXISTS testimonials (
+            id INTEGER PRIMARY KEY,
+            quote TEXT NOT NULL,
+            author_name TEXT NOT NULL,
+            author_role TEXT,
+            is_featured INTEGER DEFAULT 0
+        )
+    """)
+
+    # [FILL] Add testimonials extracted from Divi testimonial modules or client-provided
+    # rows = [
+    #     {"quote": "...", "author_name": "...", "author_role": "...", "is_featured": 0},
+    # ]
+
+    conn.commit()
+    conn.close()
+    print("✓ testimonials.sqlite created (empty — [FILL] add testimonials)")
+
+
+def seed_blog():
+    """Create blog.sqlite (empty stub)."""
+    db_path = os.path.join(DB_DIR, 'blog.sqlite')
+    conn = sqlite3.connect(db_path)
+    c = conn.cursor()
+
+    c.execute("""
+        CREATE TABLE IF NOT EXISTS posts (
+            id INTEGER PRIMARY KEY,
+            slug TEXT UNIQUE NOT NULL,
+            title TEXT NOT NULL,
+            excerpt TEXT,
+            content TEXT,
+            author TEXT,
+            published_at TEXT,
+            is_featured INTEGER DEFAULT 0
+        )
+    """)
+
+    # [FILL] Add blog posts extracted from WP posts table
+    # rows = [
+    #     {"slug": "...", "title": "...", "excerpt": "...", "content": "...", "author": "...", "published_at": "..."},
+    # ]
+
+    conn.commit()
+    conn.close()
+    print("✓ blog.sqlite created (empty — [FILL] add blog posts)")
+
+
+def seed_videos():
+    """Create videos.sqlite (empty stub)."""
+    db_path = os.path.join(DB_DIR, 'videos.sqlite')
+    conn = sqlite3.connect(db_path)
+    c = conn.cursor()
+
+    c.execute("""
+        CREATE TABLE IF NOT EXISTS videos (
+            id INTEGER PRIMARY KEY,
+            slug TEXT UNIQUE NOT NULL,
+            title TEXT NOT NULL,
+            duration TEXT,
+            embed_url TEXT,
+            thumbnail TEXT,
+            category TEXT,
+            level TEXT,
+            is_free INTEGER DEFAULT 1
+        )
+    """)
+
+    # [FILL] Add on-demand video entries if site has video content
+    # rows = [
+    #     {"slug": "...", "title": "...", "duration": "12:34", "embed_url": "...", "category": "...", "level": "..."},
+    # ]
+
+    conn.commit()
+    conn.close()
+    print("✓ videos.sqlite created (empty — [FILL] add videos)")
+
+
+def seed_events():
+    """Create events.sqlite (empty stub)."""
+    db_path = os.path.join(DB_DIR, 'events.sqlite')
+    conn = sqlite3.connect(db_path)
+    c = conn.cursor()
+
+    c.execute("""
+        CREATE TABLE IF NOT EXISTS events (
+            id INTEGER PRIMARY KEY,
+            slug TEXT UNIQUE NOT NULL,
+            title TEXT NOT NULL,
+            event_date TEXT,
+            time_cet TEXT,
+            format TEXT,
+            capacity INTEGER,
+            price_eur REAL,
+            status TEXT DEFAULT 'open'
+        )
+    """)
+
+    # [FILL] Add workshop/event entries
+    # rows = [
+    #     {"slug": "...", "title": "...", "event_date": "2026-06-15", "time_cet": "10:00", "format": "online", "capacity": 20, "price_eur": 29.99},
+    # ]
+
+    conn.commit()
+    conn.close()
+    print("✓ events.sqlite created (empty — [FILL] add events)")
+
+
+def seed_schedule():
+    """Create schedule.sqlite (empty stub)."""
+    db_path = os.path.join(DB_DIR, 'schedule.sqlite')
+    conn = sqlite3.connect(db_path)
+    c = conn.cursor()
+
+    c.execute("""
+        CREATE TABLE IF NOT EXISTS classes (
+            id INTEGER PRIMARY KEY,
+            day_of_week TEXT NOT NULL,
+            day_order INTEGER NOT NULL,
+            time_cet TEXT NOT NULL,
+            class_name TEXT NOT NULL,
+            level TEXT NOT NULL,
+            format TEXT NOT NULL,
+            duration_min INTEGER NOT NULL,
+            badge_variant TEXT DEFAULT ''
+        )
+    """)
+
+    # [FILL] Add recurring class schedule rows
+    # rows = [
+    #     {"day_of_week": "Monday", "day_order": 1, "time_cet": "10:00", "class_name": "Hatha Yoga", "level": "beginner", "format": "online", "duration_min": 60, "badge_variant": "featured"},
+    # ]
+
+    conn.commit()
+    conn.close()
+    print("✓ schedule.sqlite created (empty — [FILL] add class schedule)")
+
+
+def seed_instructors():
+    """Create instructors.sqlite (empty stub)."""
+    db_path = os.path.join(DB_DIR, 'instructors.sqlite')
+    conn = sqlite3.connect(db_path)
+    c = conn.cursor()
+
+    c.execute("""
+        CREATE TABLE IF NOT EXISTS instructors (
+            id INTEGER PRIMARY KEY,
+            slug TEXT UNIQUE NOT NULL,
+            name TEXT NOT NULL,
+            title TEXT,
+            bio TEXT,
+            certifications TEXT,
+            image TEXT,
+            is_primary INTEGER DEFAULT 0
+        )
+    """)
+
+    # [FILL] Add instructor rows
+    # rows = [
+    #     {"slug": "alice-johnson", "name": "Alice Johnson", "title": "Lead Instructor", "bio": "...", "certifications": "...", "is_primary": 1},
+    # ]
+
+    conn.commit()
+    conn.close()
+    print("✓ instructors.sqlite created (empty — [FILL] add instructors)")
+
+
+def seed_packages():
+    """Create packages.sqlite (empty stub)."""
+    db_path = os.path.join(DB_DIR, 'packages.sqlite')
+    conn = sqlite3.connect(db_path)
+    c = conn.cursor()
+
+    c.execute("""
+        CREATE TABLE IF NOT EXISTS packages (
+            id INTEGER PRIMARY KEY,
+            slug TEXT UNIQUE NOT NULL,
+            name TEXT NOT NULL,
+            price_eur REAL,
+            sessions_count INTEGER,
+            validity_days INTEGER,
+            is_featured INTEGER DEFAULT 0
+        )
+    """)
+
+    # [FILL] Add class pack/package options
+    # rows = [
+    #     {"slug": "starter", "name": "Starter Pack", "price_eur": 49.99, "sessions_count": 5, "validity_days": 30, "is_featured": 0},
+    #     {"slug": "unlimited", "name": "Unlimited Monthly", "price_eur": 99.99, "sessions_count": None, "validity_days": 30, "is_featured": 1},
+    # ]
+
+    conn.commit()
+    conn.close()
+    print("✓ packages.sqlite created (empty — [FILL] add packages)")
+
+
+if __name__ == '__main__':
+    seed_pages()
+    seed_nav()
+    seed_glossary()
+    seed_testimonials()
+    seed_blog()
+    seed_videos()
+    seed_events()
+    seed_schedule()
+    seed_instructors()
+    seed_packages()
+    print("\\nSeeding complete. Review [FILL] markers before running in production.")
+'''
+
+    return script
+
+
+def main():
+    """Parse CLI arguments, generate the seeder script, write it; return an exit code."""
+    parser = argparse.ArgumentParser(
+        description='Generate seed_databases.py from extracted WP/Divi JSON data'
+    )
+    parser.add_argument('data_dir', help='Path to extracted data directory (.planning/data/)')
+    parser.add_argument('seed_path', help='Output path for seed_databases.py')
+    parser.add_argument('--domain', required=True, help='Domain name (e.g., example.com)')
+    parser.add_argument('--force', action='store_true', help='Overwrite existing seed_databases.py')
+
+    args = parser.parse_args()
+
+    # Validate inputs
+    if not os.path.isdir(args.data_dir):
+        print(f"Error: data_dir not found: {args.data_dir}")
+        return 1
+
+    if os.path.exists(args.seed_path) and not args.force:
+        print(f"Error: seed_databases.py already exists at {args.seed_path}")
+        print("Use --force to overwrite")
+        return 1
+
+    # Load required data files (pages.json must be a non-empty list of page dicts)
+    pages = load_json_file(os.path.join(args.data_dir, 'pages.json'))
+    if not pages or not isinstance(pages, list):
+        print("Error: pages.json not found or invalid")
+        return 1
+
+    design_system = load_json_file(os.path.join(args.data_dir, 'design-system.json'))
+    glossary = load_json_file(os.path.join(args.data_dir, 'glossary.json'))
+    nav = load_json_file(os.path.join(args.data_dir, 'nav.json'))
+
+    # Generate script
+    script_content = generate_seed_script(
+        args.data_dir,
+        args.domain,
+        design_system,
+        pages,
+        glossary,
+        nav
+    )
+
+    # Write output
+    out_dir = os.path.dirname(args.seed_path)
+    if out_dir:  # dirname is '' for a bare filename, and os.makedirs('') raises
+        os.makedirs(out_dir, exist_ok=True)
+    with open(args.seed_path, 'w', encoding='utf-8') as f:
+        f.write(script_content)
+
+    # Make executable
+    os.chmod(args.seed_path, 0o755)
+
+    print(f"✓ Generated: {args.seed_path}")
+    print(f" Pages: {len([p for p in pages if p.get('status') == 'publish' and p.get('post_type') == 'page'])}")
+    print(f" Glossary terms: {len(glossary) if glossary else 0}")
+    print(f" Nav items: {len(nav) if nav else 0}")
+    print("\nNext: Review [FILL] markers, then run: python3 " + args.seed_path)
+
+    return 0
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())
diff --git a/wp-migration.json b/wp-migration.json
new file mode 100644
index 0000000..6536d0c
--- /dev/null
+++ b/wp-migration.json
@@ -0,0 +1,50 @@
+{
+  "meta": {
+    "author": "Andre Cobham / Arising Media",
+    "updated": "2026-06-09",
+    "version": "1.0",
+    "description": "WordPress to AM PHP stack migration configuration and run order"
+  },
+  "input": {
+    "format": ".wpress (All-in-One WP Migration backup)",
+    "supported_builders": ["Divi", "Elementor", "classic", "Gutenberg"],
+    "database_format": "MySQL dump extracted from .wpress"
+  },
+  "output": {
+    "stack": "php:8.3-fpm-alpine + nginx + supervisord",
+    "data_layer": "SQLite (one db per content domain)",
+    "assets": "WebP only, baked into Docker image",
+    "routing": "PHP router + nginx location blocks"
+  },
+  "pipeline_phases": [
+    {"phase": 1, "name": "Extract", "description": "Unpack .wpress archive, extract MySQL dump and uploads folder"},
+    {"phase": 2, "name": "Analyze", "description": "Parse WordPress DB dump, detect Divi version, inventory pages, extract content"},
+    {"phase": 3, "name": "Design extraction", "description": "Extract color tokens, typography, layout patterns from Divi CSS"},
+    {"phase": 4, "name": "Content migration", "description": "Rewrite content clean into SQLite pages and page_sections"},
+    {"phase": 5, "name": "Media migration", "description": "Catalog uploads, skip WP-generated size variants, convert to WebP, remap paths"},
+    {"phase": 6, "name": "SEO preservation", "description": "Map old WP URLs to new AM slugs, generate 301 redirect map"},
+    {"phase": 7, "name": "Build", "description": "Scaffold AM project structure, seed SQLite DBs, write PHP templates"},
+    {"phase": 8, "name": "Verify", "description": "Docker build, HTTP 200 all pages, mobile check, SEO audit, zero em-dashes"}
+  ],
+  "rules": [
+    "Never a 1:1 Divi copy. Every migration is a content extraction and redesign",
+    "Never migrate the WordPress database. Content is rewritten cleaner",
+    "Never run headless WordPress or WordPress as API",
+    "Strip all Divi shortcodes, plugin CSS, and JS bundles",
+    "All media converted to WebP before baking into Docker image",
+    "URL slugs cleaned to flat lowercase-hyphen format",
+    "301 redirect map required for all changed URLs"
+  ],
+  "never_migrate": [
+    "wp-admin paths",
+    "Divi shortcode markup",
+    "WordPress plugin CSS/JS",
+    "wp-content/cache",
+    "WordPress user accounts or session data"
+  ],
+  "tooling": {
+    "extraction": "scripts in wp-divi-pipeline-to-am-stack/scripts/",
+    "sop_folder": ".am-webdesign-sops/wp-divi-pipeline-to-am-stack/",
+    "reference_docs": ["00-overview.md", "08-run-order.md", "10-agent-breadcrumbs.md"]
+  }
+}