recent updates

2026-06-09 18:31:59 +02:00
parent 398b94965c
commit 94f7a1f72a
42 changed files with 8686 additions and 0 deletions
@@ -0,0 +1,239 @@
+#!/usr/bin/env python3
+
+import sqlite3
+import glob
+import os
+import re
+from datetime import datetime
+
# Directory whose top-level *.md files are loaded as SOPs.
SOP_DIR = "/home/sirdrez/arisingmedia-websites/.am-webdesign-sops"
# SQLite database file this script opens and repopulates.
DB_PATH = "/home/sirdrez/arisingmedia-websites/.am-webdesign-sops/sops.db"
+
def init_db(db_path=None):
    """Create a fresh, empty SOP schema and return an open connection.

    The ``sop_fts``, ``rules``, ``sop_sections`` and ``sops`` tables are
    dropped if they exist and then recreated, so any previously stored rows
    are discarded.

    Args:
        db_path: Database location handed to ``sqlite3.connect``. When
            omitted, the module-level ``DB_PATH`` is used.

    Returns:
        An open ``sqlite3.Connection`` with the schema committed. The caller
        is responsible for closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or a schema statement
            fails. The connection is closed before the error propagates.
    """
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)

    try:
        cursor = conn.cursor()

        # Drop tables in reverse dependency order
        for drop_statement in (
            "DROP TABLE IF EXISTS sop_fts",
            "DROP TABLE IF EXISTS rules",
            "DROP TABLE IF EXISTS sop_sections",
            "DROP TABLE IF EXISTS sops",
        ):
            cursor.execute(drop_statement)

        # One row per SOP markdown file.
        cursor.execute("""
        CREATE TABLE sops (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            number TEXT,
            filename TEXT,
            title TEXT,
            full_content TEXT,
            updated_at TEXT
        )
    """)

        # One row per heading-delimited section of an SOP.
        cursor.execute("""
        CREATE TABLE sop_sections (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            sop_id INTEGER REFERENCES sops(id),
            heading_level INTEGER,
            title TEXT,
            content TEXT
        )
    """)

        # One row per bullet-point rule extracted from a section.
        cursor.execute("""
        CREATE TABLE rules (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            category TEXT,
            rule TEXT,
            source_sop TEXT,
            source_section TEXT
        )
    """)

        # Full-text index over sections (requires an SQLite build with FTS5).
        cursor.execute("""
        CREATE VIRTUAL TABLE sop_fts USING fts5(
            sop_number,
            sop_title,
            section_title,
            content
        )
    """)

        conn.commit()
    except Exception:
        # Do not leak the connection when schema creation fails part-way.
        conn.close()
        raise

    return conn
+
def extract_number_from_filename(filename):
    """Return the run of digits that starts *filename*, or "" if there is none.

    Example: '00-stack-philosophy.md' yields '00'.
    """
    leading_digits = re.match(r'^(\d+)', filename)
    return leading_digits.group(1) if leading_digits else ""
+
def extract_first_heading(content):
    """Return the text of the first heading line in *content*.

    A heading line is any line that starts with ``#``; the leading ``#``
    characters and surrounding whitespace are removed from the result.
    Lines inside fenced code blocks (delimited by ``` or ~~~) are ignored,
    so a ``# comment`` in a code sample is never mistaken for the title.

    Args:
        content: Markdown text of one document.

    Returns:
        The heading text, or "" when the document has no heading.
    """
    open_fence = None  # fence marker that opened the current code block

    for line in content.split('\n'):
        marker = line.lstrip()[:3]
        if marker in ('```', '~~~'):
            if open_fence is None:
                open_fence = marker
            elif marker == open_fence:
                # Only the same kind of fence closes the block.
                open_fence = None
            continue

        if open_fence is None and line.startswith('#'):
            return line.lstrip('#').strip()

    return ""
+
def split_into_sections(content):
    """Split Markdown *content* into sections at level-2-or-deeper headings.

    A section starts at every line beginning with ``##`` and runs until the
    next such line. Text before the first ``##`` heading (including a
    single-``#`` title) is not part of any section. Lines inside fenced code
    blocks (``` or ~~~) are always treated as section content, even when
    they begin with ``##``.

    Args:
        content: Markdown text of one document.

    Returns:
        A list of dicts in document order, each with:
        ``heading_level`` (number of leading ``#`` characters),
        ``title`` (heading text without the ``#`` prefix) and
        ``content`` (the stripped text between this heading and the next).
    """
    sections = []
    current_section = None
    current_content = []
    open_fence = None  # fence marker that opened the current code block

    for line in content.split('\n'):
        marker = line.lstrip()[:3]
        if marker in ('```', '~~~'):
            if open_fence is None:
                open_fence = marker
            elif marker == open_fence:
                open_fence = None
            # The fence line itself falls through and is kept as content.
        elif open_fence is None and line.startswith('##'):
            # Close the section that was being collected, if any.
            if current_section is not None:
                current_section['content'] = '\n'.join(current_content).strip()
                sections.append(current_section)

            current_section = {
                # Count the '#' characters so '####' is level 4, not 3.
                'heading_level': len(line) - len(line.lstrip('#')),
                'title': line.lstrip('#').strip()
            }
            current_content = []
            continue

        if current_section is not None:
            current_content.append(line)

    # Save last section
    if current_section is not None:
        current_section['content'] = '\n'.join(current_content).strip()
        sections.append(current_section)

    return sections
+
def extract_rules_from_section(section_title, section_content, category_map):
    """Extract bullet-point rules from a section whose title marks it as rules.

    The title (case-insensitive) must contain one of 'never use',
    'mandatory', 'rules' or 'what we never'. The category is then
    'never_use' when the title contains 'never', otherwise 'mandatory' when
    it contains 'mandatory' or 'pattern'.

    Only real list items count as rules: a line whose first non-blank
    character is ``-`` or ``*`` followed by whitespace. Bold or emphasised
    paragraphs (``**Note**: ...``) and horizontal rules (``---``) are not
    list items and are skipped.

    Args:
        section_title: Heading text of the section.
        section_content: Body text of the section.
        category_map: Accepted for interface compatibility; currently unused.

    Returns:
        A list of ``{'category': ..., 'rule': ...}`` dicts in document
        order; empty when the title does not select a category.
    """
    title_lower = section_title.lower()

    trigger_keywords = ('never use', 'mandatory', 'rules', 'what we never')
    if not any(keyword in title_lower for keyword in trigger_keywords):
        return []

    if 'never' in title_lower:
        category = 'never_use'
    elif 'mandatory' in title_lower or 'pattern' in title_lower:
        category = 'mandatory'
    else:
        # NOTE(review): a title that only matches 'rules' gets no category,
        # so nothing is extracted from it. Confirm this is intended.
        return []

    rules = []
    for line in section_content.split('\n'):
        stripped = line.strip()
        # A bullet is a single '-' or '*' marker followed by whitespace.
        if stripped[:1] in ('-', '*') and stripped[1:2].isspace():
            rule_text = stripped[1:].strip()
            if rule_text:
                rules.append({
                    'category': category,
                    'rule': rule_text
                })

    return rules
+
def process_sop_files(conn):
    """Load every top-level SOP markdown file into the database.

    Each ``*.md`` file directly inside ``SOP_DIR`` (except README.md,
    STACK.md, CONTENT.md and OPTIMIZATION.md) becomes one ``sops`` row, each
    of its sections one ``sop_sections`` row, and each extracted rule one
    ``rules`` row. Files are handled in sorted path order and everything is
    committed once at the end.

    Returns:
        A ``(sop_count, section_count, rule_count)`` tuple of inserted rows.
    """
    excluded = ['README.md', 'STACK.md', 'CONTENT.md', 'OPTIMIZATION.md']
    db = conn.cursor()
    n_sops = n_sections = n_rules = 0

    # Non-recursive glob: only files directly inside SOP_DIR are considered.
    for path in sorted(glob.glob(os.path.join(SOP_DIR, "*.md"))):
        filename = os.path.basename(path)
        if filename in excluded:
            continue

        with open(path, 'r', encoding='utf-8') as handle:
            text = handle.read()

        # updated_at records when the file was loaded, in ISO format.
        sop_row = (
            extract_number_from_filename(filename),
            filename,
            extract_first_heading(text),
            text,
            datetime.now().isoformat(),
        )
        db.execute("""
            INSERT INTO sops (number, filename, title, full_content, updated_at)
            VALUES (?, ?, ?, ?, ?)
        """, sop_row)
        sop_id = db.lastrowid
        n_sops += 1

        for section in split_into_sections(text):
            heading = section['title']
            body = section['content']

            db.execute("""
                INSERT INTO sop_sections (sop_id, heading_level, title, content)
                VALUES (?, ?, ?, ?)
            """, (sop_id, section['heading_level'], heading, body))
            n_sections += 1

            # Rules keep the file and section they came from for traceability.
            for found in extract_rules_from_section(heading, body, {}):
                db.execute("""
                    INSERT INTO rules (category, rule, source_sop, source_section)
                    VALUES (?, ?, ?, ?)
                """, (found['category'], found['rule'], filename, heading))
                n_rules += 1

    conn.commit()
    return n_sops, n_sections, n_rules
+
def rebuild_fts(conn):
    """Rebuild the ``sop_fts`` full-text index from the section tables.

    The index is emptied first and then refilled with one entry per
    ``sop_sections`` row joined to its parent ``sops`` row, so calling this
    more than once does not duplicate entries. Both steps run in a single
    transaction that is committed on success and rolled back on error.

    Args:
        conn: Open ``sqlite3.Connection`` holding the ``sops``,
            ``sop_sections`` and ``sop_fts`` tables.
    """
    with conn:
        # Clear stale entries so this is a true rebuild, not an append.
        conn.execute("DELETE FROM sop_fts")
        conn.execute("""
        INSERT INTO sop_fts(sop_number, sop_title, section_title, content)
        SELECT s.number, s.title, ss.title, ss.content
        FROM sop_sections ss JOIN sops s ON ss.sop_id = s.id
    """)
+
def main():
    """Build the SOP database from scratch and print a summary.

    Recreates the schema, loads the SOP files, rebuilds the full-text index
    and prints the row counts. The database connection is closed whether or
    not loading succeeds. On any error the message and traceback are written
    to stderr and the process exits with status 1.
    """
    import sys
    import traceback

    try:
        conn = init_db()
        try:
            sop_count, section_count, rule_count = process_sop_files(conn)
            rebuild_fts(conn)
        finally:
            # Release the database handle even when loading fails.
            conn.close()

        print("SOP Database built successfully:")
        print(f"  SOPs loaded: {sop_count}")
        print(f"  Sections indexed: {section_count}")
        print(f"  Rules extracted: {rule_count}")
        print(f"  Database: {DB_PATH}")

    except Exception as e:
        # Keep diagnostics together on stderr; stdout carries only the summary.
        print(f"Error: {e}", file=sys.stderr)
        traceback.print_exc()
        # sys.exit is always available, unlike the site-provided exit().
        sys.exit(1)


if __name__ == "__main__":
    main()