240 lines
6.8 KiB
Python
240 lines
6.8 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import sqlite3
|
|
import glob
|
|
import os
|
|
import re
|
|
from datetime import datetime
|
|
|
|
# Directory that holds the SOP markdown sources.
SOP_DIR = "/home/sirdrez/arisingmedia-websites/.am-webdesign-sops"

# SQLite database file, stored inside the SOP directory.
DB_PATH = f"{SOP_DIR}/sops.db"
|
|
|
|
def init_db(db_path=None):
    """Create a fresh, empty SOP database and return an open connection.

    Any existing ``sop_fts``, ``rules``, ``sop_sections`` and ``sops`` tables
    are dropped and recreated, so all previously stored data is discarded.

    Args:
        db_path: Path of the SQLite database file. Defaults to the
            module-level ``DB_PATH`` when omitted.

    Returns:
        An open ``sqlite3.Connection`` with the schema committed. The caller
        is responsible for closing it.

    Raises:
        sqlite3.Error: If connecting or any schema statement fails. The
            connection is closed before the error propagates.
    """
    if db_path is None:
        db_path = DB_PATH

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Drop tables in reverse dependency order
        for table in ("sop_fts", "rules", "sop_sections", "sops"):
            cursor.execute(f"DROP TABLE IF EXISTS {table}")

        # Create tables
        cursor.execute("""
            CREATE TABLE sops (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                number TEXT,
                filename TEXT,
                title TEXT,
                full_content TEXT,
                updated_at TEXT
            )
        """)

        cursor.execute("""
            CREATE TABLE sop_sections (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                sop_id INTEGER REFERENCES sops(id),
                heading_level INTEGER,
                title TEXT,
                content TEXT
            )
        """)

        cursor.execute("""
            CREATE TABLE rules (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                category TEXT,
                rule TEXT,
                source_sop TEXT,
                source_section TEXT
            )
        """)

        cursor.execute("""
            CREATE VIRTUAL TABLE sop_fts USING fts5(
                sop_number,
                sop_title,
                section_title,
                content
            )
        """)

        conn.commit()
    except Exception:
        # Do not leak the connection when schema creation fails.
        conn.close()
        raise

    return conn
|
|
|
|
def extract_number_from_filename(filename):
    """Return the run of digits that starts ``filename``.

    For example ``'00-stack-philosophy.md'`` yields ``'00'``. An empty string
    is returned when the name does not begin with a digit.
    """
    leading_digits = re.match(r'^(\d+)', filename)
    return leading_digits.group(1) if leading_digits else ""
|
|
|
|
def extract_first_heading(content):
    """Return the text of the first line that begins with ``#``.

    Leading ``#`` characters and surrounding whitespace are removed from that
    line. An empty string is returned when no line starts with ``#``.
    """
    hash_lines = (row for row in content.split('\n') if row.startswith('#'))
    first_hit = next(hash_lines, None)
    if first_hit is None:
        return ""
    return first_hit.lstrip('#').strip()
|
|
|
|
def split_into_sections(content):
    """Split markdown ``content`` into sections at ``##`` / ``###`` headings.

    A new section starts at every line that begins with ``##``. Text before
    the first such heading (including a top-level ``#`` title) is not part of
    any section and is discarded. Lines inside fenced code blocks (delimited
    by ``` or ~~~) are never treated as headings, so a ``## comment`` inside a
    code sample stays in the content of the section that contains it.

    Args:
        content: Full markdown text of one document.

    Returns:
        A list of dicts in document order, each with the keys
        ``'heading_level'`` (2 for ``##``, 3 for ``###`` or deeper),
        ``'title'`` (heading text without the ``#`` prefix) and ``'content'``
        (the section body, stripped of surrounding whitespace).
    """
    sections = []
    current_section = None
    current_content = []
    # Fence marker of the code block we are currently inside, or None.
    open_fence = None

    for line in content.split('\n'):
        fence = line.lstrip()[:3]
        if fence in ('```', '~~~'):
            if open_fence is None:
                open_fence = fence
            elif fence == open_fence:
                open_fence = None
            # A fence line is body text, never a heading.
            if current_section is not None:
                current_content.append(line)
            continue

        if open_fence is None and line.startswith('##'):
            # Save previous section if exists
            if current_section is not None:
                current_section['content'] = '\n'.join(current_content).strip()
                sections.append(current_section)

            current_section = {
                'heading_level': 3 if line.startswith('###') else 2,
                'title': line.lstrip('#').strip(),
            }
            current_content = []
        elif current_section is not None:
            current_content.append(line)

    # Save last section
    if current_section is not None:
        current_section['content'] = '\n'.join(current_content).strip()
        sections.append(current_section)

    return sections
|
|
|
|
def extract_rules_from_section(section_title, section_content, category_map):
    """Extract bullet-point rules from a section whose title marks it as rules.

    The section is considered only when its lower-cased title contains one of
    'never use', 'mandatory', 'rules' or 'what we never'. The category is then
    'never_use' if the title contains 'never', or 'mandatory' if it contains
    'mandatory' or 'pattern'. Titles that match neither yield no rules.

    Only real markdown bullets are collected: a ``-`` or ``*`` marker followed
    by whitespace. Lines that merely start with emphasis (``**Note:** ...``)
    and thematic breaks (``---``, ``* * *``) are ignored.

    Args:
        section_title: Heading text of the section.
        section_content: Body text of the section.
        category_map: Currently unused; kept so existing callers keep working.

    Returns:
        A list of ``{'category': ..., 'rule': ...}`` dicts in document order;
        empty when the section is not a rules section.
    """
    title_lower = section_title.lower()

    trigger_keywords = ('never use', 'mandatory', 'rules', 'what we never')
    if not any(keyword in title_lower for keyword in trigger_keywords):
        return []

    # Determine category
    if 'never' in title_lower:
        category = 'never_use'
    elif 'mandatory' in title_lower or 'pattern' in title_lower:
        category = 'mandatory'
    else:
        return []

    # Extract bullet points
    rules = []
    for line in section_content.split('\n'):
        stripped = line.strip()
        # A bullet is a '-' or '*' marker followed by whitespace.
        if len(stripped) < 2 or stripped[0] not in '-*' or not stripped[1].isspace():
            continue
        rule_text = stripped[1:].strip()
        # Skip thematic breaks such as "* * *" or "- - -".
        if not rule_text.strip('-* '):
            continue
        rules.append({
            'category': category,
            'rule': rule_text,
        })

    return rules
|
|
|
|
def process_sop_files(conn):
    """Load every top-level ``*.md`` file in ``SOP_DIR`` into the database.

    Files are handled in sorted path order; README.md, STACK.md, CONTENT.md
    and OPTIMIZATION.md are skipped. Each remaining file becomes one ``sops``
    row, each of its sections one ``sop_sections`` row, and each rule found in
    a section one ``rules`` row. Everything is committed once at the end.

    Args:
        conn: Open SQLite connection whose schema already contains the
            ``sops``, ``sop_sections`` and ``rules`` tables.

    Returns:
        A ``(sop_count, section_count, rule_count)`` tuple with the number of
        rows inserted into each table.
    """
    excluded_names = ['README.md', 'STACK.md', 'CONTENT.md', 'OPTIMIZATION.md']

    insert_sop_sql = """
        INSERT INTO sops (number, filename, title, full_content, updated_at)
        VALUES (?, ?, ?, ?, ?)
    """
    insert_section_sql = """
        INSERT INTO sop_sections (sop_id, heading_level, title, content)
        VALUES (?, ?, ?, ?)
    """
    insert_rule_sql = """
        INSERT INTO rules (category, rule, source_sop, source_section)
        VALUES (?, ?, ?, ?)
    """

    db = conn.cursor()
    loaded_sops = 0
    loaded_sections = 0
    loaded_rules = 0

    # Top level of SOP_DIR only; sub-directories are not searched.
    for md_path in sorted(glob.glob(os.path.join(SOP_DIR, "*.md"))):
        md_name = os.path.basename(md_path)
        if md_name in excluded_names:
            continue

        with open(md_path, 'r', encoding='utf-8') as handle:
            text = handle.read()

        # One row per SOP document.
        sop_row = (
            extract_number_from_filename(md_name),
            md_name,
            extract_first_heading(text),
            text,
            datetime.now().isoformat(),
        )
        db.execute(insert_sop_sql, sop_row)
        parent_id = db.lastrowid
        loaded_sops += 1

        # One row per section, plus one row per rule found in that section.
        for part in split_into_sections(text):
            db.execute(
                insert_section_sql,
                (parent_id, part['heading_level'], part['title'], part['content']),
            )
            loaded_sections += 1

            found_rules = extract_rules_from_section(part['title'], part['content'], {})
            for found in found_rules:
                db.execute(
                    insert_rule_sql,
                    (found['category'], found['rule'], md_name, part['title']),
                )
                loaded_rules += 1

    conn.commit()
    return loaded_sops, loaded_sections, loaded_rules
|
|
|
|
def rebuild_fts(conn):
    """Rebuild the ``sop_fts`` full-text index from the section tables.

    The index is emptied first and then refilled with one row per
    ``sop_sections`` row joined to its parent ``sops`` row, so calling this
    more than once does not create duplicate search entries. The change is
    committed before returning.

    Args:
        conn: Open SQLite connection containing ``sops``, ``sop_sections``
            and ``sop_fts``.
    """
    cursor = conn.cursor()

    # Clear stale entries so a repeated rebuild does not duplicate rows.
    cursor.execute("DELETE FROM sop_fts")

    cursor.execute("""
        INSERT INTO sop_fts(sop_number, sop_title, section_title, content)
        SELECT s.number, s.title, ss.title, ss.content
        FROM sop_sections ss JOIN sops s ON ss.sop_id = s.id
    """)

    conn.commit()
|
|
|
|
def main():
    """Build the SOP database from scratch and print a summary.

    Recreates the schema, loads the markdown files, rebuilds the full-text
    index and prints the row counts. On any error the message and traceback
    are printed and the process exits with status 1. The database connection
    is closed on both the success and the failure path.
    """
    import sys
    import traceback

    conn = None
    try:
        conn = init_db()
        sop_count, section_count, rule_count = process_sop_files(conn)
        rebuild_fts(conn)

        print("SOP Database built successfully:")
        print(f" SOPs loaded: {sop_count}")
        print(f" Sections indexed: {section_count}")
        print(f" Rules extracted: {rule_count}")
        print(f" Database: {DB_PATH}")

    except Exception as e:
        print(f"Error: {e}")
        traceback.print_exc()
        # sys.exit is always available; the bare exit() helper is installed
        # by the site module and may be missing (e.g. under ``python -S``).
        sys.exit(1)
    finally:
        # Runs on the SystemExit path too, so the connection never leaks.
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    main()
|