recent updates

This commit is contained in:
2026-06-09 18:31:59 +02:00
parent 398b94965c
commit 94f7a1f72a
42 changed files with 8686 additions and 0 deletions
+239
View File
@@ -0,0 +1,239 @@
#!/usr/bin/env python3
import sqlite3
import glob
import os
import re
from datetime import datetime
# Directory that holds the SOP markdown files; the SQLite database file
# (sops.db) is stored inside the same directory.
SOP_DIR = "/home/sirdrez/arisingmedia-websites/.am-webdesign-sops"
DB_PATH = SOP_DIR + "/sops.db"
def init_db():
    """Create a fresh, empty SOP schema in the database at DB_PATH.

    Any existing ``sop_fts``, ``rules``, ``sop_sections`` and ``sops`` tables
    are dropped and recreated, so every call starts from an empty database.

    Returns:
        sqlite3.Connection: an open connection to the database. The caller
        is responsible for closing it.

    Raises:
        sqlite3.Error: if the database cannot be opened or a schema statement
            fails. The connection is closed before the error propagates.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        # Drop tables in reverse dependency order
        cursor.execute("DROP TABLE IF EXISTS sop_fts")
        cursor.execute("DROP TABLE IF EXISTS rules")
        cursor.execute("DROP TABLE IF EXISTS sop_sections")
        cursor.execute("DROP TABLE IF EXISTS sops")

        # Create tables
        cursor.execute("""
            CREATE TABLE sops (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                number TEXT,
                filename TEXT,
                title TEXT,
                full_content TEXT,
                updated_at TEXT
            )
        """)
        cursor.execute("""
            CREATE TABLE sop_sections (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                sop_id INTEGER REFERENCES sops(id),
                heading_level INTEGER,
                title TEXT,
                content TEXT
            )
        """)
        cursor.execute("""
            CREATE TABLE rules (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                category TEXT,
                rule TEXT,
                source_sop TEXT,
                source_section TEXT
            )
        """)
        # This statement requires an SQLite build that includes the FTS5
        # extension; without it the statement raises.
        cursor.execute("""
            CREATE VIRTUAL TABLE sop_fts USING fts5(
                sop_number,
                sop_title,
                section_title,
                content
            )
        """)
        conn.commit()
    except Exception:
        # Do not leak the connection when schema creation fails part-way.
        conn.close()
        raise
    return conn
def extract_number_from_filename(filename):
    """Return the run of digits that starts *filename*, or '' if there is none.

    Example: '00-stack-philosophy.md' yields '00'.
    """
    leading_digits = re.match(r'^(\d+)', filename)
    return leading_digits.group(1) if leading_digits else ""
def extract_first_heading(content):
    """Return the title text of the first line in *content* that begins with '#'.

    The leading '#' characters and surrounding whitespace are removed from
    that line. An empty string is returned when no line starts with '#'.
    """
    heading_lines = (row for row in content.split('\n') if row.startswith('#'))
    first_heading = next(heading_lines, None)
    if first_heading is None:
        return ""
    return first_heading.lstrip('#').strip()
def split_into_sections(content):
    """Split markdown *content* into sections at level-2-or-deeper headings.

    A section starts at every line beginning with '##' and runs until the
    next such line. Text before the first such heading is not part of any
    section and is discarded.

    Lines inside fenced code blocks (opened and closed by a line whose first
    non-blank characters are ``` or ~~~) are never treated as headings, so a
    '## comment' in a code sample stays in the section body. Fence tracking
    is deliberately simple: the first line starting with the same three
    marker characters closes the block.

    Returns:
        list[dict]: one dict per section, in document order, with keys
        'heading_level' (the number of leading '#' characters), 'title'
        (heading text without the leading '#' run) and 'content' (the
        stripped section body).
    """
    sections = []
    current_section = None
    current_content = []
    open_fence = None  # '```' or '~~~' while inside a fenced code block

    for line in content.split('\n'):
        marker = line.lstrip()[:3]
        if open_fence is not None:
            # Inside a code block: only a matching fence line can end it.
            if marker == open_fence:
                open_fence = None
            is_heading = False
        elif marker in ('```', '~~~'):
            open_fence = marker
            is_heading = False
        else:
            is_heading = line.startswith('##')

        if is_heading:
            # Save previous section if exists
            if current_section is not None:
                current_section['content'] = '\n'.join(current_content).strip()
                sections.append(current_section)
            title_part = line.lstrip('#')
            current_section = {
                # Count the '#' run so '####' is stored as 4 rather than 3.
                'heading_level': len(line) - len(title_part),
                'title': title_part.strip(),
            }
            current_content = []
        elif current_section is not None:
            current_content.append(line)

    # Save last section
    if current_section is not None:
        current_section['content'] = '\n'.join(current_content).strip()
        sections.append(current_section)
    return sections
def extract_rules_from_section(section_title, section_content, category_map):
    """Extract bullet-point rules from a section whose title marks it as a rule list.

    Category selection (case-insensitive on *section_title*):
      * The title must contain one of 'never use', 'mandatory', 'rules' or
        'what we never'; otherwise no rules are returned.
      * If it contains 'never' the category is 'never_use'; otherwise, if it
        contains 'mandatory' or 'pattern', the category is 'mandatory'.
      * A title that only matches 'rules' gets no category and yields no
        rules. NOTE(review): confirm this is intended.

    A line counts as a bullet only when, after stripping, it starts with '-'
    or '*' followed by whitespace. Bold/emphasis paragraphs ('**Note:** ...')
    and horizontal rules ('---', '***') are therefore not mistaken for rules.

    Args:
        section_title: heading text of the section.
        section_content: body text of the section.
        category_map: accepted for interface compatibility; not used.

    Returns:
        list[dict]: one ``{'category': ..., 'rule': ...}`` dict per bullet,
        in order of appearance.
    """
    title_lower = section_title.lower()
    trigger_keywords = ('never use', 'mandatory', 'rules', 'what we never')

    # Determine category
    category = None
    if any(keyword in title_lower for keyword in trigger_keywords):
        if 'never' in title_lower:
            category = 'never_use'
        elif 'mandatory' in title_lower or 'pattern' in title_lower:
            category = 'mandatory'
    if not category:
        return []

    # Extract bullet points
    rules = []
    for line in section_content.split('\n'):
        stripped = line.strip()
        marker, remainder = stripped[:1], stripped[1:]
        # Require "<marker><whitespace>" so '**bold**' or '--flag' lines
        # are not parsed as list items.
        if marker in ('-', '*') and remainder[:1].isspace():
            rule_text = remainder.strip()
            if rule_text:
                rules.append({
                    'category': category,
                    'rule': rule_text,
                })
    return rules
def process_sop_files(conn):
    """Load the SOP markdown files from SOP_DIR into the database.

    Only ``*.md`` files directly inside SOP_DIR are considered, in sorted
    order; README.md, STACK.md, CONTENT.md and OPTIMIZATION.md are skipped.
    For each file one ``sops`` row is inserted, then one ``sop_sections`` row
    per section, then one ``rules`` row per rule extracted from that section.
    Everything is committed once, after the last file.

    Returns:
        tuple: ``(sop_count, section_count, rule_count)`` of inserted rows.
    """
    insert_sop = """
        INSERT INTO sops (number, filename, title, full_content, updated_at)
        VALUES (?, ?, ?, ?, ?)
    """
    insert_section = """
        INSERT INTO sop_sections (sop_id, heading_level, title, content)
        VALUES (?, ?, ?, ?)
    """
    insert_rule = """
        INSERT INTO rules (category, rule, source_sop, source_section)
        VALUES (?, ?, ?, ?)
    """
    excluded = ('README.md', 'STACK.md', 'CONTENT.md', 'OPTIMIZATION.md')

    db = conn.cursor()
    n_sops = n_sections = n_rules = 0

    # Top-level markdown files only (the pattern does not recurse).
    for path in sorted(glob.glob(os.path.join(SOP_DIR, "*.md"))):
        name = os.path.basename(path)
        if name in excluded:
            continue

        with open(path, 'r', encoding='utf-8') as handle:
            text = handle.read()

        # The timestamp records when the file was imported.
        sop_row = (
            extract_number_from_filename(name),
            name,
            extract_first_heading(text),
            text,
            datetime.now().isoformat(),
        )
        db.execute(insert_sop, sop_row)
        parent_id = db.lastrowid
        n_sops += 1

        for part in split_into_sections(text):
            heading, body = part['title'], part['content']
            db.execute(insert_section, (parent_id, part['heading_level'], heading, body))
            n_sections += 1

            for found in extract_rules_from_section(heading, body, {}):
                db.execute(insert_rule, (found['category'], found['rule'], name, heading))
                n_rules += 1

    conn.commit()
    return n_sops, n_sections, n_rules
def rebuild_fts(conn):
    """Fill ``sop_fts`` with one row per section joined to its parent SOP, then commit.

    Existing rows in ``sop_fts`` are not removed first; rows are only added.
    """
    populate_index = """
        INSERT INTO sop_fts(sop_number, sop_title, section_title, content)
        SELECT s.number, s.title, ss.title, ss.content
        FROM sop_sections ss JOIN sops s ON ss.sop_id = s.id
    """
    conn.cursor().execute(populate_index)
    conn.commit()
def main():
    """Build the SOP database and print a summary.

    Runs ``init_db``, ``process_sop_files`` and ``rebuild_fts`` in that
    order. The connection is always closed once it has been opened, even
    when a later step fails. On any error the message and traceback are
    printed and the process exits with status 1.
    """
    import sys
    import traceback

    try:
        conn = init_db()
        try:
            sop_count, section_count, rule_count = process_sop_files(conn)
            rebuild_fts(conn)
        finally:
            conn.close()
        print("SOP Database built successfully:")
        print(f" SOPs loaded: {sop_count}")
        print(f" Sections indexed: {section_count}")
        print(f" Rules extracted: {rule_count}")
        print(f" Database: {DB_PATH}")
    except Exception as e:
        print(f"Error: {e}")
        traceback.print_exc()
        # sys.exit is used because the bare exit() helper is added by the
        # site module and is not guaranteed to exist in every run mode.
        sys.exit(1)
# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()