recent updates
This commit is contained in:
@@ -0,0 +1,239 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sqlite3
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
# Directory that holds the SOP markdown sources.
SOP_DIR = "/home/sirdrez/arisingmedia-websites/.am-webdesign-sops"
# SQLite database file that the index is written to (lives inside SOP_DIR).
DB_PATH = "/home/sirdrez/arisingmedia-websites/.am-webdesign-sops/sops.db"
|
||||
|
||||
def init_db():
    """Create a fresh SOP schema in the database at ``DB_PATH``.

    Any existing ``sop_fts``, ``rules``, ``sop_sections`` and ``sops`` tables
    are dropped first, so every call starts from an empty schema.

    Returns:
        sqlite3.Connection: an open connection with the empty schema
        committed. The caller is responsible for closing it.

    Raises:
        sqlite3.Error: if the database cannot be opened or a schema statement
            fails. The connection is closed before the error propagates so a
            failed initialization does not leak an open handle.
    """
    schema_statements = (
        # Drop tables in reverse dependency order
        "DROP TABLE IF EXISTS sop_fts",
        "DROP TABLE IF EXISTS rules",
        "DROP TABLE IF EXISTS sop_sections",
        "DROP TABLE IF EXISTS sops",
        # Create tables
        """
        CREATE TABLE sops (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            number TEXT,
            filename TEXT,
            title TEXT,
            full_content TEXT,
            updated_at TEXT
        )
        """,
        """
        CREATE TABLE sop_sections (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            sop_id INTEGER REFERENCES sops(id),
            heading_level INTEGER,
            title TEXT,
            content TEXT
        )
        """,
        """
        CREATE TABLE rules (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            category TEXT,
            rule TEXT,
            source_sop TEXT,
            source_section TEXT
        )
        """,
        """
        CREATE VIRTUAL TABLE sop_fts USING fts5(
            sop_number,
            sop_title,
            section_title,
            content
        )
        """,
    )

    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()
        for statement in schema_statements:
            cursor.execute(statement)
        conn.commit()
    except Exception:
        # Do not hand a half-initialized, still-open connection to nobody.
        conn.close()
        raise
    return conn
|
||||
|
||||
def extract_number_from_filename(filename):
    """Return the leading run of digits in *filename*, or '' if there is none.

    Example: '00-stack-philosophy.md' -> '00'.
    """
    prefix = re.match(r'^(\d+)', filename)
    return prefix.group(1) if prefix else ""
|
||||
|
||||
def extract_first_heading(content):
    """Return the text of the first line that begins with '#'.

    The leading '#' characters and surrounding whitespace are removed.
    Returns '' when no line starts with '#'.
    """
    heading_lines = (row for row in content.split('\n') if row.startswith('#'))
    first = next(heading_lines, None)
    if first is None:
        return ""
    return first.lstrip('#').strip()
|
||||
|
||||
def split_into_sections(content):
    """Split markdown *content* into sections at ``##``-or-deeper headings.

    A section starts at each line beginning with ``##`` and runs until the
    next such line. Text before the first such heading (including a ``#``
    title line) belongs to no section and is dropped.

    Lines inside fenced code blocks (delimited by ``` or ~~~) are never
    treated as headings, so a ``## comment`` in a code sample stays part of
    the surrounding section.

    Args:
        content: full markdown text.

    Returns:
        list[dict]: one dict per section, in document order, with keys
        ``heading_level`` (number of leading ``#`` characters), ``title``
        (heading text without the ``#`` prefix) and ``content`` (the section
        body, stripped of surrounding whitespace).
    """
    sections = []
    current = None      # section being accumulated, if any
    body = []           # lines collected for `current`
    open_fence = None   # '```' or '~~~' while inside a fenced code block

    for line in content.split('\n'):
        marker = line.lstrip()[:3]

        if open_fence is not None:
            # Inside a code fence: only the matching marker can end it.
            if marker == open_fence:
                open_fence = None
        elif marker in ('```', '~~~'):
            open_fence = marker
        elif line.startswith('##'):
            # Save previous section if exists
            if current is not None:
                current['content'] = '\n'.join(body).strip()
                sections.append(current)

            title = line.lstrip('#')
            current = {
                # Count the '#' characters so '####' is level 4, not 3.
                'heading_level': len(line) - len(title),
                'title': title.strip(),
            }
            body = []
            continue

        if current is not None:
            body.append(line)

    # Save last section
    if current is not None:
        current['content'] = '\n'.join(body).strip()
        sections.append(current)

    return sections
|
||||
|
||||
def extract_rules_from_section(section_title, section_content, category_map):
    """Extract bullet-point rules from a section whose title marks it as rules.

    The title (case-insensitive) must contain one of the gate keywords
    'never use', 'mandatory', 'rules' or 'what we never'. The category is
    then 'never_use' if the title contains 'never', otherwise 'mandatory' if
    it contains 'mandatory' or 'pattern'. Sections without a category yield
    no rules.

    Args:
        section_title: heading text of the section.
        section_content: body text of the section.
        category_map: currently unused; kept so existing callers keep working.

    Returns:
        list[dict]: one ``{'category': ..., 'rule': ...}`` dict per bullet
        line, in order of appearance. A bullet line is a line whose first
        non-blank character is ``-`` or ``*`` followed by whitespace; lines
        such as ``**bold** text``, ``---`` or ``--flag`` are not bullets.
    """
    title_lower = section_title.lower()
    gate_keywords = ('never use', 'mandatory', 'rules', 'what we never')

    # Determine category
    category = None
    if any(keyword in title_lower for keyword in gate_keywords):
        if 'never' in title_lower:
            category = 'never_use'
        elif 'mandatory' in title_lower or 'pattern' in title_lower:
            category = 'mandatory'
        # NOTE(review): a title that only matches 'rules' passes the gate but
        # gets no category, so nothing is extracted -- confirm this is intended.

    if category is None:
        return []

    # Extract bullet points
    rules = []
    for line in section_content.split('\n'):
        stripped = line.strip()
        # Require "<marker><whitespace>" so emphasis ("**Note:** ..."),
        # horizontal rules ("---") and option flags are not mistaken for bullets.
        if stripped[:1] not in ('-', '*') or not stripped[1:2].isspace():
            continue
        rules.append({
            'category': category,
            'rule': stripped[1:].strip(),
        })

    return rules
|
||||
|
||||
def process_sop_files(conn):
    """Load the top-level ``*.md`` files of ``SOP_DIR`` into the database.

    Files are handled in sorted path order. README.md, STACK.md, CONTENT.md
    and OPTIMIZATION.md are skipped. Each remaining file becomes one ``sops``
    row, each of its sections one ``sop_sections`` row, and each rule found
    in a section one ``rules`` row. Everything is committed once at the end.

    Args:
        conn: open SQLite connection whose schema already exists.

    Returns:
        tuple[int, int, int]: numbers of SOPs, sections and rules inserted.
    """
    excluded = ['README.md', 'STACK.md', 'CONTENT.md', 'OPTIMIZATION.md']
    cursor = conn.cursor()

    sop_count = section_count = rule_count = 0

    # Top-level .md files only (the glob does not recurse).
    for filepath in sorted(glob.glob(os.path.join(SOP_DIR, "*.md"))):
        filename = os.path.basename(filepath)
        if filename in excluded:
            continue

        with open(filepath, 'r', encoding='utf-8') as handle:
            full_content = handle.read()

        # One row per SOP file; the timestamp is the import time.
        sop_row = (
            extract_number_from_filename(filename),
            filename,
            extract_first_heading(full_content),
            full_content,
            datetime.now().isoformat(),
        )
        cursor.execute("""
            INSERT INTO sops (number, filename, title, full_content, updated_at)
            VALUES (?, ?, ?, ?, ?)
        """, sop_row)
        sop_id = cursor.lastrowid
        sop_count += 1

        for section in split_into_sections(full_content):
            heading = section['title']
            text = section['content']

            cursor.execute("""
                INSERT INTO sop_sections (sop_id, heading_level, title, content)
                VALUES (?, ?, ?, ?)
            """, (sop_id, section['heading_level'], heading, text))
            section_count += 1

            # Rules are attributed to the file and section they came from.
            for rule in extract_rules_from_section(heading, text, {}):
                cursor.execute("""
                    INSERT INTO rules (category, rule, source_sop, source_section)
                    VALUES (?, ?, ?, ?)
                """, (rule['category'], rule['rule'], filename, heading))
                rule_count += 1

    conn.commit()
    return sop_count, section_count, rule_count
|
||||
|
||||
def rebuild_fts(conn):
    """Rebuild the ``sop_fts`` full-text index from the section tables.

    Existing index rows are deleted first, then one row is inserted per
    ``sop_sections`` row joined with its parent ``sops`` row. Clearing first
    makes repeated calls safe: a second call replaces the index instead of
    duplicating every entry.

    Args:
        conn: open SQLite connection containing ``sops``, ``sop_sections``
            and the ``sop_fts`` virtual table.
    """
    cursor = conn.cursor()

    # Start from an empty index so the function is a true rebuild.
    cursor.execute("DELETE FROM sop_fts")

    cursor.execute("""
        INSERT INTO sop_fts(sop_number, sop_title, section_title, content)
        SELECT s.number, s.title, ss.title, ss.content
        FROM sop_sections ss JOIN sops s ON ss.sop_id = s.id
    """)

    conn.commit()
|
||||
|
||||
def main():
    """Build the SOP database from scratch and print a summary.

    Runs schema initialization, file import and FTS rebuild in that order.
    On any failure the error and its traceback are printed and the process
    exits with status 1. The database connection is closed on both the
    success and the failure path.
    """
    # Local imports keep this block self-contained; `sys.exit` is used
    # because the bare `exit` helper is only installed by the `site` module.
    import sys
    import traceback

    conn = None
    try:
        conn = init_db()
        sop_count, section_count, rule_count = process_sop_files(conn)
        rebuild_fts(conn)

        print("SOP Database built successfully:")
        print(f" SOPs loaded: {sop_count}")
        print(f" Sections indexed: {section_count}")
        print(f" Rules extracted: {rule_count}")
        print(f" Database: {DB_PATH}")

    except Exception as e:
        print(f"Error: {e}")
        traceback.print_exc()
        sys.exit(1)
    finally:
        # Runs on success and on the SystemExit raised above.
        if conn is not None:
            conn.close()
|
||||
|
||||
# Build the database only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user