feat: add database mode for LLM message storage and analytics

Implements a database mode that stores LLM prompts and responses in PostgreSQL
instead of making API calls. This enables:
- Testing without consuming API credits
- Collecting analytics on usage patterns
- Development and debugging workflows

Changes:
- Added DatabaseClient class for PostgreSQL operations
- Modified LLMAnalyzer to support database/API mode toggle
- Added USE_DATABASE config flag to switch between modes
- Included Docker Compose setup for PostgreSQL
- Added utility scripts for database init and analytics viewing
- Added comprehensive documentation in DATABASE_MODE.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-03-10 21:13:13 -04:00
parent 11a4aba46f
commit 44456cb073
11 changed files with 952 additions and 4 deletions
+42
View File
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
"""Initialize the SPARC database schema.
This script creates the necessary tables and indexes for storing
LLM messages for testing and analytics.
Usage:
python scripts/init_database.py
"""
import sys
import os
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from SPARC import config
from SPARC.database import DatabaseClient
def _redact_database_url(url):
    """Return *url* as text with any embedded password replaced by ``***``.

    A URL without a password is returned unchanged. Text that cannot be
    parsed as a URL is replaced by a placeholder so that a malformed
    connection string can never leak credentials to the console.
    """
    # Function-scope import: the script's file-level imports do not include
    # urllib, and this keeps the change local to this block.
    from urllib.parse import urlsplit, urlunsplit

    text = str(url)
    try:
        parts = urlsplit(text)
    except ValueError:
        return "<unparseable database URL>"
    if parts.password is None:
        return text
    # netloc has the form "user:password@host[:port]"; keep everything
    # except the password itself.
    userinfo, _, host = parts.netloc.rpartition("@")
    username = userinfo.partition(":")[0]
    return urlunsplit(parts._replace(netloc=f"{username}:***@{host}"))


def main():
    """Initialize the database schema.

    Builds a ``DatabaseClient`` from ``config.database_url`` and calls its
    ``initialize_schema()`` method, then prints a summary of the table and
    indexes. The connection URL is echoed with its password masked.

    Raises:
        SystemExit: with status 1 if creating the client or initializing
            the schema raises any exception; the error message is written
            to stderr.
    """
    print("Initializing SPARC database...")
    print(f"Database URL: {_redact_database_url(config.database_url)}")

    try:
        db_client = DatabaseClient(config.database_url)
        db_client.initialize_schema()
    except Exception as e:
        # Top-level boundary of a CLI script: report and exit non-zero
        # instead of dumping a traceback.
        print(f"Error initializing database: {e}", file=sys.stderr)
        sys.exit(1)

    # NOTE(review): the names below are hard-coded here; presumably they
    # mirror what initialize_schema() creates -- keep them in sync.
    print("Database schema initialized successfully!")
    print("\nTables created:")
    print(" - llm_messages: Stores all LLM prompts and responses")
    print("\nIndexes created:")
    print(" - idx_messages_timestamp: For time-based queries")
    print(" - idx_messages_company: For company-specific queries")


if __name__ == "__main__":
    main()
+67
View File
@@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""View analytics from the message database.
This script displays statistics about stored LLM messages including
usage by company, analysis type, and time periods.
Usage:
python scripts/view_analytics.py [--days DAYS]
"""
import sys
import os
import argparse
import json
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from SPARC import config
from SPARC.database import DatabaseClient
def main():
    """Display analytics from the database.

    Parses the ``--days`` option (default 30), fetches analytics through
    ``DatabaseClient.get_analytics(days=...)`` and prints the total message
    count followed by per-company and per-analysis-type counts.

    Raises:
        SystemExit: with status 2 (via argparse) when ``--days`` is not a
            positive integer, or with status 1 when fetching or printing
            the analytics raises any exception; the error message is
            written to stderr.
    """
    parser = argparse.ArgumentParser(description="View SPARC message analytics")
    parser.add_argument(
        "--days",
        type=int,
        default=30,
        help="Number of days to analyze (default: 30)",
    )
    args = parser.parse_args()
    if args.days < 1:
        # A zero or negative window is meaningless; fail fast with a usage
        # message instead of running a query that cannot match anything.
        parser.error("--days must be a positive integer")

    print(f"SPARC Analytics - Last {args.days} days")
    print("=" * 70)

    # (heading, key in the analytics mapping, field holding the row label)
    sections = (
        ("Messages by Company", "by_company", "company_name"),
        ("Messages by Analysis Type", "by_type", "analysis_type"),
    )

    try:
        db_client = DatabaseClient(config.database_url)
        analytics = db_client.get_analytics(days=args.days)

        print(f"\nTotal Messages: {analytics['total_messages']}")

        for heading, key, label_field in sections:
            print(f"\n{heading}:")
            rows = analytics[key]
            if not rows:
                print(" No data")
                continue
            for row in rows:
                # Rows with a missing/empty label are grouped as "(unknown)".
                label = row[label_field] or '(unknown)'
                print(f" {label}: {row['count']}")

        print("\n" + "=" * 70)
    except Exception as e:
        # Top-level boundary of a CLI script: report and exit non-zero
        # instead of dumping a traceback.
        print(f"Error retrieving analytics: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
+78
View File
@@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""View stored messages from the database.
This script displays stored LLM messages with filtering options.
Usage:
python scripts/view_messages.py [--company COMPANY] [--type TYPE] [--limit LIMIT]
"""
import sys
import os
import argparse
from datetime import datetime
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from SPARC import config
from SPARC.database import DatabaseClient
def _preview(text, placeholder, width=200):
    """Return at most *width* characters of *text* for console display.

    ``...`` is appended only when the text was actually cut; a missing or
    empty text yields *placeholder* with no ellipsis.
    """
    if not text:
        return placeholder
    if len(text) <= width:
        return text
    return text[:width] + "..."


def main():
    """Display messages from the database.

    Parses ``--company``, ``--type`` and ``--limit`` (default 10), fetches
    matching rows through ``DatabaseClient.get_messages(...)`` and prints
    each message's id, timestamp, company, type, model and a preview of up
    to 200 characters of its prompt and response.

    Raises:
        SystemExit: with status 2 (via argparse) when ``--limit`` is not a
            positive integer or ``--type`` is not an allowed choice, or
            with status 1 when fetching or printing the messages raises
            any exception; the error message is written to stderr.
    """
    parser = argparse.ArgumentParser(description="View stored SPARC messages")
    parser.add_argument(
        "--company",
        type=str,
        help="Filter by company name",
    )
    parser.add_argument(
        "--type",
        type=str,
        choices=["single_patent", "portfolio"],
        help="Filter by analysis type",
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=10,
        help="Maximum number of messages to display (default: 10)",
    )
    args = parser.parse_args()
    if args.limit < 1:
        # Reject a non-positive limit up front with a usage message rather
        # than passing it on to the database query.
        parser.error("--limit must be a positive integer")

    print("SPARC Stored Messages")
    print("=" * 70)

    try:
        db_client = DatabaseClient(config.database_url)
        messages = db_client.get_messages(
            company_name=args.company,
            analysis_type=args.type,
            limit=args.limit,
        )

        if not messages:
            print("\nNo messages found.")
            return

        print(f"\nShowing {len(messages)} message(s):\n")

        for msg in messages:
            print(f"Message #{msg['id']} - {msg['timestamp']}")
            print(f"Company: {msg['company_name'] or '(unknown)'}")
            print(f"Type: {msg['analysis_type'] or '(unknown)'}")
            print(f"Model: {msg['model'] or '(unknown)'}")
            print("\nPrompt (first 200 chars):")
            print(f" {_preview(msg['prompt'], '(no prompt)')}")
            print("\nResponse (first 200 chars):")
            print(f" {_preview(msg['response'], '(no response)')}")
            print("\n" + "-" * 70 + "\n")
    except Exception as e:
        # Top-level boundary of a CLI script: report and exit non-zero
        # instead of dumping a traceback.
        print(f"Error retrieving messages: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()