From 44456cb073e4e47a75d03c42148d0fb17282cfc7 Mon Sep 17 00:00:00 2001
From: 0xWheatyz <wyatt@leeworks.dev>
Date: Tue, 10 Mar 2026 21:13:13 -0400
Subject: [PATCH] feat: add database mode for LLM message storage and analytics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements a database mode that stores LLM prompts and responses in PostgreSQL
instead of making API calls. This enables:
- Testing without consuming API credits
- Collecting analytics on usage patterns
- Development and debugging workflows

Changes:
- Added DatabaseClient class for PostgreSQL operations
- Modified LLMAnalyzer to support database/API mode toggle
- Added USE_DATABASE config flag to switch between modes
- Included Docker Compose setup for PostgreSQL
- Added utility scripts for database init and analytics viewing
- Comprehensive documentation in DATABASE_MODE.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .env.example              |  16 ++
 DATABASE_MODE.md          | 318 ++++++++++++++++++++++++++++++++++++++
 SPARC/config.py           |   8 +
 SPARC/database.py         | 210 +++++++++++++++++++++++++
 SPARC/llm.py              |  93 ++++++++++-
 docker-compose.yml        |  36 +++++
 requirements.txt          |   1 +
 scripts/init_database.py  |  42 +++++
 scripts/view_analytics.py |  67 ++++++++
 scripts/view_messages.py  |  78 ++++++++++
 test_database_mode.py     |  87 +++++++++++
 11 files changed, 952 insertions(+), 4 deletions(-)
 create mode 100644 .env.example
 create mode 100644 DATABASE_MODE.md
 create mode 100644 SPARC/database.py
 create mode 100644 docker-compose.yml
 create mode 100644 scripts/init_database.py
 create mode 100644 scripts/view_analytics.py
 create mode 100644 scripts/view_messages.py
 create mode 100644 test_database_mode.py

diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..1d776d0
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,16 @@
+# SPARC Configuration
+
+# SerpAPI key for patent search
+API_KEY=your_serpapi_key_here
+
+# OpenRouter API key for LLM analysis
+OPENROUTER_API_KEY=your_openrouter_key_here
+
+# Database configuration (for docker-compose setup)
+DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc
+
+# Toggle between database mode and API mode
+# When USE_DATABASE=true: stores all messages in database instead of sending to OpenRouter
+# When USE_DATABASE=false: sends messages to OpenRouter API as normal
+# Default: false
+USE_DATABASE=false
diff --git a/DATABASE_MODE.md b/DATABASE_MODE.md
new file mode 100644
index 0000000..4beceb0
--- /dev/null
+++ b/DATABASE_MODE.md
@@ -0,0 +1,318 @@
+# Database Mode for Testing and Analytics
+
+This document explains how to use SPARC's database mode for storing LLM messages for testing and analytics purposes.
+
+## Overview
+
+SPARC supports two modes of operation:
+
+1. **API Mode** (default): Messages are sent to OpenRouter's API and you receive real LLM responses
+2. **Database Mode**: Messages are stored in a PostgreSQL database without making API calls, useful for:
+   - Testing the application without consuming API credits
+   - Collecting analytics on message patterns and usage
+   - Development and debugging
+
+## Setup
+
+### 1. Start the Database
+
+Use docker-compose to start the PostgreSQL database:
+
+```bash
+docker-compose up -d postgres
+```
+
+This will start a PostgreSQL instance accessible at `localhost:5432`.
+
+### 2. Initialize the Database Schema
+
+Run the initialization script to create the necessary tables:
+
+```bash
+python scripts/init_database.py
+```
+
+This creates the `llm_messages` table and indexes for efficient querying.
+
+### 3. Configure Environment Variables
+
+Create a `.env` file (or copy from `.env.example`):
+
+```bash
+cp .env.example .env
+```
+
+Edit `.env` and set:
+
+```env
+# For database mode (testing/analytics)
+USE_DATABASE=true
+DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc
+
+# For API mode (production) - use INSTEAD of the database-mode lines above; define USE_DATABASE only once
+USE_DATABASE=false
+OPENROUTER_API_KEY=your_openrouter_key_here
+```
+
+## Usage
+
+### Running in Database Mode
+
+Set `USE_DATABASE=true` in your `.env` file, then run the application normally:
+
+```bash
+python main.py
+```
+
+Instead of sending messages to OpenRouter, the application will:
+- Store all prompts in the database
+- Return a placeholder response
+- Log metadata (company name, analysis type, timestamps)
+
+### Running in API Mode
+
+Set `USE_DATABASE=false` in your `.env` file, then run the application normally:
+
+```bash
+python main.py
+```
+
+The application will send messages to OpenRouter and return real LLM responses.
+
+### Hybrid Mode (Optional)
+
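+You can also log real API calls to the database. `LLMAnalyzer` only creates its own database client when `USE_DATABASE=true`, which also disables API calls, so hybrid logging is not automatic: construct the analyzer in API mode and assign a schema-initialized `DatabaseClient` to `analyzer.db_client` yourself. Each successful API call is then stored together with its token usage.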
+
+## Viewing Analytics
+
+### View Message Statistics
+
+```bash
+python scripts/view_analytics.py
+```
+
+Options:
+- `--days N`: Analyze messages from the last N days (default: 30)
+
+Example output:
+```
+SPARC Analytics - Last 30 days
+======================================================================
+
+Total Messages: 45
+
+Messages by Company:
+  nvidia: 25
+  intel: 12
+  amd: 8
+
+Messages by Analysis Type:
+  portfolio: 30
+  single_patent: 15
+
+======================================================================
+```
+
+### View Stored Messages
+
+```bash
+python scripts/view_messages.py
+```
+
+Options:
+- `--company COMPANY`: Filter by company name
+- `--type TYPE`: Filter by analysis type (single_patent or portfolio)
+- `--limit N`: Maximum number of messages to display (default: 10)
+
+Examples:
+```bash
+# View last 10 messages
+python scripts/view_messages.py
+
+# View all messages for nvidia
+python scripts/view_messages.py --company nvidia --limit 100
+
+# View portfolio analyses only
+python scripts/view_messages.py --type portfolio
+```
+
+## Database Schema
+
+### llm_messages Table
+
+| Column | Type | Description |
+|--------|------|-------------|
+| id | SERIAL | Primary key |
+| timestamp | TIMESTAMP | When the message was created |
+| company_name | VARCHAR(255) | Company being analyzed |
+| analysis_type | VARCHAR(50) | Type of analysis (single_patent, portfolio) |
+| model | VARCHAR(100) | LLM model identifier |
+| prompt | TEXT | The full prompt sent to the LLM |
+| response | TEXT | The response from the LLM |
+| metadata | JSONB | Additional metadata (patent IDs, content length, etc.) |
+| token_usage | JSONB | Token usage statistics (when available) |
+| created_at | TIMESTAMP | Record creation timestamp |
+
+### Indexes
+
+- `idx_messages_timestamp`: Speeds up time-based queries
+- `idx_messages_company`: Speeds up company-specific queries
+
+## Docker Compose
+
+The included `docker-compose.yml` provides:
+
+1. **PostgreSQL Database**:
+   - Image: `postgres:16-alpine`
+   - Port: `5432`
+   - Credentials: postgres/postgres
+   - Database: sparc
+   - Persistent storage via volume
+
+2. **Application Container** (optional):
+   - Builds from Dockerfile
+   - Connects to PostgreSQL
+   - Mounts current directory
+
+### Start Services
+
+```bash
+# Start just the database
+docker-compose up -d postgres
+
+# Start everything
+docker-compose up -d
+
+# View logs
+docker-compose logs -f
+
+# Stop services
+docker-compose down
+
+# Stop and remove volumes (WARNING: deletes data)
+docker-compose down -v
+```
+
+## Toggling Between Modes
+
+You can easily switch between modes by changing the `USE_DATABASE` environment variable:
+
+### Quick Toggle (temporary, for testing)
+
+```bash
+# Run in database mode
+USE_DATABASE=true python main.py
+
+# Run in API mode
+USE_DATABASE=false python main.py
+```
+
+### Persistent Toggle
+
+Edit your `.env` file:
+
+```env
+# For testing/analytics
+USE_DATABASE=true
+
+# For production use (alternative to the line above; keep only one USE_DATABASE entry)
+USE_DATABASE=false
+```
+
+## Use Cases
+
+### Testing Without API Costs
+
+During development, enable database mode to test the full application flow without consuming API credits:
+
+```bash
+USE_DATABASE=true python main.py
+```
+
+### Collecting Usage Analytics
+
+Enable database mode in a test environment to collect analytics on:
+- Which companies are analyzed most frequently
+- Types of analyses performed
+- Prompt patterns and lengths
+- Usage over time
+
+### Development and Debugging
+
+Database mode is useful for:
+- Testing patent parsing logic without API calls
+- Debugging the full pipeline end-to-end
+- Collecting sample prompts for optimization
+- Understanding token usage patterns (when in API mode with logging)
+
+## Troubleshooting
+
+### Connection Refused
+
+If you get "connection refused" errors:
+
+1. Ensure PostgreSQL is running: `docker-compose ps`
+2. Check the DATABASE_URL in your `.env` file
+3. Wait for the database to be healthy: `docker-compose logs postgres`
+
+### Schema Not Found
+
+If you get "relation does not exist" errors:
+
+1. Run the initialization script: `python scripts/init_database.py`
+2. Verify tables were created: `docker-compose exec postgres psql -U postgres -d sparc -c "\dt"`
+
+### Permission Denied
+
+If you get permission errors:
+
+1. Check your DATABASE_URL credentials match docker-compose.yml
+2. Ensure the database container is running: `docker-compose up -d postgres`
+
+## Advanced Usage
+
+### Direct Database Access
+
+You can access the database directly using psql:
+
+```bash
+docker-compose exec postgres psql -U postgres -d sparc
+```
+
+Example queries:
+
+```sql
+-- View all messages
+SELECT id, company_name, analysis_type, timestamp FROM llm_messages ORDER BY timestamp DESC LIMIT 10;
+
+-- Count messages by company
+SELECT company_name, COUNT(*) FROM llm_messages GROUP BY company_name;
+
+-- View recent prompts
+SELECT prompt FROM llm_messages ORDER BY timestamp DESC LIMIT 5;
+```
+
+### Programmatic Access
+
+You can use the `DatabaseClient` directly in your code:
+
+```python
+from SPARC.database import DatabaseClient
+from SPARC import config
+
+db = DatabaseClient(config.database_url)
+
+# Get messages
+messages = db.get_messages(company_name="nvidia", limit=10)
+
+# Get analytics
+analytics = db.get_analytics(days=7)
+
+# Store a custom message
+db.store_message(
+    prompt="test prompt",
+    response="test response",
+    company_name="test",
+    analysis_type="custom"
+)
+```
diff --git a/SPARC/config.py b/SPARC/config.py
index 33f3035..08dbc7a 100644
--- a/SPARC/config.py
+++ b/SPARC/config.py
@@ -12,3 +12,11 @@ api_key = os.getenv("API_KEY")
 
 # OpenRouter API key for LLM analysis
 openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
+
+# Database configuration
+database_url = os.getenv("DATABASE_URL", "postgresql://postgres:postgres@localhost:5432/sparc")
+
+# Toggle between database mode and API mode
+# When True: stores all messages in database instead of sending to OpenRouter
+# When False: sends messages to OpenRouter API as normal
+use_database = os.getenv("USE_DATABASE", "false").lower() in ("true", "1", "yes")
diff --git a/SPARC/database.py b/SPARC/database.py
new file mode 100644
index 0000000..c0fae7d
--- /dev/null
+++ b/SPARC/database.py
@@ -0,0 +1,210 @@
+"""Database client for storing and retrieving LLM messages."""
+
+import psycopg2
+from psycopg2.extras import RealDictCursor
+from typing import Dict, List, Optional
+from datetime import datetime
+import json
+
+
+class DatabaseClient:
+    """Handles database operations for message storage and retrieval."""
+
+    def __init__(self, database_url: str):
+        """Initialize the database client.
+
+        Args:
+            database_url: PostgreSQL connection string
+        """
+        self.database_url = database_url
+        self.conn = None
+
+    def connect(self):
+        """Establish database connection."""
+        if not self.conn or self.conn.closed:
+            self.conn = psycopg2.connect(self.database_url)
+
+    def close(self):
+        """Close database connection."""
+        if self.conn and not self.conn.closed:
+            self.conn.close()
+
+    def initialize_schema(self):
+        """Create database tables if they don't exist."""
+        self.connect()
+
+        with self.conn.cursor() as cursor:
+            # Create messages table
+            cursor.execute("""
+                CREATE TABLE IF NOT EXISTS llm_messages (
+                    id SERIAL PRIMARY KEY,
+                    timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    company_name VARCHAR(255),
+                    analysis_type VARCHAR(50),
+                    model VARCHAR(100),
+                    prompt TEXT NOT NULL,
+                    response TEXT,
+                    metadata JSONB,
+                    token_usage JSONB,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            """)
+
+            # Create index on timestamp for analytics queries
+            cursor.execute("""
+                CREATE INDEX IF NOT EXISTS idx_messages_timestamp
+                ON llm_messages(timestamp)
+            """)
+
+            # Create index on company_name for filtering
+            cursor.execute("""
+                CREATE INDEX IF NOT EXISTS idx_messages_company
+                ON llm_messages(company_name)
+            """)
+
+            self.conn.commit()
+
+    def store_message(
+        self,
+        prompt: str,
+        response: str,
+        company_name: Optional[str] = None,
+        analysis_type: Optional[str] = None,
+        model: Optional[str] = None,
+        metadata: Optional[Dict] = None,
+        token_usage: Optional[Dict] = None,
+    ) -> int:
+        """Store an LLM message exchange in the database.
+
+        Args:
+            prompt: The prompt sent to the LLM
+            response: The response from the LLM
+            company_name: Name of company being analyzed
+            analysis_type: Type of analysis (e.g., 'single_patent', 'portfolio')
+            model: Model identifier used
+            metadata: Additional metadata as dict
+            token_usage: Token usage information
+
+        Returns:
+            The ID of the inserted record
+        """
+        self.connect()
+
+        with self.conn.cursor() as cursor:
+            cursor.execute(
+                """
+                INSERT INTO llm_messages
+                (prompt, response, company_name, analysis_type, model, metadata, token_usage)
+                VALUES (%s, %s, %s, %s, %s, %s, %s)
+                RETURNING id
+                """,
+                (
+                    prompt,
+                    response,
+                    company_name,
+                    analysis_type,
+                    model,
+                    json.dumps(metadata) if metadata else None,
+                    json.dumps(token_usage) if token_usage else None,
+                ),
+            )
+
+            message_id = cursor.fetchone()[0]
+            self.conn.commit()
+
+            return message_id
+
+    def get_messages(
+        self,
+        company_name: Optional[str] = None,
+        analysis_type: Optional[str] = None,
+        limit: int = 100,
+        offset: int = 0,
+    ) -> List[Dict]:
+        """Retrieve messages from the database.
+
+        Args:
+            company_name: Filter by company name
+            analysis_type: Filter by analysis type
+            limit: Maximum number of records to return
+            offset: Number of records to skip
+
+        Returns:
+            List of message dictionaries
+        """
+        self.connect()
+
+        query = "SELECT * FROM llm_messages WHERE 1=1"
+        params = []
+
+        if company_name:
+            query += " AND company_name = %s"
+            params.append(company_name)
+
+        if analysis_type:
+            query += " AND analysis_type = %s"
+            params.append(analysis_type)
+
+        query += " ORDER BY timestamp DESC LIMIT %s OFFSET %s"
+        params.extend([limit, offset])
+
+        with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
+            cursor.execute(query, params)
+            return [dict(row) for row in cursor.fetchall()]
+
+    def get_analytics(self, days: int = 30) -> Dict:
+        """Summarise stored messages over a trailing time window.
+
+        Args:
+            days: Size of the window in days, counted back from NOW().
+
+        Returns:
+            Dict with total_messages, by_company (top 10), by_type, period_days.
+        """
+        self.connect()
+
+        with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
+            # Total messages; %s is bound as a bare number, never inside a quoted literal
+            cursor.execute(
+                """
+                SELECT COUNT(*) as total_messages
+                FROM llm_messages
+                WHERE timestamp >= NOW() - %s * INTERVAL '1 day'
+                """,
+                (days,),
+            )
+            total = cursor.fetchone()["total_messages"]
+
+            # Messages by company (ten most frequent)
+            cursor.execute(
+                """
+                SELECT company_name, COUNT(*) as count
+                FROM llm_messages
+                WHERE timestamp >= NOW() - %s * INTERVAL '1 day'
+                GROUP BY company_name
+                ORDER BY count DESC
+                LIMIT 10
+                """,
+                (days,),
+            )
+            by_company = cursor.fetchall()
+
+            # Messages by analysis type
+            cursor.execute(
+                """
+                SELECT analysis_type, COUNT(*) as count
+                FROM llm_messages
+                WHERE timestamp >= NOW() - %s * INTERVAL '1 day'
+                GROUP BY analysis_type
+                ORDER BY count DESC
+                """,
+                (days,),
+            )
+            by_type = cursor.fetchall()
+
+            return {
+                "total_messages": total,
+                "by_company": [dict(row) for row in by_company],
+                "by_type": [dict(row) for row in by_type],
+                "period_days": days,
+            }
diff --git a/SPARC/llm.py b/SPARC/llm.py
index df52668..ef56dae 100644
--- a/SPARC/llm.py
+++ b/SPARC/llm.py
@@ -2,22 +2,33 @@
 
 from openai import OpenAI
 from SPARC import config
+from SPARC.database import DatabaseClient
 from typing import Dict
 
 
 class LLMAnalyzer:
     """Handles LLM-based analysis of patent content."""
 
-    def __init__(self, api_key: str | None = None, test_mode: bool = False):
+    def __init__(self, api_key: str | None = None, test_mode: bool = False, use_database: bool | None = None):
         """Initialize the LLM analyzer.
 
         Args:
           api_key: OpenRouter API key. If None, will attempt to load from config.
           test_mode: If True, print prompts instead of making API calls
+          use_database: If True, store messages in database instead of calling API.
+                       If None, will use config.use_database
         """
         self.test_mode = test_mode
+        self.use_database = use_database if use_database is not None else config.use_database
+        self.db_client = None
 
-        if (api_key or config.openrouter_api_key) and not test_mode:
+        # Initialize database client if in database mode
+        if self.use_database:
+            self.db_client = DatabaseClient(config.database_url)
+            self.db_client.initialize_schema()
+
+        # Initialize OpenRouter client if not in database mode
+        if (api_key or config.openrouter_api_key) and not test_mode and not self.use_database:
             self.client = OpenAI(
                 api_key=api_key or config.openrouter_api_key,
                 base_url="https://openrouter.ai/api/v1"
@@ -57,13 +68,47 @@ Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals
             print("=" * 80)
             return "[TEST MODE - No API call made]"
 
+        # Database mode: store the prompt and return a placeholder response
+        if self.use_database:
+            response_text = "[DATABASE MODE] Message stored for testing/analytics. Enable API mode to get actual analysis."
+
+            self.db_client.store_message(
+                prompt=prompt,
+                response=response_text,
+                company_name=company_name,
+                analysis_type="single_patent",
+                model=self.model if hasattr(self, 'model') else None,
+                metadata={"patent_content_length": len(patent_content)}
+            )
+
+            return response_text
+
+        # API mode: send to OpenRouter
         if self.client:
             response = self.client.chat.completions.create(
                 model=self.model,
                 max_tokens=1024,
                 messages=[{"role": "user", "content": prompt}],
             )
-            return response.choices[0].message.content
+            response_text = response.choices[0].message.content
+
+            # Log the exchange when a db_client is attached; usage can be None, so test its value
+            if self.db_client:
+                self.db_client.store_message(
+                    prompt=prompt,
+                    response=response_text,
+                    company_name=company_name,
+                    analysis_type="single_patent",
+                    model=self.model,
+                    metadata={"patent_content_length": len(patent_content)},
+                    token_usage={
+                        "prompt_tokens": response.usage.prompt_tokens,
+                        "completion_tokens": response.usage.completion_tokens,
+                        "total_tokens": response.usage.total_tokens
+                    } if getattr(response, 'usage', None) else None
+                )
+
+            return response_text
        
     def analyze_patent_portfolio(
         self, patents_data: list[Dict[str, str]], company_name: str
@@ -105,6 +150,25 @@ Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the co
             print(prompt)
             return "[TEST MODE]"
 
+        # Database mode: store the prompt and return a placeholder response
+        if self.use_database:
+            response_text = "[DATABASE MODE] Message stored for testing/analytics. Enable API mode to get actual analysis."
+
+            self.db_client.store_message(
+                prompt=prompt,
+                response=response_text,
+                company_name=company_name,
+                analysis_type="portfolio",
+                model=self.model if hasattr(self, 'model') else None,
+                metadata={
+                    "patent_count": len(patents_data),
+                    "patent_ids": [p['patent_id'] for p in patents_data]
+                }
+            )
+
+            return response_text
+
+        # API mode: send to OpenRouter
         try:
             response = self.client.chat.completions.create(
                 model=self.model,
@@ -112,7 +176,28 @@ Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the co
                 messages=[{"role": "user", "content": prompt}],
             )
 
-            return response.choices[0].message.content
+            response_text = response.choices[0].message.content
+
+            # Log the exchange when a db_client is attached; usage can be None, so test its value
+            if self.db_client:
+                self.db_client.store_message(
+                    prompt=prompt,
+                    response=response_text,
+                    company_name=company_name,
+                    analysis_type="portfolio",
+                    model=self.model,
+                    metadata={
+                        "patent_count": len(patents_data),
+                        "patent_ids": [p['patent_id'] for p in patents_data]
+                    },
+                    token_usage={
+                        "prompt_tokens": response.usage.prompt_tokens,
+                        "completion_tokens": response.usage.completion_tokens,
+                        "total_tokens": response.usage.total_tokens
+                    } if getattr(response, 'usage', None) else None
+                )
+
+            return response_text
         except AttributeError:
             return prompt
        
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..cf5c2b5
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,36 @@
+services:
+  postgres:
+    image: postgres:16-alpine
+    container_name: sparc-postgres
+    environment:
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: postgres
+      POSTGRES_DB: sparc
+    ports:
+      - "5432:5432"
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U postgres"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  app:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: sparc-app
+    depends_on:
+      postgres:
+        condition: service_healthy
+    environment:
+      USE_DATABASE: "true"  # quoted so YAML keeps it a string rather than a boolean
+      DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
+    ports:
+      - "8000:8000"
+    volumes:
+      - .:/app
+
+volumes:
+  postgres_data:
diff --git a/requirements.txt b/requirements.txt
index de9f444..df43541 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ requests
 pytest
 pytest-mock
 openai
+psycopg2-binary
diff --git a/scripts/init_database.py b/scripts/init_database.py
new file mode 100644
index 0000000..82e11bb
--- /dev/null
+++ b/scripts/init_database.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""Initialize the SPARC database schema.
+
+This script creates the necessary tables and indexes for storing
+LLM messages for testing and analytics.
+
+Usage:
+    python scripts/init_database.py
+"""
+
+import sys
+import os
+
+# Add parent directory to path
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from SPARC import config
+from SPARC.database import DatabaseClient
+
+
+def main():
+    """Initialize the database schema."""
+    print("Initializing SPARC database...")
+    print(f"Database URL: {config.database_url}")
+
+    try:
+        db_client = DatabaseClient(config.database_url)
+        db_client.initialize_schema()
+        print("Database schema initialized successfully!")
+        print("\nTables created:")
+        print("  - llm_messages: Stores all LLM prompts and responses")
+        print("\nIndexes created:")
+        print("  - idx_messages_timestamp: For time-based queries")
+        print("  - idx_messages_company: For company-specific queries")
+
+    except Exception as e:
+        print(f"Error initializing database: {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/view_analytics.py b/scripts/view_analytics.py
new file mode 100644
index 0000000..b90a5c4
--- /dev/null
+++ b/scripts/view_analytics.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+"""View analytics from the message database.
+
+This script displays statistics about stored LLM messages including
+usage by company, analysis type, and time periods.
+
+Usage:
+    python scripts/view_analytics.py [--days DAYS]
+"""
+
+import sys
+import os
+import argparse
+import json
+
+# Add parent directory to path
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from SPARC import config
+from SPARC.database import DatabaseClient
+
+
+def main():
+    """Display analytics from the database."""
+    parser = argparse.ArgumentParser(description="View SPARC message analytics")
+    parser.add_argument(
+        "--days",
+        type=int,
+        default=30,
+        help="Number of days to analyze (default: 30)"
+    )
+    args = parser.parse_args()
+
+    print(f"SPARC Analytics - Last {args.days} days")
+    print("=" * 70)
+
+    try:
+        db_client = DatabaseClient(config.database_url)
+        analytics = db_client.get_analytics(days=args.days)
+
+        print(f"\nTotal Messages: {analytics['total_messages']}")
+
+        print("\nMessages by Company:")
+        if analytics['by_company']:
+            for item in analytics['by_company']:
+                company = item['company_name'] or '(unknown)'
+                print(f"  {company}: {item['count']}")
+        else:
+            print("  No data")
+
+        print("\nMessages by Analysis Type:")
+        if analytics['by_type']:
+            for item in analytics['by_type']:
+                analysis_type = item['analysis_type'] or '(unknown)'
+                print(f"  {analysis_type}: {item['count']}")
+        else:
+            print("  No data")
+
+        print("\n" + "=" * 70)
+
+    except Exception as e:
+        print(f"Error retrieving analytics: {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/view_messages.py b/scripts/view_messages.py
new file mode 100644
index 0000000..e2fe352
--- /dev/null
+++ b/scripts/view_messages.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""View stored messages from the database.
+
+This script displays stored LLM messages with filtering options.
+
+Usage:
+    python scripts/view_messages.py [--company COMPANY] [--type TYPE] [--limit LIMIT]
+"""
+
+import sys
+import os
+import argparse
+from datetime import datetime
+
+# Add parent directory to path
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from SPARC import config
+from SPARC.database import DatabaseClient
+
+
+def main():
+    """Display messages from the database."""
+    parser = argparse.ArgumentParser(description="View stored SPARC messages")
+    parser.add_argument(
+        "--company",
+        type=str,
+        help="Filter by company name"
+    )
+    parser.add_argument(
+        "--type",
+        type=str,
+        choices=["single_patent", "portfolio"],
+        help="Filter by analysis type"
+    )
+    parser.add_argument(
+        "--limit",
+        type=int,
+        default=10,
+        help="Maximum number of messages to display (default: 10)"
+    )
+    args = parser.parse_args()
+
+    print("SPARC Stored Messages")
+    print("=" * 70)
+
+    try:
+        db_client = DatabaseClient(config.database_url)
+        messages = db_client.get_messages(
+            company_name=args.company,
+            analysis_type=args.type,
+            limit=args.limit
+        )
+
+        if not messages:
+            print("\nNo messages found.")
+            return
+
+        print(f"\nShowing {len(messages)} message(s):\n")
+
+        for i, msg in enumerate(messages, 1):
+            print(f"Message #{msg['id']} - {msg['timestamp']}")
+            print(f"Company: {msg['company_name'] or '(unknown)'}")
+            print(f"Type: {msg['analysis_type'] or '(unknown)'}")
+            print(f"Model: {msg['model'] or '(unknown)'}")
+            print(f"\nPrompt (first 200 chars):")
+            print(f"  {msg['prompt'][:200]}...")
+            print(f"\nResponse (first 200 chars):")
+            print(f"  {msg['response'][:200] if msg['response'] else '(no response)'}...")
+            print("\n" + "-" * 70 + "\n")
+
+    except Exception as e:
+        print(f"Error retrieving messages: {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test_database_mode.py b/test_database_mode.py
new file mode 100644
index 0000000..7508ef2
--- /dev/null
+++ b/test_database_mode.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+"""Test script to verify database mode functionality.
+
+This script tests the LLMAnalyzer in database mode without requiring
+actual API keys or patent downloads.
+"""
+
+from SPARC.llm import LLMAnalyzer
+from SPARC.database import DatabaseClient
+from SPARC import config
+
+def test_database_mode():
+    """Test that database mode stores messages correctly."""
+    print("Testing Database Mode")
+    print("=" * 70)
+
+    # Initialize analyzer in database mode
+    print("\n1. Initializing LLMAnalyzer in database mode...")
+    analyzer = LLMAnalyzer(use_database=True)
+
+    print(f"   - use_database: {analyzer.use_database}")
+    print(f"   - db_client: {analyzer.db_client is not None}")
+
+    # Test single patent analysis
+    print("\n2. Testing single patent analysis (database mode)...")
+    result = analyzer.analyze_patent_content(
+        patent_content="Test patent content about semiconductor innovation",
+        company_name="TestCorp"
+    )
+    print(f"   Result: {result}")
+
+    # Test portfolio analysis
+    print("\n3. Testing portfolio analysis (database mode)...")
+    test_patents = [
+        {"patent_id": "US001", "content": "First test patent"},
+        {"patent_id": "US002", "content": "Second test patent"},
+    ]
+    result = analyzer.analyze_patent_portfolio(
+        patents_data=test_patents,
+        company_name="TestCorp"
+    )
+    print(f"   Result: {result}")
+
+    # Verify messages were stored
+    print("\n4. Verifying messages were stored...")
+    db_client = DatabaseClient(config.database_url)
+    messages = db_client.get_messages(company_name="TestCorp", limit=10)
+    print(f"   Found {len(messages)} stored messages")
+
+    for msg in messages:
+        print(f"   - ID: {msg['id']}, Type: {msg['analysis_type']}, Timestamp: {msg['timestamp']}")
+
+    # Get analytics
+    print("\n5. Getting analytics...")
+    analytics = db_client.get_analytics(days=1)
+    print(f"   Total messages: {analytics['total_messages']}")
+    print(f"   By company: {analytics['by_company']}")
+    print(f"   By type: {analytics['by_type']}")
+
+    print("\n" + "=" * 70)
+    print("Database mode test completed successfully!")
+
+def test_api_mode():
+    """Test that API mode initializes correctly."""
+    print("\nTesting API Mode")
+    print("=" * 70)
+
+    print("\n1. Initializing LLMAnalyzer in API mode...")
+    analyzer = LLMAnalyzer(use_database=False, test_mode=True)
+
+    print(f"   - use_database: {analyzer.use_database}")
+    print(f"   - test_mode: {analyzer.test_mode}")
+
+    print("\n2. Testing single patent analysis (test mode)...")
+    result = analyzer.analyze_patent_content(
+        patent_content="Test patent content",
+        company_name="TestCorp2"
+    )
+    print(f"   Result: {result}")
+
+    print("\n" + "=" * 70)
+    print("API mode test completed successfully!")
+
+if __name__ == "__main__":
+    test_database_mode()
+    print("\n")
+    test_api_mode()