From 44456cb073e4e47a75d03c42148d0fb17282cfc7 Mon Sep 17 00:00:00 2001 From: 0xWheatyz Date: Tue, 10 Mar 2026 21:13:13 -0400 Subject: [PATCH] feat: add database mode for LLM message storage and analytics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements a database mode that stores LLM prompts and responses in PostgreSQL instead of making API calls. This enables: - Testing without consuming API credits - Collecting analytics on usage patterns - Development and debugging workflows Changes: - Added DatabaseClient class for PostgreSQL operations - Modified LLMAnalyzer to support database/API mode toggle - Added USE_DATABASE config flag to switch between modes - Included Docker Compose setup for PostgreSQL - Added utility scripts for database init and analytics viewing - Comprehensive documentation in DATABASE_MODE.md 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .env.example | 16 ++ DATABASE_MODE.md | 318 ++++++++++++++++++++++++++++++++++++++ SPARC/config.py | 8 + SPARC/database.py | 210 +++++++++++++++++++++++++ SPARC/llm.py | 93 ++++++++++- docker-compose.yml | 36 +++++ requirements.txt | 1 + scripts/init_database.py | 42 +++++ scripts/view_analytics.py | 67 ++++++++ scripts/view_messages.py | 78 ++++++++++ test_database_mode.py | 87 +++++++++++ 11 files changed, 952 insertions(+), 4 deletions(-) create mode 100644 .env.example create mode 100644 DATABASE_MODE.md create mode 100644 SPARC/database.py create mode 100644 docker-compose.yml create mode 100644 scripts/init_database.py create mode 100644 scripts/view_analytics.py create mode 100644 scripts/view_messages.py create mode 100644 test_database_mode.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..1d776d0 --- /dev/null +++ b/.env.example @@ -0,0 +1,16 @@ +# SPARC Configuration + +# SerpAPI key for patent search +API_KEY=your_serpapi_key_here + +# OpenRouter API key for LLM analysis 
+OPENROUTER_API_KEY=your_openrouter_key_here + +# Database configuration (for docker-compose setup) +DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc + +# Toggle between database mode and API mode +# When USE_DATABASE=true: stores all messages in database instead of sending to OpenRouter +# When USE_DATABASE=false: sends messages to OpenRouter API as normal +# Default: false +USE_DATABASE=false diff --git a/DATABASE_MODE.md b/DATABASE_MODE.md new file mode 100644 index 0000000..4beceb0 --- /dev/null +++ b/DATABASE_MODE.md @@ -0,0 +1,318 @@ +# Database Mode for Testing and Analytics + +This document explains how to use SPARC's database mode for storing LLM messages for testing and analytics purposes. + +## Overview + +SPARC supports two modes of operation: + +1. **API Mode** (default): Messages are sent to OpenRouter's API and you receive real LLM responses +2. **Database Mode**: Messages are stored in a PostgreSQL database without making API calls, useful for: + - Testing the application without consuming API credits + - Collecting analytics on message patterns and usage + - Development and debugging + +## Setup + +### 1. Start the Database + +Use docker-compose to start the PostgreSQL database: + +```bash +docker-compose up -d postgres +``` + +This will start a PostgreSQL instance accessible at `localhost:5432`. + +### 2. Initialize the Database Schema + +Run the initialization script to create the necessary tables: + +```bash +python scripts/init_database.py +``` + +This creates the `llm_messages` table and indexes for efficient querying. + +### 3. 
Configure Environment Variables + +Create a `.env` file (or copy from `.env.example`): + +```bash +cp .env.example .env +``` + +Edit `.env` and keep only the settings for the mode you want (the two `USE_DATABASE` lines below are alternatives, not one file): + +```env +# For database mode (testing/analytics) +USE_DATABASE=true +DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc + +# For API mode (production) +USE_DATABASE=false +OPENROUTER_API_KEY=your_openrouter_key_here +``` + +## Usage + +### Running in Database Mode + +Set `USE_DATABASE=true` in your `.env` file, then run the application normally: + +```bash +python main.py +``` + +Instead of sending messages to OpenRouter, the application will: +- Store all prompts in the database +- Return a placeholder response +- Log metadata (company name, analysis type, timestamps) + +### Running in API Mode + +Set `USE_DATABASE=false` in your `.env` file, then run the application normally: + +```bash +python main.py +``` + +The application will send messages to OpenRouter and return real LLM responses. + +### Hybrid Mode (Optional) + +You can also log real API calls to the database. Because `USE_DATABASE=true` disables API calls, create the analyzer in API mode and attach a database client yourself, for example `analyzer.db_client = DatabaseClient(config.database_url)` (call `initialize_schema()` on the client first). Whenever `db_client` is set, the `LLMAnalyzer` stores each API prompt, response and token usage in the database. 
+ +## Viewing Analytics + +### View Message Statistics + +```bash +python scripts/view_analytics.py +``` + +Options: +- `--days N`: Analyze messages from the last N days (default: 30) + +Example output: +``` +SPARC Analytics - Last 30 days +====================================================================== + +Total Messages: 45 + +Messages by Company: + nvidia: 25 + intel: 12 + amd: 8 + +Messages by Analysis Type: + portfolio: 30 + single_patent: 15 + +====================================================================== +``` + +### View Stored Messages + +```bash +python scripts/view_messages.py +``` + +Options: +- `--company COMPANY`: Filter by company name +- `--type TYPE`: Filter by analysis type (single_patent or portfolio) +- `--limit N`: Maximum number of messages to display (default: 10) + +Examples: +```bash +# View last 10 messages +python scripts/view_messages.py + +# View all messages for nvidia +python scripts/view_messages.py --company nvidia --limit 100 + +# View portfolio analyses only +python scripts/view_messages.py --type portfolio +``` + +## Database Schema + +### llm_messages Table + +| Column | Type | Description | +|--------|------|-------------| +| id | SERIAL | Primary key | +| timestamp | TIMESTAMP | When the message was created | +| company_name | VARCHAR(255) | Company being analyzed | +| analysis_type | VARCHAR(50) | Type of analysis (single_patent, portfolio) | +| model | VARCHAR(100) | LLM model identifier | +| prompt | TEXT | The full prompt sent to the LLM | +| response | TEXT | The response from the LLM | +| metadata | JSONB | Additional metadata (patent IDs, content length, etc.) | +| token_usage | JSONB | Token usage statistics (when available) | +| created_at | TIMESTAMP | Record creation timestamp | + +### Indexes + +- `idx_messages_timestamp`: Speeds up time-based queries +- `idx_messages_company`: Speeds up company-specific queries + +## Docker Compose + +The included `docker-compose.yml` provides: + +1. 
**PostgreSQL Database**: + - Image: `postgres:16-alpine` + - Port: `5432` + - Credentials: postgres/postgres + - Database: sparc + - Persistent storage via volume + +2. **Application Container** (optional): + - Builds from Dockerfile + - Connects to PostgreSQL + - Mounts current directory + +### Start Services + +```bash +# Start just the database +docker-compose up -d postgres + +# Start everything +docker-compose up -d + +# View logs +docker-compose logs -f + +# Stop services +docker-compose down + +# Stop and remove volumes (WARNING: deletes data) +docker-compose down -v +``` + +## Toggling Between Modes + +You can easily switch between modes by changing the `USE_DATABASE` environment variable: + +### Quick Toggle (temporary, for testing) + +```bash +# Run in database mode +USE_DATABASE=true python main.py + +# Run in API mode +USE_DATABASE=false python main.py +``` + +### Persistent Toggle + +Edit your `.env` file: + +```env +# For testing/analytics +USE_DATABASE=true + +# For production use +USE_DATABASE=false +``` + +## Use Cases + +### Testing Without API Costs + +During development, enable database mode to test the full application flow without consuming API credits: + +```bash +USE_DATABASE=true python main.py +``` + +### Collecting Usage Analytics + +Enable database mode in a test environment to collect analytics on: +- Which companies are analyzed most frequently +- Types of analyses performed +- Prompt patterns and lengths +- Usage over time + +### Development and Debugging + +Database mode is useful for: +- Testing patent parsing logic without API calls +- Debugging the full pipeline end-to-end +- Collecting sample prompts for optimization +- Understanding token usage patterns (when in API mode with logging) + +## Troubleshooting + +### Connection Refused + +If you get "connection refused" errors: + +1. Ensure PostgreSQL is running: `docker-compose ps` +2. Check the DATABASE_URL in your `.env` file +3. 
Wait for the database to be healthy: `docker-compose logs postgres` + +### Schema Not Found + +If you get "relation does not exist" errors: + +1. Run the initialization script: `python scripts/init_database.py` +2. Verify tables were created: `docker-compose exec postgres psql -U postgres -d sparc -c "\dt"` + +### Permission Denied + +If you get permission errors: + +1. Check your DATABASE_URL credentials match docker-compose.yml +2. Ensure the database container is running: `docker-compose up -d postgres` + +## Advanced Usage + +### Direct Database Access + +You can access the database directly using psql: + +```bash +docker-compose exec postgres psql -U postgres -d sparc +``` + +Example queries: + +```sql +-- View all messages +SELECT id, company_name, analysis_type, timestamp FROM llm_messages ORDER BY timestamp DESC LIMIT 10; + +-- Count messages by company +SELECT company_name, COUNT(*) FROM llm_messages GROUP BY company_name; + +-- View recent prompts +SELECT prompt FROM llm_messages ORDER BY timestamp DESC LIMIT 5; +``` + +### Programmatic Access + +You can use the `DatabaseClient` directly in your code: + +```python +from SPARC.database import DatabaseClient +from SPARC import config + +db = DatabaseClient(config.database_url) + +# Get messages +messages = db.get_messages(company_name="nvidia", limit=10) + +# Get analytics +analytics = db.get_analytics(days=7) + +# Store a custom message +db.store_message( + prompt="test prompt", + response="test response", + company_name="test", + analysis_type="custom" +) +``` diff --git a/SPARC/config.py b/SPARC/config.py index 33f3035..08dbc7a 100644 --- a/SPARC/config.py +++ b/SPARC/config.py @@ -12,3 +12,11 @@ api_key = os.getenv("API_KEY") # OpenRouter API key for LLM analysis openrouter_api_key = os.getenv("OPENROUTER_API_KEY") + +# Database configuration +database_url = os.getenv("DATABASE_URL", "postgresql://postgres:postgres@localhost:5432/sparc") + +# Toggle between database mode and API mode +# When True: 
class DatabaseClient:
    """Store and query LLM prompt/response exchanges in PostgreSQL.

    The connection is opened lazily by the first method that needs it and is
    reused until :meth:`close` is called. Every operation runs inside
    ``with self.conn:`` so it is committed on success and rolled back on
    failure; without the rollback a single failed statement would leave the
    shared connection in an aborted transaction and break every later call.

    The client can be used as a context manager to guarantee the connection
    is released::

        with DatabaseClient(url) as db:
            db.initialize_schema()
    """

    # Time-window predicate shared by the analytics queries. The day count is
    # bound as a real parameter and multiplied by a constant interval; the
    # placeholder must not sit inside a quoted SQL literal.
    _SINCE_CLAUSE = "timestamp >= NOW() - (%s * INTERVAL '1 day')"

    def __init__(self, database_url: str):
        """Initialize the database client without connecting.

        Args:
            database_url: PostgreSQL connection string
        """
        self.database_url = database_url
        self.conn = None

    def __enter__(self):
        """Open the connection and return the client."""
        self.connect()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection; exceptions are never suppressed."""
        self.close()

    @staticmethod
    def _to_json(value: Optional[Dict]) -> Optional[str]:
        """Serialize *value* for a JSONB column, mapping only ``None`` to NULL.

        An empty dict is real data and is stored as ``{}`` rather than NULL.
        """
        return None if value is None else json.dumps(value)

    def connect(self):
        """Establish the database connection if it is missing or closed."""
        if not self.conn or self.conn.closed:
            self.conn = psycopg2.connect(self.database_url)

    def close(self):
        """Close the database connection if it is open."""
        if self.conn and not self.conn.closed:
            self.conn.close()

    def initialize_schema(self):
        """Create the ``llm_messages`` table and its indexes if absent."""
        self.connect()

        with self.conn:
            with self.conn.cursor() as cursor:
                # Create messages table
                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS llm_messages (
                        id SERIAL PRIMARY KEY,
                        timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                        company_name VARCHAR(255),
                        analysis_type VARCHAR(50),
                        model VARCHAR(100),
                        prompt TEXT NOT NULL,
                        response TEXT,
                        metadata JSONB,
                        token_usage JSONB,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                """)

                # Create index on timestamp for analytics queries
                cursor.execute("""
                    CREATE INDEX IF NOT EXISTS idx_messages_timestamp
                    ON llm_messages(timestamp)
                """)

                # Create index on company_name for filtering
                cursor.execute("""
                    CREATE INDEX IF NOT EXISTS idx_messages_company
                    ON llm_messages(company_name)
                """)

    def store_message(
        self,
        prompt: str,
        response: str,
        company_name: Optional[str] = None,
        analysis_type: Optional[str] = None,
        model: Optional[str] = None,
        metadata: Optional[Dict] = None,
        token_usage: Optional[Dict] = None,
    ) -> int:
        """Store an LLM message exchange in the database.

        Args:
            prompt: The prompt sent to the LLM
            response: The response from the LLM
            company_name: Name of company being analyzed
            analysis_type: Type of analysis (e.g., 'single_patent', 'portfolio')
            model: Model identifier used
            metadata: Additional metadata as dict
            token_usage: Token usage information

        Returns:
            The ID of the inserted record
        """
        self.connect()

        params = (
            prompt,
            response,
            company_name,
            analysis_type,
            model,
            self._to_json(metadata),
            self._to_json(token_usage),
        )

        with self.conn:
            with self.conn.cursor() as cursor:
                cursor.execute(
                    """
                    INSERT INTO llm_messages
                    (prompt, response, company_name, analysis_type, model, metadata, token_usage)
                    VALUES (%s, %s, %s, %s, %s, %s, %s)
                    RETURNING id
                    """,
                    params,
                )
                message_id = cursor.fetchone()[0]

        return message_id

    def get_messages(
        self,
        company_name: Optional[str] = None,
        analysis_type: Optional[str] = None,
        limit: int = 100,
        offset: int = 0,
    ) -> List[Dict]:
        """Retrieve messages from the database, newest first.

        Args:
            company_name: Filter by company name
            analysis_type: Filter by analysis type
            limit: Maximum number of records to return
            offset: Number of records to skip

        Returns:
            List of message dictionaries
        """
        self.connect()

        # Only fixed SQL fragments are concatenated; every value is bound.
        query = "SELECT * FROM llm_messages WHERE 1=1"
        params = []

        if company_name:
            query += " AND company_name = %s"
            params.append(company_name)

        if analysis_type:
            query += " AND analysis_type = %s"
            params.append(analysis_type)

        query += " ORDER BY timestamp DESC LIMIT %s OFFSET %s"
        params.extend([limit, offset])

        # The outer "with" ends the read transaction so the session is not
        # left idle-in-transaction between calls.
        with self.conn:
            with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
                cursor.execute(query, params)
                return [dict(row) for row in cursor.fetchall()]

    def get_analytics(self, days: int = 30) -> Dict:
        """Get analytics on message usage.

        Args:
            days: Number of days to look back

        Returns:
            Dictionary with keys ``total_messages``, ``by_company`` (top 10),
            ``by_type`` and ``period_days``
        """
        self.connect()
        since = self._SINCE_CLAUSE

        with self.conn:
            with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
                # Total messages
                cursor.execute(
                    f"""
                    SELECT COUNT(*) as total_messages
                    FROM llm_messages
                    WHERE {since}
                    """,
                    (days,),
                )
                total = cursor.fetchone()["total_messages"]

                # Messages by company
                cursor.execute(
                    f"""
                    SELECT company_name, COUNT(*) as count
                    FROM llm_messages
                    WHERE {since}
                    GROUP BY company_name
                    ORDER BY count DESC
                    LIMIT 10
                    """,
                    (days,),
                )
                by_company = cursor.fetchall()

                # Messages by type
                cursor.execute(
                    f"""
                    SELECT analysis_type, COUNT(*) as count
                    FROM llm_messages
                    WHERE {since}
                    GROUP BY analysis_type
                    ORDER BY count DESC
                    """,
                    (days,),
                )
                by_type = cursor.fetchall()

        return {
            "total_messages": total,
            "by_company": [dict(row) for row in by_company],
            "by_type": [dict(row) for row in by_type],
            "period_days": days,
        }
b/SPARC/llm.py @@ -2,22 +2,33 @@ from openai import OpenAI from SPARC import config +from SPARC.database import DatabaseClient from typing import Dict class LLMAnalyzer: """Handles LLM-based analysis of patent content.""" - def __init__(self, api_key: str | None = None, test_mode: bool = False): + def __init__(self, api_key: str | None = None, test_mode: bool = False, use_database: bool | None = None): """Initialize the LLM analyzer. Args: api_key: OpenRouter API key. If None, will attempt to load from config. test_mode: If True, print prompts instead of making API calls + use_database: If True, store messages in database instead of calling API. + If None, will use config.use_database """ self.test_mode = test_mode + self.use_database = use_database if use_database is not None else config.use_database + self.db_client = None - if (api_key or config.openrouter_api_key) and not test_mode: + # Initialize database client if in database mode + if self.use_database: + self.db_client = DatabaseClient(config.database_url) + self.db_client.initialize_schema() + + # Initialize OpenRouter client if not in database mode + if (api_key or config.openrouter_api_key) and not test_mode and not self.use_database: self.client = OpenAI( api_key=api_key or config.openrouter_api_key, base_url="https://openrouter.ai/api/v1" @@ -57,13 +68,47 @@ Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals print("=" * 80) return "[TEST MODE - No API call made]" + # Database mode: store the prompt and return a placeholder response + if self.use_database: + response_text = "[DATABASE MODE] Message stored for testing/analytics. Enable API mode to get actual analysis." 
+ + self.db_client.store_message( + prompt=prompt, + response=response_text, + company_name=company_name, + analysis_type="single_patent", + model=self.model if hasattr(self, 'model') else None, + metadata={"patent_content_length": len(patent_content)} + ) + + return response_text + + # API mode: send to OpenRouter if self.client: response = self.client.chat.completions.create( model=self.model, max_tokens=1024, messages=[{"role": "user", "content": prompt}], ) - return response.choices[0].message.content + response_text = response.choices[0].message.content + + # Store in database if db_client is available (for logging even in API mode) + if self.db_client: + self.db_client.store_message( + prompt=prompt, + response=response_text, + company_name=company_name, + analysis_type="single_patent", + model=self.model, + metadata={"patent_content_length": len(patent_content)}, + token_usage={ + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens + } if hasattr(response, 'usage') else None + ) + + return response_text def analyze_patent_portfolio( self, patents_data: list[Dict[str, str]], company_name: str @@ -105,6 +150,25 @@ Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the co print(prompt) return "[TEST MODE]" + # Database mode: store the prompt and return a placeholder response + if self.use_database: + response_text = "[DATABASE MODE] Message stored for testing/analytics. Enable API mode to get actual analysis." 
+ + self.db_client.store_message( + prompt=prompt, + response=response_text, + company_name=company_name, + analysis_type="portfolio", + model=self.model if hasattr(self, 'model') else None, + metadata={ + "patent_count": len(patents_data), + "patent_ids": [p['patent_id'] for p in patents_data] + } + ) + + return response_text + + # API mode: send to OpenRouter try: response = self.client.chat.completions.create( model=self.model, @@ -112,7 +176,28 @@ Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the co messages=[{"role": "user", "content": prompt}], ) - return response.choices[0].message.content + response_text = response.choices[0].message.content + + # Store in database if db_client is available (for logging even in API mode) + if self.db_client: + self.db_client.store_message( + prompt=prompt, + response=response_text, + company_name=company_name, + analysis_type="portfolio", + model=self.model, + metadata={ + "patent_count": len(patents_data), + "patent_ids": [p['patent_id'] for p in patents_data] + }, + token_usage={ + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens + } if hasattr(response, 'usage') else None + ) + + return response_text except AttributeError: return prompt diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..cf5c2b5 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,36 @@ +services: + postgres: + image: postgres:16-alpine + container_name: sparc-postgres + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: sparc + ports: + - "5432:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 10s + timeout: 5s + retries: 5 + + app: + build: + context: . 
def redact_database_url(url: str) -> str:
    """Return *url* with its password replaced by ``***``.

    Only URL-style connection strings (``scheme://user:password@host/db``)
    are rewritten; a string without a password component is returned
    unchanged. An unparseable URL yields a fixed placeholder so that nothing
    sensitive is echoed.
    """
    from urllib.parse import urlsplit, urlunsplit

    try:
        parts = urlsplit(url)
    except ValueError:
        return "<unparseable database URL>"

    if parts.password is None:
        return url

    # Split on the last "@" so an "@" inside the password cannot confuse
    # the host portion.
    userinfo, _, hostinfo = parts.netloc.rpartition("@")
    username = userinfo.partition(":")[0]
    return urlunsplit(parts._replace(netloc=f"{username}:***@{hostinfo}"))


def main():
    """Initialize the database schema.

    Prints progress to stdout and exits with status 1 if the schema cannot
    be created. The connection is always closed before returning.
    """
    print("Initializing SPARC database...")
    # Never echo credentials: this output routinely ends up in terminals and
    # CI logs.
    print(f"Database URL: {redact_database_url(config.database_url)}")

    db_client = DatabaseClient(config.database_url)
    try:
        db_client.initialize_schema()
        print("Database schema initialized successfully!")
        print("\nTables created:")
        print(" - llm_messages: Stores all LLM prompts and responses")
        print("\nIndexes created:")
        print(" - idx_messages_timestamp: For time-based queries")
        print(" - idx_messages_company: For company-specific queries")
    except Exception as e:
        # Top-level boundary of a CLI script: report and signal failure.
        print(f"Error initializing database: {e}")
        sys.exit(1)
    finally:
        db_client.close()


if __name__ == "__main__":
    main()
def format_analytics(analytics):
    """Return the report lines for an analytics dictionary.

    Args:
        analytics: Mapping with ``total_messages``, ``by_company`` (rows with
            ``company_name`` and ``count``) and ``by_type`` (rows with
            ``analysis_type`` and ``count``).

    Returns:
        List of strings, each meant to be passed to ``print`` as-is.
    """
    lines = [f"\nTotal Messages: {analytics['total_messages']}"]

    sections = (
        ("\nMessages by Company:", analytics["by_company"], "company_name"),
        ("\nMessages by Analysis Type:", analytics["by_type"], "analysis_type"),
    )
    for heading, rows, label_key in sections:
        lines.append(heading)
        if not rows:
            lines.append(" No data")
            continue
        # Rows whose label is NULL in the database are shown as "(unknown)".
        lines.extend(
            f" {row[label_key] or '(unknown)'}: {row['count']}" for row in rows
        )

    return lines


def main():
    """Display analytics from the database.

    Parses ``--days``, queries the message store and prints a summary.
    Exits with status 1 on any error; the connection is always closed.
    """
    parser = argparse.ArgumentParser(description="View SPARC message analytics")
    parser.add_argument(
        "--days",
        type=int,
        default=30,
        help="Number of days to analyze (default: 30)"
    )
    args = parser.parse_args()

    print(f"SPARC Analytics - Last {args.days} days")
    print("=" * 70)

    db_client = DatabaseClient(config.database_url)
    try:
        analytics = db_client.get_analytics(days=args.days)

        for line in format_analytics(analytics):
            print(line)

        print("\n" + "=" * 70)

    except Exception as e:
        # Top-level boundary of a CLI script: report and signal failure.
        print(f"Error retrieving analytics: {e}")
        sys.exit(1)
    finally:
        # Release the connection even when the query fails.
        db_client.close()


if __name__ == "__main__":
    main()
def preview(text, limit=200, empty="(no response)"):
    """Return at most *limit* characters of *text* for display.

    An ellipsis is appended only when the text was actually cut, so short
    values are shown verbatim. ``None`` or an empty string yields *empty*.
    """
    if not text:
        return empty
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def main():
    """Display messages from the database.

    Parses the ``--company``, ``--type`` and ``--limit`` filters, fetches the
    matching messages and prints a short preview of each. Exits with status 1
    on any error; the connection is always closed.
    """
    parser = argparse.ArgumentParser(description="View stored SPARC messages")
    parser.add_argument(
        "--company",
        type=str,
        help="Filter by company name"
    )
    parser.add_argument(
        "--type",
        type=str,
        choices=["single_patent", "portfolio"],
        help="Filter by analysis type"
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=10,
        help="Maximum number of messages to display (default: 10)"
    )
    args = parser.parse_args()

    print("SPARC Stored Messages")
    print("=" * 70)

    db_client = DatabaseClient(config.database_url)
    try:
        messages = db_client.get_messages(
            company_name=args.company,
            analysis_type=args.type,
            limit=args.limit
        )

        if not messages:
            print("\nNo messages found.")
            return

        print(f"\nShowing {len(messages)} message(s):\n")

        for msg in messages:
            print(f"Message #{msg['id']} - {msg['timestamp']}")
            print(f"Company: {msg['company_name'] or '(unknown)'}")
            print(f"Type: {msg['analysis_type'] or '(unknown)'}")
            print(f"Model: {msg['model'] or '(unknown)'}")
            print("\nPrompt (first 200 chars):")
            print(f" {preview(msg['prompt'], empty='(no prompt)')}")
            print("\nResponse (first 200 chars):")
            print(f" {preview(msg['response'])}")
            print("\n" + "-" * 70 + "\n")

    except Exception as e:
        # Top-level boundary of a CLI script: report and signal failure.
        print(f"Error retrieving messages: {e}")
        sys.exit(1)
    finally:
        # Runs on the early "no messages" return as well.
        db_client.close()


if __name__ == "__main__":
    main()
def test_database_mode():
    """Verify that database mode stores both kinds of analysis.

    Requires a reachable PostgreSQL instance at ``config.database_url``.
    Progress is printed for manual runs, and each step is also asserted so
    the function fails when messages are not actually stored.
    """
    print("Testing Database Mode")
    print("=" * 70)

    # Initialize analyzer in database mode
    print("\n1. Initializing LLMAnalyzer in database mode...")
    analyzer = LLMAnalyzer(use_database=True)
    db_client = DatabaseClient(config.database_url)

    try:
        print(f" - use_database: {analyzer.use_database}")
        print(f" - db_client: {analyzer.db_client is not None}")
        assert analyzer.use_database
        assert analyzer.db_client is not None

        # Test single patent analysis
        print("\n2. Testing single patent analysis (database mode)...")
        result = analyzer.analyze_patent_content(
            patent_content="Test patent content about semiconductor innovation",
            company_name="TestCorp"
        )
        print(f" Result: {result}")
        assert result.startswith("[DATABASE MODE]")

        # Test portfolio analysis
        print("\n3. Testing portfolio analysis (database mode)...")
        test_patents = [
            {"patent_id": "US001", "content": "First test patent"},
            {"patent_id": "US002", "content": "Second test patent"},
        ]
        result = analyzer.analyze_patent_portfolio(
            patents_data=test_patents,
            company_name="TestCorp"
        )
        print(f" Result: {result}")
        assert result.startswith("[DATABASE MODE]")

        # Verify messages were stored
        print("\n4. Verifying messages were stored...")
        messages = db_client.get_messages(company_name="TestCorp", limit=10)
        print(f" Found {len(messages)} stored messages")

        for msg in messages:
            print(f" - ID: {msg['id']}, Type: {msg['analysis_type']}, Timestamp: {msg['timestamp']}")

        # Rows from earlier runs may exist, so check for presence rather
        # than an exact count.
        stored_types = {msg["analysis_type"] for msg in messages}
        assert {"single_patent", "portfolio"} <= stored_types

        # Get analytics
        print("\n5. Getting analytics...")
        analytics = db_client.get_analytics(days=1)
        print(f" Total messages: {analytics['total_messages']}")
        print(f" By company: {analytics['by_company']}")
        print(f" By type: {analytics['by_type']}")
        assert analytics["total_messages"] >= 2
    finally:
        # Release both connections even when an assertion fails.
        db_client.close()
        if analyzer.db_client is not None:
            analyzer.db_client.close()

    print("\n" + "=" * 70)
    print("Database mode test completed successfully!")


def test_api_mode():
    """Verify that API mode with ``test_mode=True`` makes no API or DB call."""
    print("\nTesting API Mode")
    print("=" * 70)

    print("\n1. Initializing LLMAnalyzer in API mode...")
    analyzer = LLMAnalyzer(use_database=False, test_mode=True)

    print(f" - use_database: {analyzer.use_database}")
    print(f" - test_mode: {analyzer.test_mode}")
    assert analyzer.use_database is False
    assert analyzer.test_mode is True
    assert analyzer.db_client is None

    print("\n2. Testing single patent analysis (test mode)...")
    result = analyzer.analyze_patent_content(
        patent_content="Test patent content",
        company_name="TestCorp2"
    )
    print(f" Result: {result}")
    assert result == "[TEST MODE - No API call made]"

    print("\n" + "=" * 70)
    print("API mode test completed successfully!")


if __name__ == "__main__":
    test_database_mode()
    print("\n")
    test_api_mode()