diff --git a/.env.example b/.env.example
index 1d776d0..acf4901 100644
--- a/.env.example
+++ b/.env.example
@@ -6,11 +6,16 @@ API_KEY=your_serpapi_key_here
# OpenRouter API key for LLM analysis
OPENROUTER_API_KEY=your_openrouter_key_here
-# Database configuration (for docker-compose setup)
+# Database configuration
+# All messages are stored in the database for persistence and caching
DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc
-# Toggle between database mode and API mode
-# When USE_DATABASE=true: stores all messages in database instead of sending to OpenRouter
-# When USE_DATABASE=false: sends messages to OpenRouter API as normal
-# Default: false
-USE_DATABASE=false
+# Cache configuration
+# When USE_CACHE=true: check database for cached responses before making API calls
+# When USE_CACHE=false: always make fresh API calls (still stores results in database)
+# Default: true
+USE_CACHE=true
+
+# JWT Secret for authentication
+# IMPORTANT: Change this to a secure random string in production
+JWT_SECRET=your-secure-jwt-secret-change-in-production
diff --git a/.gitea/workflows/build.yaml b/.gitea/workflows/build.yaml
index d0f1a75..1517e6d 100644
--- a/.gitea/workflows/build.yaml
+++ b/.gitea/workflows/build.yaml
@@ -1,4 +1,4 @@
-name: Build and Push Docker Image
+name: Build and Push Docker Images
on:
push:
@@ -9,7 +9,7 @@ on:
workflow_dispatch:
jobs:
- build-and-push:
+ build-api:
runs-on: ubuntu-latest
steps:
- name: Install dependencies
@@ -31,32 +31,24 @@ jobs:
REPO_OWNER="${{ gitea.repository_owner }}"
REPO_NAME="${{ gitea.repository }}"
- # Extract repository name without owner
REPO_NAME_ONLY=$(echo "$REPO_NAME" | cut -d'/' -f2)
-
- # Convert to lowercase for Docker registry compatibility
REPO_OWNER_LOWER=$(echo "$REPO_OWNER" | tr '[:upper:]' '[:lower:]')
REPO_NAME_LOWER=$(echo "$REPO_NAME_ONLY" | tr '[:upper:]' '[:lower:]')
- # Base image path
IMAGE_BASE="${REGISTRY}/${REPO_OWNER_LOWER}/${REPO_NAME_LOWER}"
- # Determine tag based on ref
case "${{ gitea.ref }}" in
refs/tags/*)
- # Tag push - use the tag name
TAG_NAME="${{ gitea.ref_name }}"
echo "IMAGE_TAG=${IMAGE_BASE}:${TAG_NAME}" >> $GITHUB_OUTPUT
echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
;;
refs/heads/main)
- # Main branch - use commit SHA (shortened to 7 chars) and latest
SHORT_SHA=$(echo "${{ gitea.sha }}" | cut -c1-7)
echo "IMAGE_TAG=${IMAGE_BASE}:${SHORT_SHA}" >> $GITHUB_OUTPUT
echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
;;
*)
- # Other branches - use branch name
BRANCH_TAG=$(echo "${{ gitea.ref_name }}" | sed 's/\//-/g')
echo "IMAGE_TAG=${IMAGE_BASE}:${BRANCH_TAG}" >> $GITHUB_OUTPUT
echo "PUSH_LATEST=false" >> $GITHUB_OUTPUT
@@ -70,13 +62,13 @@ jobs:
run: |
echo "${{ secrets.PERSONAL_TOKEN }}" | docker login gitea.leeworks.dev -u "${{ gitea.actor }}" --password-stdin
- - name: Build and push with Docker
+ - name: Build and push API image
shell: sh
run: |
- echo "Building image..."
+ echo "Building API image..."
docker build -t ${{ steps.tags.outputs.IMAGE_TAG }} .
- echo "Pushing image..."
+ echo "Pushing API image..."
docker push ${{ steps.tags.outputs.IMAGE_TAG }}
if [ "${{ steps.tags.outputs.PUSH_LATEST }}" = "true" ]; then
@@ -85,5 +77,74 @@ jobs:
docker push ${{ steps.tags.outputs.IMAGE_LATEST }}
fi
- echo "Build and push completed successfully!"
- echo "Image available at ${{ steps.tags.outputs.IMAGE_TAG }}"
+ echo "API image available at ${{ steps.tags.outputs.IMAGE_TAG }}"
+
+ build-frontend:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Install dependencies
+ shell: sh
+ run: |
+ apk add --no-cache git docker-cli
+
+ - name: Checkout code
+ shell: sh
+ run: |
+ git clone https://gitea.leeworks.dev/${{ gitea.repository }}.git .
+ git checkout ${{ gitea.sha }}
+
+ - name: Determine image tags
+ id: tags
+ shell: sh
+ run: |
+ REGISTRY="gitea.leeworks.dev"
+ REPO_OWNER="${{ gitea.repository_owner }}"
+ REPO_NAME="${{ gitea.repository }}"
+
+ REPO_NAME_ONLY=$(echo "$REPO_NAME" | cut -d'/' -f2)
+ REPO_OWNER_LOWER=$(echo "$REPO_OWNER" | tr '[:upper:]' '[:lower:]')
+ REPO_NAME_LOWER=$(echo "$REPO_NAME_ONLY" | tr '[:upper:]' '[:lower:]')
+
+ IMAGE_BASE="${REGISTRY}/${REPO_OWNER_LOWER}/${REPO_NAME_LOWER}"
+
+ case "${{ gitea.ref }}" in
+ refs/tags/*)
+ TAG_NAME="${{ gitea.ref_name }}"
+ echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${TAG_NAME}" >> $GITHUB_OUTPUT
+ echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
+ ;;
+ refs/heads/main)
+ SHORT_SHA=$(echo "${{ gitea.sha }}" | cut -c1-7)
+ echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${SHORT_SHA}" >> $GITHUB_OUTPUT
+ echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
+ ;;
+ *)
+ BRANCH_TAG=$(echo "${{ gitea.ref_name }}" | sed 's/\//-/g')
+ echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${BRANCH_TAG}" >> $GITHUB_OUTPUT
+ echo "PUSH_LATEST=false" >> $GITHUB_OUTPUT
+ ;;
+ esac
+
+ echo "IMAGE_LATEST=${IMAGE_BASE}:frontend-latest" >> $GITHUB_OUTPUT
+
+ - name: Login to registry
+ shell: sh
+ run: |
+ echo "${{ secrets.PERSONAL_TOKEN }}" | docker login gitea.leeworks.dev -u "${{ gitea.actor }}" --password-stdin
+
+ - name: Build and push frontend image
+ shell: sh
+ run: |
+ echo "Building frontend image..."
+ docker build -t ${{ steps.tags.outputs.IMAGE_TAG }} ./frontend
+
+ echo "Pushing frontend image..."
+ docker push ${{ steps.tags.outputs.IMAGE_TAG }}
+
+ if [ "${{ steps.tags.outputs.PUSH_LATEST }}" = "true" ]; then
+ echo "Tagging and pushing frontend-latest..."
+ docker tag ${{ steps.tags.outputs.IMAGE_TAG }} ${{ steps.tags.outputs.IMAGE_LATEST }}
+ docker push ${{ steps.tags.outputs.IMAGE_LATEST }}
+ fi
+
+ echo "Frontend image available at ${{ steps.tags.outputs.IMAGE_TAG }}"
diff --git a/README.md b/README.md
index d4ae497..c9b0b10 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ SPARC automatically collects, parses, and analyzes patents from companies to pro
- **Portfolio Analysis**: Evaluates multiple patents holistically for comprehensive insights
- **Batch Processing**: Analyze multiple companies concurrently with progress tracking
- **REST API**: FastAPI web service with async job support
-- **Dashboard**: Interactive Streamlit visualization dashboard
+- **Dashboard**: React TypeScript web dashboard with authentication
- **Robust Testing**: 40 tests covering all major functionality
## Architecture
@@ -27,7 +27,9 @@ SPARC/
├── serp_api.py # Patent retrieval and PDF parsing
├── llm.py # Claude AI integration via OpenRouter
├── analyzer.py # High-level orchestration
-├── api.py # FastAPI web service
+├── api.py # FastAPI web service with auth endpoints
+├── auth.py # JWT authentication module
+├── database.py # PostgreSQL storage with caching
├── types.py # Data models
└── config.py # Environment configuration
```
@@ -48,7 +50,7 @@ docker-compose up -d
# Access the services
# - API: http://localhost:8000
-# - Dashboard: http://localhost:8501
+# - Dashboard: http://localhost:8080
# - API Docs: http://localhost:8000/docs
```
@@ -186,21 +188,22 @@ curl -X POST http://localhost:8000/analyze/batch/async \
-d '{"companies": ["nvidia", "amd", "intel", "qualcomm"]}'
```
-### Visualization Dashboard
+### Web Dashboard
-Launch the interactive Streamlit dashboard:
+The React dashboard is included in Docker Compose:
```bash
-streamlit run dashboard.py
+docker-compose up -d
```
Dashboard features:
+- **Authentication**: User registration, login, and JWT-based sessions
- **Company Analysis**: Analyze individual companies with real-time results
-- **Batch Analysis**: Process multiple companies with progress tracking and charts
-- **Analytics**: View historical analysis data and trends (requires database mode)
-- **System Status**: Monitor database and analyzer health
+- **Batch Analysis**: Process multiple companies with progress tracking
+- **Analytics**: View historical analysis data and trends
+- **Admin Panel**: User management for administrators
-The dashboard runs at `http://localhost:8501` by default.
+The dashboard runs at `http://localhost:8080` when using Docker Compose.
## Running Tests
@@ -280,4 +283,4 @@ For open source projects, say how it is licensed.
Core functionality complete. Ready for production use with API keys configured.
-All major features implemented: REST API, Streamlit dashboard, Docker containerization, database storage, and multi-company batch processing.
+All major features implemented: REST API, React dashboard with authentication, Docker containerization, database storage with caching, and multi-company batch processing.
diff --git a/SPARC/api.py b/SPARC/api.py
index 2a75fee..cf76493 100644
--- a/SPARC/api.py
+++ b/SPARC/api.py
@@ -5,12 +5,23 @@ Provides REST API endpoints for analyzing company patent portfolios.
from contextlib import asynccontextmanager
from datetime import datetime
-from typing import Annotated
+from typing import Annotated, List
-from fastapi import BackgroundTasks, FastAPI, HTTPException, Query
-from pydantic import BaseModel, Field
+from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, Query
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, EmailStr, Field
+from SPARC import config
from SPARC.analyzer import CompanyAnalyzer
+from SPARC.auth import (
+ TokenResponse,
+ UserResponse,
+ create_tokens,
+ decode_token,
+ get_current_admin,
+ get_current_user,
+ get_db_client,
+)
from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult
@@ -67,6 +78,42 @@ class HealthResponse(BaseModel):
timestamp: datetime
+# Auth request/response models
+class RegisterRequest(BaseModel):
+ """User registration request."""
+
+ email: EmailStr
+ password: str = Field(..., min_length=8, description="Password (min 8 characters)")
+
+
+class LoginRequest(BaseModel):
+ """User login request."""
+
+ email: EmailStr
+ password: str
+
+
+class RefreshRequest(BaseModel):
+ """Token refresh request."""
+
+ refresh_token: str
+
+
+class UpdateRoleRequest(BaseModel):
+ """Update user role request."""
+
+ role: str = Field(..., pattern="^(admin|user)$")
+
+
+class AnalyticsResponse(BaseModel):
+ """Analytics response model."""
+
+ total_messages: int
+ by_company: List[dict]
+ by_type: List[dict]
+ period_days: int
+
+
# In-memory job storage (for demo; production would use Redis/DB)
_jobs: dict[str, JobStatus] = {}
_job_counter = 0
@@ -116,6 +163,196 @@ app = FastAPI(
lifespan=lifespan,
)
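+# Add CORS middleware for the React frontend (localhost origins on ports 3000 and 5173). NOTE(review): the Docker Compose dashboard is documented at http://localhost:8080 - confirm it proxies API calls or add that origin.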
+app.add_middleware(
+ CORSMiddleware,
+ allow_origins=["http://localhost:3000", "http://localhost:5173"],
+ allow_credentials=True,
+ allow_methods=["*"],
+ allow_headers=["*"],
+)
+
+
+# ============== Auth Endpoints ==============
+
+
+@app.post("/auth/register", response_model=UserResponse, tags=["Auth"])
+async def register(request: RegisterRequest):
+ """Register a new user.
+
+ The first registered user automatically becomes an admin.
+ """
+ db = get_db_client()
+
+ # First user becomes admin
+ user_count = db.get_user_count()
+ role = "admin" if user_count == 0 else "user"
+
+ user = db.create_user(
+ email=request.email,
+ password=request.password,
+ role=role,
+ )
+
+ if not user:
+ raise HTTPException(
+ status_code=400,
+ detail="Email already registered",
+ )
+
+ return UserResponse(
+ id=user["id"],
+ email=user["email"],
+ role=user["role"],
+ created_at=user["created_at"],
+ )
+
+
+@app.post("/auth/login", response_model=TokenResponse, tags=["Auth"])
+async def login(request: LoginRequest):
+ """Authenticate user and return JWT tokens."""
+ db = get_db_client()
+
+ user = db.authenticate_user(request.email, request.password)
+
+ if not user:
+ raise HTTPException(
+ status_code=401,
+ detail="Invalid email or password",
+ )
+
+ return create_tokens(user["id"], user["email"], user["role"])
+
+
+@app.post("/auth/refresh", response_model=TokenResponse, tags=["Auth"])
+async def refresh_token(request: RefreshRequest):
+ """Refresh access token using refresh token."""
+ payload = decode_token(request.refresh_token)
+
+ if not payload or payload.type != "refresh":
+ raise HTTPException(
+ status_code=401,
+ detail="Invalid refresh token",
+ )
+
+ db = get_db_client()
+ user = db.get_user_by_id(payload.user_id)
+
+ if not user:
+ raise HTTPException(
+ status_code=401,
+ detail="User not found",
+ )
+
+ return create_tokens(user["id"], user["email"], user["role"])
+
+
+@app.get("/auth/me", response_model=UserResponse, tags=["Auth"])
+async def get_me(current_user: UserResponse = Depends(get_current_user)):
+ """Get current authenticated user."""
+ return current_user
+
+
+# ============== Admin Endpoints ==============
+
+
+@app.get("/admin/users", response_model=List[UserResponse], tags=["Admin"])
+async def list_users(
+ limit: int = Query(default=100, ge=1, le=1000),
+ offset: int = Query(default=0, ge=0),
+ _: UserResponse = Depends(get_current_admin),
+):
+ """List all users (admin only)."""
+ db = get_db_client()
+ users = db.get_all_users(limit=limit, offset=offset)
+
+ return [
+ UserResponse(
+ id=u["id"],
+ email=u["email"],
+ role=u["role"],
+ created_at=u["created_at"],
+ )
+ for u in users
+ ]
+
+
+@app.patch("/admin/users/{user_id}/role", response_model=UserResponse, tags=["Admin"])
+async def update_user_role(
+ user_id: int,
+ request: UpdateRoleRequest,
+ current_admin: UserResponse = Depends(get_current_admin),
+):
+ """Update a user's role (admin only)."""
+ if user_id == current_admin.id:
+ raise HTTPException(
+ status_code=400,
+ detail="Cannot change your own role",
+ )
+
+ db = get_db_client()
+ user = db.update_user_role(user_id, request.role)
+
+ if not user:
+ raise HTTPException(
+ status_code=404,
+ detail="User not found",
+ )
+
+ return UserResponse(
+ id=user["id"],
+ email=user["email"],
+ role=user["role"],
+ created_at=user["created_at"],
+ )
+
+
+@app.delete("/admin/users/{user_id}", tags=["Admin"])
+async def delete_user(
+ user_id: int,
+ current_admin: UserResponse = Depends(get_current_admin),
+):
+ """Delete a user (admin only)."""
+ if user_id == current_admin.id:
+ raise HTTPException(
+ status_code=400,
+ detail="Cannot delete yourself",
+ )
+
+ db = get_db_client()
+ deleted = db.delete_user(user_id)
+
+ if not deleted:
+ raise HTTPException(
+ status_code=404,
+ detail="User not found",
+ )
+
+ return {"message": "User deleted"}
+
+
+# ============== Analytics Endpoint ==============
+
+
+@app.get("/analytics", response_model=AnalyticsResponse, tags=["Analytics"])
+async def get_analytics(
+ days: int = Query(default=30, ge=1, le=365),
+ _: UserResponse = Depends(get_current_user),
+):
+ """Get analytics data (authenticated users only)."""
+ db = get_db_client()
+ analytics = db.get_analytics(days=days)
+
+ return AnalyticsResponse(
+ total_messages=analytics["total_messages"],
+ by_company=analytics["by_company"],
+ by_type=analytics["by_type"],
+ period_days=analytics["period_days"],
+ )
+
+
+# ============== System Endpoints ==============
+
@app.get("/health", response_model=HealthResponse, tags=["System"])
async def health_check():
@@ -132,7 +369,10 @@ async def health_check():
response_model=CompanyAnalysisResponse,
tags=["Analysis"],
)
-async def analyze_company(company_name: str):
+async def analyze_company(
+ company_name: str,
+ _: UserResponse = Depends(get_current_user),
+):
"""Analyze a single company's patent portfolio.
This endpoint retrieves recent patents for the specified company,
@@ -156,7 +396,10 @@ async def analyze_company(company_name: str):
response_model=BatchAnalysisResponse,
tags=["Analysis"],
)
-async def analyze_companies_batch(request: BatchAnalysisRequest):
+async def analyze_companies_batch(
+ request: BatchAnalysisRequest,
+ _: UserResponse = Depends(get_current_user),
+):
"""Analyze multiple companies' patent portfolios.
Processes companies concurrently for improved performance.
@@ -209,7 +452,9 @@ def _run_batch_job(job_id: str, companies: list[str], max_workers: int):
@app.post("/analyze/batch/async", response_model=JobStatus, tags=["Analysis"])
async def analyze_companies_async(
- request: BatchAnalysisRequest, background_tasks: BackgroundTasks
+ request: BatchAnalysisRequest,
+ background_tasks: BackgroundTasks,
+ _: UserResponse = Depends(get_current_user),
):
"""Start an asynchronous batch analysis job.
@@ -243,7 +488,10 @@ async def analyze_companies_async(
@app.get("/jobs/{job_id}", response_model=JobStatus, tags=["Jobs"])
-async def get_job_status(job_id: str):
+async def get_job_status(
+ job_id: str,
+ _: UserResponse = Depends(get_current_user),
+):
"""Get the status of a background analysis job.
Args:
@@ -265,6 +513,7 @@ async def list_jobs(
Query(description="Filter by status: pending, running, completed, failed"),
] = None,
limit: Annotated[int, Query(ge=1, le=100)] = 10,
+ _: UserResponse = Depends(get_current_user),
):
"""List all analysis jobs.
diff --git a/SPARC/auth.py b/SPARC/auth.py
new file mode 100644
index 0000000..4a5a28f
--- /dev/null
+++ b/SPARC/auth.py
@@ -0,0 +1,210 @@
+"""JWT authentication utilities for SPARC API."""
+
+import os
+from datetime import datetime, timedelta, timezone
+from typing import Optional
+
+import jwt
+from fastapi import Depends, HTTPException, status
+from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+from pydantic import BaseModel
+
+from SPARC import config
+from SPARC.database import DatabaseClient
+
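+# JWT configuration. NOTE(review): the JWT_SECRET fallback below is a hard-coded, publicly visible value; always set JWT_SECRET explicitly outside local development.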
+JWT_SECRET = os.getenv("JWT_SECRET", "sparc-secret-key-change-in-production")
+JWT_ALGORITHM = "HS256"
+ACCESS_TOKEN_EXPIRE_MINUTES = 30
+REFRESH_TOKEN_EXPIRE_DAYS = 7
+
+security = HTTPBearer()
+
+
+class TokenPayload(BaseModel):
+ """JWT token payload."""
+
+ sub: str # user_id as string (JWT RFC 7519 requires sub to be a string)
+ email: str
+ role: str
+ exp: datetime
+ type: str # "access" or "refresh"
+
+ @property
+ def user_id(self) -> int:
+ """Get user_id as integer."""
+ return int(self.sub)
+
+
+class TokenResponse(BaseModel):
+ """Token response model."""
+
+ access_token: str
+ refresh_token: str
+ token_type: str = "bearer"
+
+
+class UserResponse(BaseModel):
+ """User response model."""
+
+ id: int
+ email: str
+ role: str
+ created_at: datetime
+
+
+def create_access_token(user_id: int, email: str, role: str) -> str:
+ """Create a JWT access token.
+
+ Args:
+ user_id: User ID
+ email: User email
+ role: User role
+
+ Returns:
+ Encoded JWT token
+ """
+ expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+ payload = {
+ "sub": str(user_id),
+ "email": email,
+ "role": role,
+ "exp": expire,
+ "type": "access",
+ }
+ return jwt.encode(payload, JWT_SECRET, algorithm=JWT_ALGORITHM)
+
+
+def create_refresh_token(user_id: int, email: str, role: str) -> str:
+ """Create a JWT refresh token.
+
+ Args:
+ user_id: User ID
+ email: User email
+ role: User role
+
+ Returns:
+ Encoded JWT token
+ """
+ expire = datetime.now(timezone.utc) + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
+ payload = {
+ "sub": str(user_id),
+ "email": email,
+ "role": role,
+ "exp": expire,
+ "type": "refresh",
+ }
+ return jwt.encode(payload, JWT_SECRET, algorithm=JWT_ALGORITHM)
+
+
+def create_tokens(user_id: int, email: str, role: str) -> TokenResponse:
+ """Create both access and refresh tokens.
+
+ Args:
+ user_id: User ID
+ email: User email
+ role: User role
+
+ Returns:
+ TokenResponse with both tokens
+ """
+ return TokenResponse(
+ access_token=create_access_token(user_id, email, role),
+ refresh_token=create_refresh_token(user_id, email, role),
+ )
+
+
+def decode_token(token: str) -> Optional[TokenPayload]:
+    """Decode a JWT, verify its signature and expiry, and validate its claims.
+
+    Args:
+        token: JWT token string
+
+    Returns:
+        TokenPayload if valid; None if expired, malformed, or missing claims
+    """
+    try:
+        payload = jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
+        return TokenPayload(**payload)
+    except jwt.InvalidTokenError:  # base class; also covers ExpiredSignatureError
+        return None
+    except ValueError:  # claims failed TokenPayload validation (pydantic ValidationError is a ValueError)
+        return None
+
+
+def get_db_client() -> DatabaseClient:
+    """Build a DatabaseClient for config.database_url, call connect() on it, and return it.  NOTE(review): a new client is built on every call and nothing here closes it - confirm who releases the connection."""
+    client = DatabaseClient(config.database_url)
+    client.connect()
+    return client
+
+
+async def get_current_user(
+ credentials: HTTPAuthorizationCredentials = Depends(security),
+) -> UserResponse:
+ """Get the current authenticated user from JWT token.
+
+ Args:
+ credentials: Bearer token from request
+
+ Returns:
+ UserResponse with user details
+
+ Raises:
+ HTTPException: If token is invalid or expired
+ """
+ token = credentials.credentials
+ payload = decode_token(token)
+
+ if not payload:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid or expired token",
+ headers={"WWW-Authenticate": "Bearer"},
+ )
+
+ if payload.type != "access":
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid token type",
+ headers={"WWW-Authenticate": "Bearer"},
+ )
+
+ db = get_db_client()
+ user = db.get_user_by_id(payload.user_id)
+
+ if not user:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="User not found",
+ headers={"WWW-Authenticate": "Bearer"},
+ )
+
+ return UserResponse(
+ id=user["id"],
+ email=user["email"],
+ role=user["role"],
+ created_at=user["created_at"],
+ )
+
+
+async def get_current_admin(
+ current_user: UserResponse = Depends(get_current_user),
+) -> UserResponse:
+ """Require admin role for the current user.
+
+ Args:
+ current_user: Current authenticated user
+
+ Returns:
+ UserResponse if admin
+
+ Raises:
+ HTTPException: If user is not admin
+ """
+ if current_user.role != "admin":
+ raise HTTPException(
+ status_code=status.HTTP_403_FORBIDDEN,
+ detail="Admin access required",
+ )
+ return current_user
diff --git a/SPARC/config.py b/SPARC/config.py
index 08dbc7a..11a12a2 100644
--- a/SPARC/config.py
+++ b/SPARC/config.py
@@ -13,10 +13,15 @@ api_key = os.getenv("API_KEY")
# OpenRouter API key for LLM analysis
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
-# Database configuration
+# Database configuration - all messages are stored in the database
+# The database serves as both a persistent store and a cache layer
database_url = os.getenv("DATABASE_URL", "postgresql://postgres:postgres@localhost:5432/sparc")
-# Toggle between database mode and API mode
-# When True: stores all messages in database instead of sending to OpenRouter
-# When False: sends messages to OpenRouter API as normal
+# Cache configuration
+# When enabled (default), the system checks the database for cached responses
+# before making API calls, saving tokens and reducing latency
+use_cache = os.getenv("USE_CACHE", "true").lower() in ("true", "1", "yes")
+
+# Legacy compatibility - USE_DATABASE is deprecated, database is always used
+# This variable is kept for backwards compatibility but has no effect
use_database = os.getenv("USE_DATABASE", "false").lower() in ("true", "1", "yes")
diff --git a/SPARC/database.py b/SPARC/database.py
index c0fae7d..609f152 100644
--- a/SPARC/database.py
+++ b/SPARC/database.py
@@ -1,10 +1,12 @@
-"""Database client for storing and retrieving LLM messages."""
+"""Database client for storing and retrieving LLM messages and user authentication."""
import psycopg2
from psycopg2.extras import RealDictCursor
from typing import Dict, List, Optional
from datetime import datetime
import json
+import hashlib
+import bcrypt
class DatabaseClient:
@@ -43,10 +45,12 @@ class DatabaseClient:
analysis_type VARCHAR(50),
model VARCHAR(100),
prompt TEXT NOT NULL,
+ prompt_hash VARCHAR(64),
response TEXT,
metadata JSONB,
token_usage JSONB,
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+ is_cached BOOLEAN DEFAULT FALSE
)
""")
@@ -62,8 +66,109 @@ class DatabaseClient:
ON llm_messages(company_name)
""")
+ # Add prompt_hash and is_cached columns if they don't exist (for existing tables)
+ # This must run BEFORE creating the index on prompt_hash
+ cursor.execute("""
+ DO $$
+ BEGIN
+ IF NOT EXISTS (
+ SELECT 1 FROM information_schema.columns
+ WHERE table_name = 'llm_messages' AND column_name = 'prompt_hash'
+ ) THEN
+ ALTER TABLE llm_messages ADD COLUMN prompt_hash VARCHAR(64);
+ END IF;
+ IF NOT EXISTS (
+ SELECT 1 FROM information_schema.columns
+ WHERE table_name = 'llm_messages' AND column_name = 'is_cached'
+ ) THEN
+ ALTER TABLE llm_messages ADD COLUMN is_cached BOOLEAN DEFAULT FALSE;
+ END IF;
+ END $$;
+ """)
+
+ # Create index on prompt_hash for cache lookups
+ cursor.execute("""
+ CREATE INDEX IF NOT EXISTS idx_messages_prompt_hash
+ ON llm_messages(prompt_hash)
+ """)
+
+ # Create users table for authentication
+ cursor.execute("""
+ CREATE TABLE IF NOT EXISTS users (
+ id SERIAL PRIMARY KEY,
+ email VARCHAR(255) UNIQUE NOT NULL,
+ password_hash VARCHAR(255) NOT NULL,
+ role VARCHAR(20) DEFAULT 'user' CHECK (role IN ('admin', 'user')),
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+ )
+ """)
+
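+        # Index on email for fast lookups (NOTE: the UNIQUE constraint on users.email already creates an index in PostgreSQL, so this one is redundant)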
+ cursor.execute("""
+ CREATE INDEX IF NOT EXISTS idx_users_email
+ ON users(email)
+ """)
+
self.conn.commit()
+ @staticmethod
+ def hash_prompt(prompt: str) -> str:
+ """Generate a hash of the prompt for cache lookups.
+
+ Args:
+ prompt: The prompt text to hash
+
+ Returns:
+ SHA-256 hash of the prompt
+ """
+ return hashlib.sha256(prompt.encode()).hexdigest()
+
+ def get_cached_response(
+ self,
+ prompt: str,
+ company_name: Optional[str] = None,
+ analysis_type: Optional[str] = None,
+ ) -> Optional[Dict]:
+ """Look up a cached response for a given prompt.
+
+ Args:
+ prompt: The prompt to look up
+ company_name: Optional company name filter
+ analysis_type: Optional analysis type filter
+
+ Returns:
+ Cached message dict if found, None otherwise
+ """
+ self.connect()
+
+ prompt_hash = self.hash_prompt(prompt)
+
+ query = """
+ SELECT * FROM llm_messages
+ WHERE prompt_hash = %s
+ AND response IS NOT NULL
+ AND response NOT LIKE '[DATABASE MODE]%%'
+ AND response NOT LIKE '[TEST MODE]%%'
+ AND response NOT LIKE '[NO API]%%'
+ """
+ params = [prompt_hash]
+
+ if company_name:
+ query += " AND company_name = %s"
+ params.append(company_name)
+
+ if analysis_type:
+ query += " AND analysis_type = %s"
+ params.append(analysis_type)
+
+ query += " ORDER BY timestamp DESC LIMIT 1"
+
+ with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
+ cursor.execute(query, params)
+ result = cursor.fetchone()
+ return dict(result) if result else None
+
def store_message(
self,
prompt: str,
@@ -73,6 +178,7 @@ class DatabaseClient:
model: Optional[str] = None,
metadata: Optional[Dict] = None,
token_usage: Optional[Dict] = None,
+ is_cached: bool = False,
) -> int:
"""Store an LLM message exchange in the database.
@@ -84,28 +190,33 @@ class DatabaseClient:
model: Model identifier used
metadata: Additional metadata as dict
token_usage: Token usage information
+ is_cached: Whether this response was served from cache
Returns:
The ID of the inserted record
"""
self.connect()
+ prompt_hash = self.hash_prompt(prompt)
+
with self.conn.cursor() as cursor:
cursor.execute(
"""
INSERT INTO llm_messages
- (prompt, response, company_name, analysis_type, model, metadata, token_usage)
- VALUES (%s, %s, %s, %s, %s, %s, %s)
+ (prompt, prompt_hash, response, company_name, analysis_type, model, metadata, token_usage, is_cached)
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
RETURNING id
""",
(
prompt,
+ prompt_hash,
response,
company_name,
analysis_type,
model,
json.dumps(metadata) if metadata else None,
json.dumps(token_usage) if token_usage else None,
+ is_cached,
),
)
@@ -208,3 +319,212 @@ class DatabaseClient:
"by_type": [dict(row) for row in by_type],
"period_days": days,
}
+
+ # User Authentication Methods
+
+ @staticmethod
+ def hash_password(password: str) -> str:
+ """Hash a password using bcrypt.
+
+ Args:
+ password: Plain text password
+
+ Returns:
+ Hashed password string
+ """
+ return bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode()
+
+ @staticmethod
+ def verify_password(password: str, password_hash: str) -> bool:
+ """Verify a password against its hash.
+
+ Args:
+ password: Plain text password
+ password_hash: Stored hash
+
+ Returns:
+ True if password matches
+ """
+ return bcrypt.checkpw(password.encode(), password_hash.encode())
+
+ def create_user(
+ self,
+ email: str,
+ password: str,
+ role: str = "user",
+ ) -> Optional[Dict]:
+ """Create a new user.
+
+ Args:
+ email: User email
+ password: Plain text password
+ role: User role ('admin' or 'user')
+
+ Returns:
+ Created user dict or None if email exists
+ """
+ self.connect()
+
+ password_hash = self.hash_password(password)
+
+ try:
+ with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
+ cursor.execute(
+ """
+ INSERT INTO users (email, password_hash, role)
+ VALUES (%s, %s, %s)
+ RETURNING id, email, role, created_at
+ """,
+ (email, password_hash, role),
+ )
+ user = cursor.fetchone()
+ self.conn.commit()
+ return dict(user) if user else None
+ except psycopg2.errors.UniqueViolation:
+ self.conn.rollback()
+ return None
+
+ def authenticate_user(self, email: str, password: str) -> Optional[Dict]:
+ """Authenticate a user by email and password.
+
+ Args:
+ email: User email
+ password: Plain text password
+
+ Returns:
+ User dict if authenticated, None otherwise
+ """
+ self.connect()
+
+ with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
+ cursor.execute(
+ "SELECT * FROM users WHERE email = %s",
+ (email,),
+ )
+ user = cursor.fetchone()
+
+ if user and self.verify_password(password, user["password_hash"]):
+ return {
+ "id": user["id"],
+ "email": user["email"],
+ "role": user["role"],
+ "created_at": user["created_at"],
+ }
+ return None
+
+ def get_user_by_id(self, user_id: int) -> Optional[Dict]:
+ """Get a user by ID.
+
+ Args:
+ user_id: User ID
+
+ Returns:
+ User dict or None
+ """
+ self.connect()
+
+ with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
+ cursor.execute(
+ "SELECT id, email, role, created_at FROM users WHERE id = %s",
+ (user_id,),
+ )
+ user = cursor.fetchone()
+ return dict(user) if user else None
+
+ def get_user_by_email(self, email: str) -> Optional[Dict]:
+ """Get a user by email.
+
+ Args:
+ email: User email
+
+ Returns:
+ User dict or None
+ """
+ self.connect()
+
+ with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
+ cursor.execute(
+ "SELECT id, email, role, created_at FROM users WHERE email = %s",
+ (email,),
+ )
+ user = cursor.fetchone()
+ return dict(user) if user else None
+
+ def get_all_users(self, limit: int = 100, offset: int = 0) -> List[Dict]:
+ """Get all users (admin only).
+
+ Args:
+ limit: Maximum number of users
+ offset: Offset for pagination
+
+ Returns:
+ List of user dicts
+ """
+ self.connect()
+
+ with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
+ cursor.execute(
+ """
+ SELECT id, email, role, created_at
+ FROM users
+ ORDER BY created_at DESC
+ LIMIT %s OFFSET %s
+ """,
+ (limit, offset),
+ )
+ return [dict(row) for row in cursor.fetchall()]
+
+ def update_user_role(self, user_id: int, role: str) -> Optional[Dict]:
+ """Update a user's role (admin only).
+
+ Args:
+ user_id: User ID
+ role: New role ('admin' or 'user')
+
+ Returns:
+ Updated user dict or None
+ """
+ self.connect()
+
+ with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
+ cursor.execute(
+ """
+ UPDATE users
+ SET role = %s, updated_at = CURRENT_TIMESTAMP
+ WHERE id = %s
+ RETURNING id, email, role, created_at
+ """,
+ (role, user_id),
+ )
+ user = cursor.fetchone()
+ self.conn.commit()
+ return dict(user) if user else None
+
+ def delete_user(self, user_id: int) -> bool:
+ """Delete a user (admin only).
+
+ Args:
+ user_id: User ID
+
+ Returns:
+ True if deleted
+ """
+ self.connect()
+
+ with self.conn.cursor() as cursor:
+ cursor.execute("DELETE FROM users WHERE id = %s", (user_id,))
+ deleted = cursor.rowcount > 0
+ self.conn.commit()
+ return deleted
+
+ def get_user_count(self) -> int:
+ """Get total user count.
+
+ Returns:
+ Number of users
+ """
+ self.connect()
+
+ with self.conn.cursor() as cursor:
+ cursor.execute("SELECT COUNT(*) FROM users")
+ return cursor.fetchone()[0]
diff --git a/SPARC/llm.py b/SPARC/llm.py
index ef56dae..2e60c9b 100644
--- a/SPARC/llm.py
+++ b/SPARC/llm.py
@@ -9,31 +9,29 @@ from typing import Dict
class LLMAnalyzer:
"""Handles LLM-based analysis of patent content."""
- def __init__(self, api_key: str | None = None, test_mode: bool = False, use_database: bool | None = None):
+ def __init__(self, api_key: str | None = None, test_mode: bool = False, use_cache: bool | None = None):
"""Initialize the LLM analyzer.
Args:
api_key: OpenRouter API key. If None, will attempt to load from config.
test_mode: If True, print prompts instead of making API calls
- use_database: If True, store messages in database instead of calling API.
- If None, will use config.use_database
+ use_cache: If True, check database cache before making API calls.
+ If None, uses config.use_cache (default: True)
"""
self.test_mode = test_mode
- self.use_database = use_database if use_database is not None else config.use_database
- self.db_client = None
+ self.use_cache = use_cache if use_cache is not None else config.use_cache
+ self.model = "anthropic/claude-3.5-sonnet"
- # Initialize database client if in database mode
- if self.use_database:
- self.db_client = DatabaseClient(config.database_url)
- self.db_client.initialize_schema()
+ # Always initialize database client for storage and caching
+ self.db_client = DatabaseClient(config.database_url)
+ self.db_client.initialize_schema()
- # Initialize OpenRouter client if not in database mode
- if (api_key or config.openrouter_api_key) and not test_mode and not self.use_database:
+ # Initialize OpenRouter client if API key is available
+ if (api_key or config.openrouter_api_key) and not test_mode:
self.client = OpenAI(
api_key=api_key or config.openrouter_api_key,
base_url="https://openrouter.ai/api/v1"
)
- self.model = "anthropic/claude-3.5-sonnet"
else:
self.client = None
@@ -68,22 +66,31 @@ Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals
print("=" * 80)
return "[TEST MODE - No API call made]"
- # Database mode: store the prompt and return a placeholder response
- if self.use_database:
- response_text = "[DATABASE MODE] Message stored for testing/analytics. Enable API mode to get actual analysis."
-
- self.db_client.store_message(
+ # Check cache first
+ if self.use_cache:
+ cached = self.db_client.get_cached_response(
prompt=prompt,
- response=response_text,
company_name=company_name,
- analysis_type="single_patent",
- model=self.model if hasattr(self, 'model') else None,
- metadata={"patent_content_length": len(patent_content)}
+ analysis_type="single_patent"
)
+ if cached:
+ # Log the cache hit
+ self.db_client.store_message(
+ prompt=prompt,
+ response=cached["response"],
+ company_name=company_name,
+ analysis_type="single_patent",
+ model=self.model,
+ metadata={
+ "patent_content_length": len(patent_content),
+ "cache_hit": True,
+ "original_message_id": cached["id"]
+ },
+ is_cached=True
+ )
+ return cached["response"]
- return response_text
-
- # API mode: send to OpenRouter
+ # Call API if no cache hit and client is available
if self.client:
response = self.client.chat.completions.create(
model=self.model,
@@ -92,23 +99,34 @@ Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals
)
response_text = response.choices[0].message.content
- # Store in database if db_client is available (for logging even in API mode)
- if self.db_client:
- self.db_client.store_message(
- prompt=prompt,
- response=response_text,
- company_name=company_name,
- analysis_type="single_patent",
- model=self.model,
- metadata={"patent_content_length": len(patent_content)},
- token_usage={
- "prompt_tokens": response.usage.prompt_tokens,
- "completion_tokens": response.usage.completion_tokens,
- "total_tokens": response.usage.total_tokens
- } if hasattr(response, 'usage') else None
- )
+ # Store in database for future cache lookups
+ self.db_client.store_message(
+ prompt=prompt,
+ response=response_text,
+ company_name=company_name,
+ analysis_type="single_patent",
+ model=self.model,
+ metadata={"patent_content_length": len(patent_content)},
+ token_usage={
+ "prompt_tokens": response.usage.prompt_tokens,
+ "completion_tokens": response.usage.completion_tokens,
+ "total_tokens": response.usage.total_tokens
+ } if hasattr(response, 'usage') else None
+ )
return response_text
+
+ # No API client available - store prompt for later processing
+ placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
+ self.db_client.store_message(
+ prompt=prompt,
+ response=placeholder,
+ company_name=company_name,
+ analysis_type="single_patent",
+ model=self.model,
+ metadata={"patent_content_length": len(patent_content), "pending": True}
+ )
+ return placeholder
def analyze_patent_portfolio(
self, patents_data: list[Dict[str, str]], company_name: str
@@ -150,46 +168,54 @@ Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the co
print(prompt)
return "[TEST MODE]"
- # Database mode: store the prompt and return a placeholder response
- if self.use_database:
- response_text = "[DATABASE MODE] Message stored for testing/analytics. Enable API mode to get actual analysis."
+ metadata = {
+ "patent_count": len(patents_data),
+ "patent_ids": [p['patent_id'] for p in patents_data]
+ }
- self.db_client.store_message(
+ # Check cache first
+ if self.use_cache:
+ cached = self.db_client.get_cached_response(
prompt=prompt,
- response=response_text,
company_name=company_name,
- analysis_type="portfolio",
- model=self.model if hasattr(self, 'model') else None,
- metadata={
- "patent_count": len(patents_data),
- "patent_ids": [p['patent_id'] for p in patents_data]
- }
+ analysis_type="portfolio"
)
+ if cached:
+ # Log the cache hit
+ self.db_client.store_message(
+ prompt=prompt,
+ response=cached["response"],
+ company_name=company_name,
+ analysis_type="portfolio",
+ model=self.model,
+ metadata={
+ **metadata,
+ "cache_hit": True,
+ "original_message_id": cached["id"]
+ },
+ is_cached=True
+ )
+ return cached["response"]
- return response_text
+ # Call API if no cache hit and client is available
+ if self.client:
+ try:
+ response = self.client.chat.completions.create(
+ model=self.model,
+ max_tokens=2048,
+ messages=[{"role": "user", "content": prompt}],
+ )
- # API mode: send to OpenRouter
- try:
- response = self.client.chat.completions.create(
- model=self.model,
- max_tokens=2048,
- messages=[{"role": "user", "content": prompt}],
- )
+ response_text = response.choices[0].message.content
- response_text = response.choices[0].message.content
-
- # Store in database if db_client is available (for logging even in API mode)
- if self.db_client:
+ # Store in database for future cache lookups
self.db_client.store_message(
prompt=prompt,
response=response_text,
company_name=company_name,
analysis_type="portfolio",
model=self.model,
- metadata={
- "patent_count": len(patents_data),
- "patent_ids": [p['patent_id'] for p in patents_data]
- },
+ metadata=metadata,
token_usage={
"prompt_tokens": response.usage.prompt_tokens,
"completion_tokens": response.usage.completion_tokens,
@@ -197,7 +223,19 @@ Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the co
} if hasattr(response, 'usage') else None
)
- return response_text
- except AttributeError:
- return prompt
+ return response_text
+ except AttributeError:
+ return prompt
+
+ # No API client available - store prompt for later processing
+ placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
+ self.db_client.store_message(
+ prompt=prompt,
+ response=placeholder,
+ company_name=company_name,
+ analysis_type="portfolio",
+ model=self.model,
+ metadata={**metadata, "pending": True}
+ )
+ return placeholder
diff --git a/dashboard.py b/dashboard.py
deleted file mode 100644
index af9f001..0000000
--- a/dashboard.py
+++ /dev/null
@@ -1,778 +0,0 @@
-"""SPARC Visualization Dashboard.
-
-A Streamlit-based dashboard for visualizing patent analysis results.
-Run with: streamlit run dashboard.py
-"""
-
-import streamlit as st
-import plotly.express as px
-import plotly.graph_objects as go
-import pandas as pd
-from datetime import datetime, timedelta
-
-from SPARC.analyzer import CompanyAnalyzer
-from SPARC.database import DatabaseClient
-from SPARC import config
-
-
-st.set_page_config(
- page_title="SPARC Dashboard",
- page_icon="⚡",
- layout="wide",
- initial_sidebar_state="collapsed",
-)
-
-# Modern CSS styling
-st.markdown("""
-
-""", unsafe_allow_html=True)
-
-
-@st.cache_resource
-def get_analyzer():
- """Get or create the CompanyAnalyzer instance."""
- return CompanyAnalyzer()
-
-
-@st.cache_resource
-def get_db_client():
- """Get database client if available."""
- if config.use_database:
- try:
- client = DatabaseClient()
- client.connect()
- return client
- except Exception:
- return None
- return None
-
-
-def render_header():
- """Render the modern dashboard header."""
- st.markdown("""
-
-
-
⚡ SPARC
- Semiconductor Patent Analytics
-
-
- """, unsafe_allow_html=True)
-
-
-def render_navigation():
- """Render horizontal tab navigation at the top."""
- tabs = st.tabs(["🔍 Company Analysis", "📦 Batch Analysis", "📊 Analytics", "ℹ️ About"])
- return tabs
-
-
-def render_company_analysis():
- """Render single company analysis page."""
- st.markdown('
Single Company Analysis
', unsafe_allow_html=True)
- st.markdown("Analyze a company's patent portfolio using AI-powered insights.")
-
- st.markdown("")
-
- # Search card
- with st.container():
- col1, col2 = st.columns([3, 1])
-
- with col1:
- company_name = st.text_input(
- "Company Name",
- placeholder="Enter company name (e.g., nvidia, intel, amd)",
- help="Enter the company name to analyze their patent portfolio",
- label_visibility="collapsed",
- )
-
- with col2:
- analyze_btn = st.button("🔍 Analyze", type="primary", use_container_width=True)
-
- if analyze_btn and company_name:
- with st.spinner(f"Analyzing {company_name}..."):
- analyzer = get_analyzer()
- result = analyzer._analyze_company_safe(company_name)
-
- if result.success:
- st.success(f"✓ Analysis complete for {company_name.upper()}")
-
- st.markdown("")
-
- # Metrics row with custom styling
- col1, col2, col3 = st.columns(3)
- with col1:
- st.metric("Patents Found", result.patent_count)
- with col2:
- st.metric("Analysis Status", "Complete")
- with col3:
- st.metric("Timestamp", result.timestamp.strftime("%H:%M:%S"))
-
- st.markdown("")
-
- # Analysis content in a styled container
- st.markdown('
AI Analysis Results
', unsafe_allow_html=True)
- with st.container():
- st.markdown(result.analysis)
-
- else:
- st.error(f"Analysis failed: {result.error}")
-
- elif not company_name and analyze_btn:
- st.warning("Please enter a company name to analyze.")
-
-
-def render_batch_analysis():
- """Render batch analysis page."""
- st.markdown('
Batch Company Analysis
', unsafe_allow_html=True)
- st.markdown("Analyze multiple companies simultaneously for comparative insights.")
-
- st.markdown("")
-
- # Input section
- col1, col2 = st.columns([2, 1])
-
- with col1:
- companies_input = st.text_area(
- "Company Names",
- placeholder="Enter company names (one per line or comma-separated):\nnvidia\namd\nintel\nqualcomm",
- height=150,
- label_visibility="collapsed",
- )
-
- with col2:
- st.markdown("**Configuration**")
- max_workers = st.slider("Concurrent Workers", 1, 5, 3, help="Number of parallel analysis threads")
- st.markdown("")
- analyze_btn = st.button(
- "🚀 Run Batch Analysis", type="primary", use_container_width=True
- )
-
- if analyze_btn and companies_input:
- # Parse company names
- companies = [
- c.strip()
- for c in companies_input.replace(",", "\n").split("\n")
- if c.strip()
- ]
-
- if not companies:
- st.warning("Please enter at least one company name")
- return
-
- st.info(f"🔄 Starting analysis of {len(companies)} companies...")
-
- # Progress tracking
- progress_bar = st.progress(0)
- status_text = st.empty()
-
- analyzer = get_analyzer()
-
- def update_progress(company: str, completed: int, total: int):
- progress = completed / total
- progress_bar.progress(progress)
- status_text.text(f"Analyzing {company}... ({completed}/{total})")
-
- result = analyzer.analyze_companies(
- companies=companies,
- max_workers=max_workers,
- progress_callback=update_progress,
- )
-
- progress_bar.progress(1.0)
- status_text.text("✓ Analysis complete!")
-
- st.markdown("")
-
- # Summary metrics
- st.markdown('
+ SPARC (Semiconductor Patent & Analytics Report Core)
+ is an AI-powered patent analysis platform that evaluates company performance by analyzing their
+ patent portfolios with cutting-edge language models.
+