diff --git a/.env.example b/.env.example index 1d776d0..acf4901 100644 --- a/.env.example +++ b/.env.example @@ -6,11 +6,16 @@ API_KEY=your_serpapi_key_here # OpenRouter API key for LLM analysis OPENROUTER_API_KEY=your_openrouter_key_here -# Database configuration (for docker-compose setup) +# Database configuration +# All messages are stored in the database for persistence and caching DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc -# Toggle between database mode and API mode -# When USE_DATABASE=true: stores all messages in database instead of sending to OpenRouter -# When USE_DATABASE=false: sends messages to OpenRouter API as normal -# Default: false -USE_DATABASE=false +# Cache configuration +# When USE_CACHE=true: check database for cached responses before making API calls +# When USE_CACHE=false: always make fresh API calls (still stores results in database) +# Default: true +USE_CACHE=true + +# JWT Secret for authentication +# IMPORTANT: Change this to a secure random string in production +JWT_SECRET=your-secure-jwt-secret-change-in-production diff --git a/.gitea/workflows/build.yaml b/.gitea/workflows/build.yaml index d0f1a75..1517e6d 100644 --- a/.gitea/workflows/build.yaml +++ b/.gitea/workflows/build.yaml @@ -1,4 +1,4 @@ -name: Build and Push Docker Image +name: Build and Push Docker Images on: push: @@ -9,7 +9,7 @@ on: workflow_dispatch: jobs: - build-and-push: + build-api: runs-on: ubuntu-latest steps: - name: Install dependencies @@ -31,32 +31,24 @@ jobs: REPO_OWNER="${{ gitea.repository_owner }}" REPO_NAME="${{ gitea.repository }}" - # Extract repository name without owner REPO_NAME_ONLY=$(echo "$REPO_NAME" | cut -d'/' -f2) - - # Convert to lowercase for Docker registry compatibility REPO_OWNER_LOWER=$(echo "$REPO_OWNER" | tr '[:upper:]' '[:lower:]') REPO_NAME_LOWER=$(echo "$REPO_NAME_ONLY" | tr '[:upper:]' '[:lower:]') - # Base image path IMAGE_BASE="${REGISTRY}/${REPO_OWNER_LOWER}/${REPO_NAME_LOWER}" - # Determine tag based on ref 
case "${{ gitea.ref }}" in refs/tags/*) - # Tag push - use the tag name TAG_NAME="${{ gitea.ref_name }}" echo "IMAGE_TAG=${IMAGE_BASE}:${TAG_NAME}" >> $GITHUB_OUTPUT echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT ;; refs/heads/main) - # Main branch - use commit SHA (shortened to 7 chars) and latest SHORT_SHA=$(echo "${{ gitea.sha }}" | cut -c1-7) echo "IMAGE_TAG=${IMAGE_BASE}:${SHORT_SHA}" >> $GITHUB_OUTPUT echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT ;; *) - # Other branches - use branch name BRANCH_TAG=$(echo "${{ gitea.ref_name }}" | sed 's/\//-/g') echo "IMAGE_TAG=${IMAGE_BASE}:${BRANCH_TAG}" >> $GITHUB_OUTPUT echo "PUSH_LATEST=false" >> $GITHUB_OUTPUT @@ -70,13 +62,13 @@ jobs: run: | echo "${{ secrets.PERSONAL_TOKEN }}" | docker login gitea.leeworks.dev -u "${{ gitea.actor }}" --password-stdin - - name: Build and push with Docker + - name: Build and push API image shell: sh run: | - echo "Building image..." + echo "Building API image..." docker build -t ${{ steps.tags.outputs.IMAGE_TAG }} . - echo "Pushing image..." + echo "Pushing API image..." docker push ${{ steps.tags.outputs.IMAGE_TAG }} if [ "${{ steps.tags.outputs.PUSH_LATEST }}" = "true" ]; then @@ -85,5 +77,74 @@ jobs: docker push ${{ steps.tags.outputs.IMAGE_LATEST }} fi - echo "Build and push completed successfully!" - echo "Image available at ${{ steps.tags.outputs.IMAGE_TAG }}" + echo "API image available at ${{ steps.tags.outputs.IMAGE_TAG }}" + + build-frontend: + runs-on: ubuntu-latest + steps: + - name: Install dependencies + shell: sh + run: | + apk add --no-cache git docker-cli + + - name: Checkout code + shell: sh + run: | + git clone https://gitea.leeworks.dev/${{ gitea.repository }}.git . 
+ git checkout ${{ gitea.sha }} + + - name: Determine image tags + id: tags + shell: sh + run: | + REGISTRY="gitea.leeworks.dev" + REPO_OWNER="${{ gitea.repository_owner }}" + REPO_NAME="${{ gitea.repository }}" + + REPO_NAME_ONLY=$(echo "$REPO_NAME" | cut -d'/' -f2) + REPO_OWNER_LOWER=$(echo "$REPO_OWNER" | tr '[:upper:]' '[:lower:]') + REPO_NAME_LOWER=$(echo "$REPO_NAME_ONLY" | tr '[:upper:]' '[:lower:]') + + IMAGE_BASE="${REGISTRY}/${REPO_OWNER_LOWER}/${REPO_NAME_LOWER}" + + case "${{ gitea.ref }}" in + refs/tags/*) + TAG_NAME="${{ gitea.ref_name }}" + echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${TAG_NAME}" >> $GITHUB_OUTPUT + echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT + ;; + refs/heads/main) + SHORT_SHA=$(echo "${{ gitea.sha }}" | cut -c1-7) + echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${SHORT_SHA}" >> $GITHUB_OUTPUT + echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT + ;; + *) + BRANCH_TAG=$(echo "${{ gitea.ref_name }}" | sed 's/\//-/g') + echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${BRANCH_TAG}" >> $GITHUB_OUTPUT + echo "PUSH_LATEST=false" >> $GITHUB_OUTPUT + ;; + esac + + echo "IMAGE_LATEST=${IMAGE_BASE}:frontend-latest" >> $GITHUB_OUTPUT + + - name: Login to registry + shell: sh + run: | + echo "${{ secrets.PERSONAL_TOKEN }}" | docker login gitea.leeworks.dev -u "${{ gitea.actor }}" --password-stdin + + - name: Build and push frontend image + shell: sh + run: | + echo "Building frontend image..." + docker build -t ${{ steps.tags.outputs.IMAGE_TAG }} ./frontend + + echo "Pushing frontend image..." + docker push ${{ steps.tags.outputs.IMAGE_TAG }} + + if [ "${{ steps.tags.outputs.PUSH_LATEST }}" = "true" ]; then + echo "Tagging and pushing frontend-latest..." 
+ docker tag ${{ steps.tags.outputs.IMAGE_TAG }} ${{ steps.tags.outputs.IMAGE_LATEST }} + docker push ${{ steps.tags.outputs.IMAGE_LATEST }} + fi + + echo "Frontend image available at ${{ steps.tags.outputs.IMAGE_TAG }}" diff --git a/README.md b/README.md index d4ae497..c9b0b10 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ SPARC automatically collects, parses, and analyzes patents from companies to pro - **Portfolio Analysis**: Evaluates multiple patents holistically for comprehensive insights - **Batch Processing**: Analyze multiple companies concurrently with progress tracking - **REST API**: FastAPI web service with async job support -- **Dashboard**: Interactive Streamlit visualization dashboard +- **Dashboard**: React TypeScript web dashboard with authentication - **Robust Testing**: 40 tests covering all major functionality ## Architecture @@ -27,7 +27,9 @@ SPARC/ ├── serp_api.py # Patent retrieval and PDF parsing ├── llm.py # Claude AI integration via OpenRouter ├── analyzer.py # High-level orchestration -├── api.py # FastAPI web service +├── api.py # FastAPI web service with auth endpoints +├── auth.py # JWT authentication module +├── database.py # PostgreSQL storage with caching ├── types.py # Data models └── config.py # Environment configuration ``` @@ -48,7 +50,7 @@ docker-compose up -d # Access the services # - API: http://localhost:8000 -# - Dashboard: http://localhost:8501 +# - Dashboard: http://localhost:8080 # - API Docs: http://localhost:8000/docs ``` @@ -186,21 +188,22 @@ curl -X POST http://localhost:8000/analyze/batch/async \ -d '{"companies": ["nvidia", "amd", "intel", "qualcomm"]}' ``` -### Visualization Dashboard +### Web Dashboard -Launch the interactive Streamlit dashboard: +The React dashboard is included in Docker Compose: ```bash -streamlit run dashboard.py +docker-compose up -d ``` Dashboard features: +- **Authentication**: User registration, login, and JWT-based sessions - **Company Analysis**: Analyze individual companies 
with real-time results -- **Batch Analysis**: Process multiple companies with progress tracking and charts -- **Analytics**: View historical analysis data and trends (requires database mode) -- **System Status**: Monitor database and analyzer health +- **Batch Analysis**: Process multiple companies with progress tracking +- **Analytics**: View historical analysis data and trends +- **Admin Panel**: User management for administrators -The dashboard runs at `http://localhost:8501` by default. +The dashboard runs at `http://localhost:8080` when using Docker Compose. ## Running Tests @@ -280,4 +283,4 @@ For open source projects, say how it is licensed. Core functionality complete. Ready for production use with API keys configured. -All major features implemented: REST API, Streamlit dashboard, Docker containerization, database storage, and multi-company batch processing. +All major features implemented: REST API, React dashboard with authentication, Docker containerization, database storage with caching, and multi-company batch processing. diff --git a/SPARC/api.py b/SPARC/api.py index 2a75fee..cf76493 100644 --- a/SPARC/api.py +++ b/SPARC/api.py @@ -5,12 +5,23 @@ Provides REST API endpoints for analyzing company patent portfolios. 
from contextlib import asynccontextmanager from datetime import datetime -from typing import Annotated +from typing import Annotated, List -from fastapi import BackgroundTasks, FastAPI, HTTPException, Query -from pydantic import BaseModel, Field +from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, Query +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, EmailStr, Field +from SPARC import config from SPARC.analyzer import CompanyAnalyzer +from SPARC.auth import ( + TokenResponse, + UserResponse, + create_tokens, + decode_token, + get_current_admin, + get_current_user, + get_db_client, +) from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult @@ -67,6 +78,42 @@ class HealthResponse(BaseModel): timestamp: datetime +# Auth request/response models +class RegisterRequest(BaseModel): + """User registration request.""" + + email: EmailStr + password: str = Field(..., min_length=8, description="Password (min 8 characters)") + + +class LoginRequest(BaseModel): + """User login request.""" + + email: EmailStr + password: str + + +class RefreshRequest(BaseModel): + """Token refresh request.""" + + refresh_token: str + + +class UpdateRoleRequest(BaseModel): + """Update user role request.""" + + role: str = Field(..., pattern="^(admin|user)$") + + +class AnalyticsResponse(BaseModel): + """Analytics response model.""" + + total_messages: int + by_company: List[dict] + by_type: List[dict] + period_days: int + + # In-memory job storage (for demo; production would use Redis/DB) _jobs: dict[str, JobStatus] = {} _job_counter = 0 @@ -116,6 +163,196 @@ app = FastAPI( lifespan=lifespan, ) +# Add CORS middleware for React frontend +app.add_middleware( + CORSMiddleware, + allow_origins=["http://localhost:3000", "http://localhost:5173"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +# ============== Auth Endpoints ============== + + +@app.post("/auth/register", response_model=UserResponse, 
tags=["Auth"]) +async def register(request: RegisterRequest): + """Register a new user. + + The first registered user automatically becomes an admin. + """ + db = get_db_client() + + # First user becomes admin + user_count = db.get_user_count() + role = "admin" if user_count == 0 else "user" + + user = db.create_user( + email=request.email, + password=request.password, + role=role, + ) + + if not user: + raise HTTPException( + status_code=400, + detail="Email already registered", + ) + + return UserResponse( + id=user["id"], + email=user["email"], + role=user["role"], + created_at=user["created_at"], + ) + + +@app.post("/auth/login", response_model=TokenResponse, tags=["Auth"]) +async def login(request: LoginRequest): + """Authenticate user and return JWT tokens.""" + db = get_db_client() + + user = db.authenticate_user(request.email, request.password) + + if not user: + raise HTTPException( + status_code=401, + detail="Invalid email or password", + ) + + return create_tokens(user["id"], user["email"], user["role"]) + + +@app.post("/auth/refresh", response_model=TokenResponse, tags=["Auth"]) +async def refresh_token(request: RefreshRequest): + """Refresh access token using refresh token.""" + payload = decode_token(request.refresh_token) + + if not payload or payload.type != "refresh": + raise HTTPException( + status_code=401, + detail="Invalid refresh token", + ) + + db = get_db_client() + user = db.get_user_by_id(payload.user_id) + + if not user: + raise HTTPException( + status_code=401, + detail="User not found", + ) + + return create_tokens(user["id"], user["email"], user["role"]) + + +@app.get("/auth/me", response_model=UserResponse, tags=["Auth"]) +async def get_me(current_user: UserResponse = Depends(get_current_user)): + """Get current authenticated user.""" + return current_user + + +# ============== Admin Endpoints ============== + + +@app.get("/admin/users", response_model=List[UserResponse], tags=["Admin"]) +async def list_users( + limit: int = 
Query(default=100, ge=1, le=1000), + offset: int = Query(default=0, ge=0), + _: UserResponse = Depends(get_current_admin), +): + """List all users (admin only).""" + db = get_db_client() + users = db.get_all_users(limit=limit, offset=offset) + + return [ + UserResponse( + id=u["id"], + email=u["email"], + role=u["role"], + created_at=u["created_at"], + ) + for u in users + ] + + +@app.patch("/admin/users/{user_id}/role", response_model=UserResponse, tags=["Admin"]) +async def update_user_role( + user_id: int, + request: UpdateRoleRequest, + current_admin: UserResponse = Depends(get_current_admin), +): + """Update a user's role (admin only).""" + if user_id == current_admin.id: + raise HTTPException( + status_code=400, + detail="Cannot change your own role", + ) + + db = get_db_client() + user = db.update_user_role(user_id, request.role) + + if not user: + raise HTTPException( + status_code=404, + detail="User not found", + ) + + return UserResponse( + id=user["id"], + email=user["email"], + role=user["role"], + created_at=user["created_at"], + ) + + +@app.delete("/admin/users/{user_id}", tags=["Admin"]) +async def delete_user( + user_id: int, + current_admin: UserResponse = Depends(get_current_admin), +): + """Delete a user (admin only).""" + if user_id == current_admin.id: + raise HTTPException( + status_code=400, + detail="Cannot delete yourself", + ) + + db = get_db_client() + deleted = db.delete_user(user_id) + + if not deleted: + raise HTTPException( + status_code=404, + detail="User not found", + ) + + return {"message": "User deleted"} + + +# ============== Analytics Endpoint ============== + + +@app.get("/analytics", response_model=AnalyticsResponse, tags=["Analytics"]) +async def get_analytics( + days: int = Query(default=30, ge=1, le=365), + _: UserResponse = Depends(get_current_user), +): + """Get analytics data (authenticated users only).""" + db = get_db_client() + analytics = db.get_analytics(days=days) + + return AnalyticsResponse( + 
total_messages=analytics["total_messages"], + by_company=analytics["by_company"], + by_type=analytics["by_type"], + period_days=analytics["period_days"], + ) + + +# ============== System Endpoints ============== + @app.get("/health", response_model=HealthResponse, tags=["System"]) async def health_check(): @@ -132,7 +369,10 @@ async def health_check(): response_model=CompanyAnalysisResponse, tags=["Analysis"], ) -async def analyze_company(company_name: str): +async def analyze_company( + company_name: str, + _: UserResponse = Depends(get_current_user), +): """Analyze a single company's patent portfolio. This endpoint retrieves recent patents for the specified company, @@ -156,7 +396,10 @@ async def analyze_company(company_name: str): response_model=BatchAnalysisResponse, tags=["Analysis"], ) -async def analyze_companies_batch(request: BatchAnalysisRequest): +async def analyze_companies_batch( + request: BatchAnalysisRequest, + _: UserResponse = Depends(get_current_user), +): """Analyze multiple companies' patent portfolios. Processes companies concurrently for improved performance. @@ -209,7 +452,9 @@ def _run_batch_job(job_id: str, companies: list[str], max_workers: int): @app.post("/analyze/batch/async", response_model=JobStatus, tags=["Analysis"]) async def analyze_companies_async( - request: BatchAnalysisRequest, background_tasks: BackgroundTasks + request: BatchAnalysisRequest, + background_tasks: BackgroundTasks, + _: UserResponse = Depends(get_current_user), ): """Start an asynchronous batch analysis job. @@ -243,7 +488,10 @@ async def analyze_companies_async( @app.get("/jobs/{job_id}", response_model=JobStatus, tags=["Jobs"]) -async def get_job_status(job_id: str): +async def get_job_status( + job_id: str, + _: UserResponse = Depends(get_current_user), +): """Get the status of a background analysis job. 
Args:
@@ -265,6 +513,7 @@ async def list_jobs(
         Query(description="Filter by status: pending, running, completed, failed"),
     ] = None,
     limit: Annotated[int, Query(ge=1, le=100)] = 10,
+    _: UserResponse = Depends(get_current_user),
 ):
     """List all analysis jobs.
 
diff --git a/SPARC/auth.py b/SPARC/auth.py
new file mode 100644
index 0000000..4a5a28f
--- /dev/null
+++ b/SPARC/auth.py
@@ -0,0 +1,221 @@
+"""JWT authentication utilities for SPARC API."""
+
+import os
+import warnings
+from datetime import datetime, timedelta, timezone
+from typing import Optional
+
+import jwt
+from fastapi import Depends, HTTPException, status
+from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+from pydantic import BaseModel
+
+from SPARC import config
+from SPARC.database import DatabaseClient
+
+# JWT Configuration
+# The fallback below keeps local development working, but it is a publicly
+# known value: tokens signed with it can be forged by anyone. Warn loudly
+# whenever it is in effect so a missing JWT_SECRET is noticed before deploy.
+_DEFAULT_JWT_SECRET = "sparc-secret-key-change-in-production"
+JWT_SECRET = os.getenv("JWT_SECRET", _DEFAULT_JWT_SECRET)
+if JWT_SECRET == _DEFAULT_JWT_SECRET:
+    warnings.warn(
+        "JWT_SECRET is not set; falling back to the insecure built-in default. "
+        "Set JWT_SECRET to a long random value before deploying.",
+        RuntimeWarning,
+    )
+JWT_ALGORITHM = "HS256"
+ACCESS_TOKEN_EXPIRE_MINUTES = 30
+REFRESH_TOKEN_EXPIRE_DAYS = 7
+
+security = HTTPBearer()
+
+
+class TokenPayload(BaseModel):
+    """JWT token payload."""
+
+    sub: str  # user_id as string (JWT RFC 7519 requires sub to be a string)
+    email: str
+    role: str
+    exp: datetime
+    type: str  # "access" or "refresh"
+
+    @property
+    def user_id(self) -> int:
+        """Get user_id as integer."""
+        return int(self.sub)
+
+
+class TokenResponse(BaseModel):
+    """Token response model."""
+
+    access_token: str
+    refresh_token: str
+    token_type: str = "bearer"
+
+
+class UserResponse(BaseModel):
+    """User response model."""
+
+    id: int
+    email: str
+    role: str
+    created_at: datetime
+
+
+def create_access_token(user_id: int, email: str, role: str) -> str:
+    """Create a JWT access token.
+
+    Args:
+        user_id: User ID
+        email: User email
+        role: User role
+
+    Returns:
+        Encoded JWT token
+    """
+    expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
+    payload = {
+        "sub": str(user_id),
+        "email": email,
+        "role": role,
+        "exp": expire,
+        "type": "access",
+    }
+    return jwt.encode(payload, JWT_SECRET, algorithm=JWT_ALGORITHM)
+
+
+def create_refresh_token(user_id: int, email: str, role: str) -> str:
+    """Create a JWT refresh token.
+
+    Args:
+        user_id: User ID
+        email: User email
+        role: User role
+
+    Returns:
+        Encoded JWT token
+    """
+    expire = datetime.now(timezone.utc) + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
+    payload = {
+        "sub": str(user_id),
+        "email": email,
+        "role": role,
+        "exp": expire,
+        "type": "refresh",
+    }
+    return jwt.encode(payload, JWT_SECRET, algorithm=JWT_ALGORITHM)
+
+
+def create_tokens(user_id: int, email: str, role: str) -> TokenResponse:
+    """Create both access and refresh tokens.
+
+    Args:
+        user_id: User ID
+        email: User email
+        role: User role
+
+    Returns:
+        TokenResponse with both tokens
+    """
+    return TokenResponse(
+        access_token=create_access_token(user_id, email, role),
+        refresh_token=create_refresh_token(user_id, email, role),
+    )
+
+
+def decode_token(token: str) -> Optional[TokenPayload]:
+    """Decode and validate a JWT token.
+
+    Args:
+        token: JWT token string
+
+    Returns:
+        TokenPayload if valid; None if expired, invalid, or missing claims
+    """
+    try:
+        payload = jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
+        return TokenPayload(**payload)
+    except (jwt.InvalidTokenError, ValueError):
+        # InvalidTokenError already covers ExpiredSignatureError; ValueError covers
+        # a correctly signed token whose claims fail TokenPayload validation.
+        return None
+
+
+def get_db_client() -> DatabaseClient:
+    """Get database client for auth operations."""
+    client = DatabaseClient(config.database_url)
+    client.connect()
+    return client
+
+
+async def get_current_user(
+    credentials: HTTPAuthorizationCredentials = Depends(security),
+) -> UserResponse:
+    """Get the current authenticated user from JWT token.
+ + Args: + credentials: Bearer token from request + + Returns: + UserResponse with user details + + Raises: + HTTPException: If token is invalid or expired + """ + token = credentials.credentials + payload = decode_token(token) + + if not payload: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid or expired token", + headers={"WWW-Authenticate": "Bearer"}, + ) + + if payload.type != "access": + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid token type", + headers={"WWW-Authenticate": "Bearer"}, + ) + + db = get_db_client() + user = db.get_user_by_id(payload.user_id) + + if not user: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="User not found", + headers={"WWW-Authenticate": "Bearer"}, + ) + + return UserResponse( + id=user["id"], + email=user["email"], + role=user["role"], + created_at=user["created_at"], + ) + + +async def get_current_admin( + current_user: UserResponse = Depends(get_current_user), +) -> UserResponse: + """Require admin role for the current user. 
+ + Args: + current_user: Current authenticated user + + Returns: + UserResponse if admin + + Raises: + HTTPException: If user is not admin + """ + if current_user.role != "admin": + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Admin access required", + ) + return current_user diff --git a/SPARC/config.py b/SPARC/config.py index 08dbc7a..11a12a2 100644 --- a/SPARC/config.py +++ b/SPARC/config.py @@ -13,10 +13,15 @@ api_key = os.getenv("API_KEY") # OpenRouter API key for LLM analysis openrouter_api_key = os.getenv("OPENROUTER_API_KEY") -# Database configuration +# Database configuration - all messages are stored in the database +# The database serves as both a persistent store and a cache layer database_url = os.getenv("DATABASE_URL", "postgresql://postgres:postgres@localhost:5432/sparc") -# Toggle between database mode and API mode -# When True: stores all messages in database instead of sending to OpenRouter -# When False: sends messages to OpenRouter API as normal +# Cache configuration +# When enabled (default), the system checks the database for cached responses +# before making API calls, saving tokens and reducing latency +use_cache = os.getenv("USE_CACHE", "true").lower() in ("true", "1", "yes") + +# Legacy compatibility - USE_DATABASE is deprecated, database is always used +# This variable is kept for backwards compatibility but has no effect use_database = os.getenv("USE_DATABASE", "false").lower() in ("true", "1", "yes") diff --git a/SPARC/database.py b/SPARC/database.py index c0fae7d..609f152 100644 --- a/SPARC/database.py +++ b/SPARC/database.py @@ -1,10 +1,12 @@ -"""Database client for storing and retrieving LLM messages.""" +"""Database client for storing and retrieving LLM messages and user authentication.""" import psycopg2 from psycopg2.extras import RealDictCursor from typing import Dict, List, Optional from datetime import datetime import json +import hashlib +import bcrypt class DatabaseClient: @@ -43,10 +45,12 @@ 
class DatabaseClient: analysis_type VARCHAR(50), model VARCHAR(100), prompt TEXT NOT NULL, + prompt_hash VARCHAR(64), response TEXT, metadata JSONB, token_usage JSONB, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + is_cached BOOLEAN DEFAULT FALSE ) """) @@ -62,8 +66,109 @@ class DatabaseClient: ON llm_messages(company_name) """) + # Add prompt_hash and is_cached columns if they don't exist (for existing tables) + # This must run BEFORE creating the index on prompt_hash + cursor.execute(""" + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'llm_messages' AND column_name = 'prompt_hash' + ) THEN + ALTER TABLE llm_messages ADD COLUMN prompt_hash VARCHAR(64); + END IF; + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'llm_messages' AND column_name = 'is_cached' + ) THEN + ALTER TABLE llm_messages ADD COLUMN is_cached BOOLEAN DEFAULT FALSE; + END IF; + END $$; + """) + + # Create index on prompt_hash for cache lookups + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_messages_prompt_hash + ON llm_messages(prompt_hash) + """) + + # Create users table for authentication + cursor.execute(""" + CREATE TABLE IF NOT EXISTS users ( + id SERIAL PRIMARY KEY, + email VARCHAR(255) UNIQUE NOT NULL, + password_hash VARCHAR(255) NOT NULL, + role VARCHAR(20) DEFAULT 'user' CHECK (role IN ('admin', 'user')), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """) + + # Create index on email for fast lookups + cursor.execute(""" + CREATE INDEX IF NOT EXISTS idx_users_email + ON users(email) + """) + self.conn.commit() + @staticmethod + def hash_prompt(prompt: str) -> str: + """Generate a hash of the prompt for cache lookups. 
+ + Args: + prompt: The prompt text to hash + + Returns: + SHA-256 hash of the prompt + """ + return hashlib.sha256(prompt.encode()).hexdigest() + + def get_cached_response( + self, + prompt: str, + company_name: Optional[str] = None, + analysis_type: Optional[str] = None, + ) -> Optional[Dict]: + """Look up a cached response for a given prompt. + + Args: + prompt: The prompt to look up + company_name: Optional company name filter + analysis_type: Optional analysis type filter + + Returns: + Cached message dict if found, None otherwise + """ + self.connect() + + prompt_hash = self.hash_prompt(prompt) + + query = """ + SELECT * FROM llm_messages + WHERE prompt_hash = %s + AND response IS NOT NULL + AND response NOT LIKE '[DATABASE MODE]%%' + AND response NOT LIKE '[TEST MODE]%%' + AND response NOT LIKE '[NO API]%%' + """ + params = [prompt_hash] + + if company_name: + query += " AND company_name = %s" + params.append(company_name) + + if analysis_type: + query += " AND analysis_type = %s" + params.append(analysis_type) + + query += " ORDER BY timestamp DESC LIMIT 1" + + with self.conn.cursor(cursor_factory=RealDictCursor) as cursor: + cursor.execute(query, params) + result = cursor.fetchone() + return dict(result) if result else None + def store_message( self, prompt: str, @@ -73,6 +178,7 @@ class DatabaseClient: model: Optional[str] = None, metadata: Optional[Dict] = None, token_usage: Optional[Dict] = None, + is_cached: bool = False, ) -> int: """Store an LLM message exchange in the database. 
@@ -84,28 +190,33 @@ class DatabaseClient: model: Model identifier used metadata: Additional metadata as dict token_usage: Token usage information + is_cached: Whether this response was served from cache Returns: The ID of the inserted record """ self.connect() + prompt_hash = self.hash_prompt(prompt) + with self.conn.cursor() as cursor: cursor.execute( """ INSERT INTO llm_messages - (prompt, response, company_name, analysis_type, model, metadata, token_usage) - VALUES (%s, %s, %s, %s, %s, %s, %s) + (prompt, prompt_hash, response, company_name, analysis_type, model, metadata, token_usage, is_cached) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING id """, ( prompt, + prompt_hash, response, company_name, analysis_type, model, json.dumps(metadata) if metadata else None, json.dumps(token_usage) if token_usage else None, + is_cached, ), ) @@ -208,3 +319,212 @@ class DatabaseClient: "by_type": [dict(row) for row in by_type], "period_days": days, } + + # User Authentication Methods + + @staticmethod + def hash_password(password: str) -> str: + """Hash a password using bcrypt. + + Args: + password: Plain text password + + Returns: + Hashed password string + """ + return bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode() + + @staticmethod + def verify_password(password: str, password_hash: str) -> bool: + """Verify a password against its hash. + + Args: + password: Plain text password + password_hash: Stored hash + + Returns: + True if password matches + """ + return bcrypt.checkpw(password.encode(), password_hash.encode()) + + def create_user( + self, + email: str, + password: str, + role: str = "user", + ) -> Optional[Dict]: + """Create a new user. 
+ + Args: + email: User email + password: Plain text password + role: User role ('admin' or 'user') + + Returns: + Created user dict or None if email exists + """ + self.connect() + + password_hash = self.hash_password(password) + + try: + with self.conn.cursor(cursor_factory=RealDictCursor) as cursor: + cursor.execute( + """ + INSERT INTO users (email, password_hash, role) + VALUES (%s, %s, %s) + RETURNING id, email, role, created_at + """, + (email, password_hash, role), + ) + user = cursor.fetchone() + self.conn.commit() + return dict(user) if user else None + except psycopg2.errors.UniqueViolation: + self.conn.rollback() + return None + + def authenticate_user(self, email: str, password: str) -> Optional[Dict]: + """Authenticate a user by email and password. + + Args: + email: User email + password: Plain text password + + Returns: + User dict if authenticated, None otherwise + """ + self.connect() + + with self.conn.cursor(cursor_factory=RealDictCursor) as cursor: + cursor.execute( + "SELECT * FROM users WHERE email = %s", + (email,), + ) + user = cursor.fetchone() + + if user and self.verify_password(password, user["password_hash"]): + return { + "id": user["id"], + "email": user["email"], + "role": user["role"], + "created_at": user["created_at"], + } + return None + + def get_user_by_id(self, user_id: int) -> Optional[Dict]: + """Get a user by ID. + + Args: + user_id: User ID + + Returns: + User dict or None + """ + self.connect() + + with self.conn.cursor(cursor_factory=RealDictCursor) as cursor: + cursor.execute( + "SELECT id, email, role, created_at FROM users WHERE id = %s", + (user_id,), + ) + user = cursor.fetchone() + return dict(user) if user else None + + def get_user_by_email(self, email: str) -> Optional[Dict]: + """Get a user by email. 
+ + Args: + email: User email + + Returns: + User dict or None + """ + self.connect() + + with self.conn.cursor(cursor_factory=RealDictCursor) as cursor: + cursor.execute( + "SELECT id, email, role, created_at FROM users WHERE email = %s", + (email,), + ) + user = cursor.fetchone() + return dict(user) if user else None + + def get_all_users(self, limit: int = 100, offset: int = 0) -> List[Dict]: + """Get all users (admin only). + + Args: + limit: Maximum number of users + offset: Offset for pagination + + Returns: + List of user dicts + """ + self.connect() + + with self.conn.cursor(cursor_factory=RealDictCursor) as cursor: + cursor.execute( + """ + SELECT id, email, role, created_at + FROM users + ORDER BY created_at DESC + LIMIT %s OFFSET %s + """, + (limit, offset), + ) + return [dict(row) for row in cursor.fetchall()] + + def update_user_role(self, user_id: int, role: str) -> Optional[Dict]: + """Update a user's role (admin only). + + Args: + user_id: User ID + role: New role ('admin' or 'user') + + Returns: + Updated user dict or None + """ + self.connect() + + with self.conn.cursor(cursor_factory=RealDictCursor) as cursor: + cursor.execute( + """ + UPDATE users + SET role = %s, updated_at = CURRENT_TIMESTAMP + WHERE id = %s + RETURNING id, email, role, created_at + """, + (role, user_id), + ) + user = cursor.fetchone() + self.conn.commit() + return dict(user) if user else None + + def delete_user(self, user_id: int) -> bool: + """Delete a user (admin only). + + Args: + user_id: User ID + + Returns: + True if deleted + """ + self.connect() + + with self.conn.cursor() as cursor: + cursor.execute("DELETE FROM users WHERE id = %s", (user_id,)) + deleted = cursor.rowcount > 0 + self.conn.commit() + return deleted + + def get_user_count(self) -> int: + """Get total user count. 
+ + Returns: + Number of users + """ + self.connect() + + with self.conn.cursor() as cursor: + cursor.execute("SELECT COUNT(*) FROM users") + return cursor.fetchone()[0] diff --git a/SPARC/llm.py b/SPARC/llm.py index ef56dae..2e60c9b 100644 --- a/SPARC/llm.py +++ b/SPARC/llm.py @@ -9,31 +9,29 @@ from typing import Dict class LLMAnalyzer: """Handles LLM-based analysis of patent content.""" - def __init__(self, api_key: str | None = None, test_mode: bool = False, use_database: bool | None = None): + def __init__(self, api_key: str | None = None, test_mode: bool = False, use_cache: bool | None = None): """Initialize the LLM analyzer. Args: api_key: OpenRouter API key. If None, will attempt to load from config. test_mode: If True, print prompts instead of making API calls - use_database: If True, store messages in database instead of calling API. - If None, will use config.use_database + use_cache: If True, check database cache before making API calls. + If None, uses config.use_cache (default: True) """ self.test_mode = test_mode - self.use_database = use_database if use_database is not None else config.use_database - self.db_client = None + self.use_cache = use_cache if use_cache is not None else config.use_cache + self.model = "anthropic/claude-3.5-sonnet" - # Initialize database client if in database mode - if self.use_database: - self.db_client = DatabaseClient(config.database_url) - self.db_client.initialize_schema() + # Always initialize database client for storage and caching + self.db_client = DatabaseClient(config.database_url) + self.db_client.initialize_schema() - # Initialize OpenRouter client if not in database mode - if (api_key or config.openrouter_api_key) and not test_mode and not self.use_database: + # Initialize OpenRouter client if API key is available + if (api_key or config.openrouter_api_key) and not test_mode: self.client = OpenAI( api_key=api_key or config.openrouter_api_key, base_url="https://openrouter.ai/api/v1" ) - self.model = 
"anthropic/claude-3.5-sonnet" else: self.client = None @@ -68,22 +66,31 @@ Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals print("=" * 80) return "[TEST MODE - No API call made]" - # Database mode: store the prompt and return a placeholder response - if self.use_database: - response_text = "[DATABASE MODE] Message stored for testing/analytics. Enable API mode to get actual analysis." - - self.db_client.store_message( + # Check cache first + if self.use_cache: + cached = self.db_client.get_cached_response( prompt=prompt, - response=response_text, company_name=company_name, - analysis_type="single_patent", - model=self.model if hasattr(self, 'model') else None, - metadata={"patent_content_length": len(patent_content)} + analysis_type="single_patent" ) + if cached: + # Log the cache hit + self.db_client.store_message( + prompt=prompt, + response=cached["response"], + company_name=company_name, + analysis_type="single_patent", + model=self.model, + metadata={ + "patent_content_length": len(patent_content), + "cache_hit": True, + "original_message_id": cached["id"] + }, + is_cached=True + ) + return cached["response"] - return response_text - - # API mode: send to OpenRouter + # Call API if no cache hit and client is available if self.client: response = self.client.chat.completions.create( model=self.model, @@ -92,23 +99,34 @@ Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals ) response_text = response.choices[0].message.content - # Store in database if db_client is available (for logging even in API mode) - if self.db_client: - self.db_client.store_message( - prompt=prompt, - response=response_text, - company_name=company_name, - analysis_type="single_patent", - model=self.model, - metadata={"patent_content_length": len(patent_content)}, - token_usage={ - "prompt_tokens": response.usage.prompt_tokens, - "completion_tokens": response.usage.completion_tokens, - "total_tokens": response.usage.total_tokens - } 
if hasattr(response, 'usage') else None - ) + # Store in database for future cache lookups + self.db_client.store_message( + prompt=prompt, + response=response_text, + company_name=company_name, + analysis_type="single_patent", + model=self.model, + metadata={"patent_content_length": len(patent_content)}, + token_usage={ + "prompt_tokens": response.usage.prompt_tokens, + "completion_tokens": response.usage.completion_tokens, + "total_tokens": response.usage.total_tokens + } if hasattr(response, 'usage') else None + ) return response_text + + # No API client available - store prompt for later processing + placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis." + self.db_client.store_message( + prompt=prompt, + response=placeholder, + company_name=company_name, + analysis_type="single_patent", + model=self.model, + metadata={"patent_content_length": len(patent_content), "pending": True} + ) + return placeholder def analyze_patent_portfolio( self, patents_data: list[Dict[str, str]], company_name: str @@ -150,46 +168,54 @@ Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the co print(prompt) return "[TEST MODE]" - # Database mode: store the prompt and return a placeholder response - if self.use_database: - response_text = "[DATABASE MODE] Message stored for testing/analytics. Enable API mode to get actual analysis." 
+ metadata = { + "patent_count": len(patents_data), + "patent_ids": [p['patent_id'] for p in patents_data] + } - self.db_client.store_message( + # Check cache first + if self.use_cache: + cached = self.db_client.get_cached_response( prompt=prompt, - response=response_text, company_name=company_name, - analysis_type="portfolio", - model=self.model if hasattr(self, 'model') else None, - metadata={ - "patent_count": len(patents_data), - "patent_ids": [p['patent_id'] for p in patents_data] - } + analysis_type="portfolio" ) + if cached: + # Log the cache hit + self.db_client.store_message( + prompt=prompt, + response=cached["response"], + company_name=company_name, + analysis_type="portfolio", + model=self.model, + metadata={ + **metadata, + "cache_hit": True, + "original_message_id": cached["id"] + }, + is_cached=True + ) + return cached["response"] - return response_text + # Call API if no cache hit and client is available + if self.client: + try: + response = self.client.chat.completions.create( + model=self.model, + max_tokens=2048, + messages=[{"role": "user", "content": prompt}], + ) - # API mode: send to OpenRouter - try: - response = self.client.chat.completions.create( - model=self.model, - max_tokens=2048, - messages=[{"role": "user", "content": prompt}], - ) + response_text = response.choices[0].message.content - response_text = response.choices[0].message.content - - # Store in database if db_client is available (for logging even in API mode) - if self.db_client: + # Store in database for future cache lookups self.db_client.store_message( prompt=prompt, response=response_text, company_name=company_name, analysis_type="portfolio", model=self.model, - metadata={ - "patent_count": len(patents_data), - "patent_ids": [p['patent_id'] for p in patents_data] - }, + metadata=metadata, token_usage={ "prompt_tokens": response.usage.prompt_tokens, "completion_tokens": response.usage.completion_tokens, @@ -197,7 +223,19 @@ Provide a comprehensive analysis (4-5 
paragraphs) with a final verdict on the co } if hasattr(response, 'usage') else None ) - return response_text - except AttributeError: - return prompt + return response_text + except AttributeError: + return prompt + + # No API client available - store prompt for later processing + placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis." + self.db_client.store_message( + prompt=prompt, + response=placeholder, + company_name=company_name, + analysis_type="portfolio", + model=self.model, + metadata={**metadata, "pending": True} + ) + return placeholder diff --git a/dashboard.py b/dashboard.py deleted file mode 100644 index af9f001..0000000 --- a/dashboard.py +++ /dev/null @@ -1,778 +0,0 @@ -"""SPARC Visualization Dashboard. - -A Streamlit-based dashboard for visualizing patent analysis results. -Run with: streamlit run dashboard.py -""" - -import streamlit as st -import plotly.express as px -import plotly.graph_objects as go -import pandas as pd -from datetime import datetime, timedelta - -from SPARC.analyzer import CompanyAnalyzer -from SPARC.database import DatabaseClient -from SPARC import config - - -st.set_page_config( - page_title="SPARC Dashboard", - page_icon="⚡", - layout="wide", - initial_sidebar_state="collapsed", -) - -# Modern CSS styling -st.markdown(""" - -""", unsafe_allow_html=True) - - -@st.cache_resource -def get_analyzer(): - """Get or create the CompanyAnalyzer instance.""" - return CompanyAnalyzer() - - -@st.cache_resource -def get_db_client(): - """Get database client if available.""" - if config.use_database: - try: - client = DatabaseClient() - client.connect() - return client - except Exception: - return None - return None - - -def render_header(): - """Render the modern dashboard header.""" - st.markdown(""" - - """, unsafe_allow_html=True) - - -def render_navigation(): - """Render horizontal tab navigation at the top.""" - tabs = st.tabs(["🔍 Company Analysis", "📦 Batch Analysis", "📊 Analytics", 
"ℹ️ About"]) - return tabs - - -def render_company_analysis(): - """Render single company analysis page.""" - st.markdown('

Single Company Analysis

', unsafe_allow_html=True) - st.markdown("Analyze a company's patent portfolio using AI-powered insights.") - - st.markdown("") - - # Search card - with st.container(): - col1, col2 = st.columns([3, 1]) - - with col1: - company_name = st.text_input( - "Company Name", - placeholder="Enter company name (e.g., nvidia, intel, amd)", - help="Enter the company name to analyze their patent portfolio", - label_visibility="collapsed", - ) - - with col2: - analyze_btn = st.button("🔍 Analyze", type="primary", use_container_width=True) - - if analyze_btn and company_name: - with st.spinner(f"Analyzing {company_name}..."): - analyzer = get_analyzer() - result = analyzer._analyze_company_safe(company_name) - - if result.success: - st.success(f"✓ Analysis complete for {company_name.upper()}") - - st.markdown("") - - # Metrics row with custom styling - col1, col2, col3 = st.columns(3) - with col1: - st.metric("Patents Found", result.patent_count) - with col2: - st.metric("Analysis Status", "Complete") - with col3: - st.metric("Timestamp", result.timestamp.strftime("%H:%M:%S")) - - st.markdown("") - - # Analysis content in a styled container - st.markdown('

AI Analysis Results

', unsafe_allow_html=True) - with st.container(): - st.markdown(result.analysis) - - else: - st.error(f"Analysis failed: {result.error}") - - elif not company_name and analyze_btn: - st.warning("Please enter a company name to analyze.") - - -def render_batch_analysis(): - """Render batch analysis page.""" - st.markdown('

Batch Company Analysis

', unsafe_allow_html=True) - st.markdown("Analyze multiple companies simultaneously for comparative insights.") - - st.markdown("") - - # Input section - col1, col2 = st.columns([2, 1]) - - with col1: - companies_input = st.text_area( - "Company Names", - placeholder="Enter company names (one per line or comma-separated):\nnvidia\namd\nintel\nqualcomm", - height=150, - label_visibility="collapsed", - ) - - with col2: - st.markdown("**Configuration**") - max_workers = st.slider("Concurrent Workers", 1, 5, 3, help="Number of parallel analysis threads") - st.markdown("") - analyze_btn = st.button( - "🚀 Run Batch Analysis", type="primary", use_container_width=True - ) - - if analyze_btn and companies_input: - # Parse company names - companies = [ - c.strip() - for c in companies_input.replace(",", "\n").split("\n") - if c.strip() - ] - - if not companies: - st.warning("Please enter at least one company name") - return - - st.info(f"🔄 Starting analysis of {len(companies)} companies...") - - # Progress tracking - progress_bar = st.progress(0) - status_text = st.empty() - - analyzer = get_analyzer() - - def update_progress(company: str, completed: int, total: int): - progress = completed / total - progress_bar.progress(progress) - status_text.text(f"Analyzing {company}... ({completed}/{total})") - - result = analyzer.analyze_companies( - companies=companies, - max_workers=max_workers, - progress_callback=update_progress, - ) - - progress_bar.progress(1.0) - status_text.text("✓ Analysis complete!") - - st.markdown("") - - # Summary metrics - st.markdown('

Results Summary

', unsafe_allow_html=True) - col1, col2, col3, col4 = st.columns(4) - with col1: - st.metric("Total Companies", result.total_companies) - with col2: - st.metric("Successful", result.successful) - with col3: - st.metric("Failed", result.failed) - with col4: - success_rate = ( - (result.successful / result.total_companies * 100) - if result.total_companies > 0 - else 0 - ) - st.metric("Success Rate", f"{success_rate:.1f}%") - - # Results chart - if result.results: - df = pd.DataFrame( - [ - { - "Company": r.company_name.upper(), - "Patents": r.patent_count, - "Status": "Success" if r.success else "Failed", - } - for r in result.results - ] - ) - - fig = px.bar( - df, - x="Company", - y="Patents", - color="Status", - color_discrete_map={"Success": "#10b981", "Failed": "#ef4444"}, - title="", - ) - fig.update_layout( - plot_bgcolor="rgba(0,0,0,0)", - paper_bgcolor="rgba(0,0,0,0)", - font_color="#94a3b8", - legend=dict( - orientation="h", - yanchor="bottom", - y=1.02, - xanchor="right", - x=1 - ), - xaxis=dict(showgrid=False), - yaxis=dict(showgrid=True, gridcolor="rgba(99, 102, 241, 0.1)"), - ) - st.plotly_chart(fig, use_container_width=True) - - st.markdown("") - - # Individual results - st.markdown('

Detailed Results

', unsafe_allow_html=True) - for r in result.results: - status_icon = "✓" if r.success else "✗" - status_class = "status-success" if r.success else "status-error" - with st.expander( - f"{status_icon} {r.company_name.upper()} — {r.patent_count} patents" - ): - if r.success: - st.markdown(r.analysis) - else: - st.error(r.error) - - -def render_analytics(): - """Render analytics page with database insights.""" - st.markdown('

Analytics Dashboard

', unsafe_allow_html=True) - st.markdown("Track historical analysis data and view insights.") - - db_client = get_db_client() - - if not db_client: - st.markdown("") - st.markdown(""" -
- ⚠️ Database Not Connected
- Set USE_DATABASE=true in your .env file to enable analytics tracking. -
- """, unsafe_allow_html=True) - st.info("Analytics features require storing analysis results in PostgreSQL for historical tracking.") - return - - st.markdown("") - - # Time range selector - col1, col2, col3 = st.columns([1, 2, 1]) - with col1: - days = st.selectbox("Time Range", [7, 14, 30, 90], index=0, format_func=lambda x: f"Last {x} days") - - try: - analytics = db_client.get_analytics(days=days) - - if not analytics: - st.info("No analytics data available yet. Run some analyses first!") - return - - st.markdown("") - - # Summary metrics - col1, col2, col3 = st.columns(3) - - with col1: - total = analytics.get("total_messages", 0) - st.metric("Total Analyses", total) - - with col2: - companies = len(analytics.get("by_company", {})) - st.metric("Companies Analyzed", companies) - - with col3: - types = len(analytics.get("by_type", {})) - st.metric("Analysis Types", types) - - st.markdown("") - - # Charts - col1, col2 = st.columns(2) - - with col1: - by_company = analytics.get("by_company", {}) - if by_company: - df = pd.DataFrame( - [{"Company": k.upper(), "Count": v} for k, v in by_company.items()] - ) - fig = px.pie( - df, values="Count", names="Company", title="Distribution by Company", - hole=0.4, - color_discrete_sequence=px.colors.sequential.Purp_r, - ) - fig.update_layout( - plot_bgcolor="rgba(0,0,0,0)", - paper_bgcolor="rgba(0,0,0,0)", - font_color="#94a3b8", - ) - st.plotly_chart(fig, use_container_width=True) - - with col2: - by_type = analytics.get("by_type", {}) - if by_type: - df = pd.DataFrame( - [{"Type": k, "Count": v} for k, v in by_type.items()] - ) - fig = px.bar(df, x="Type", y="Count", title="Analysis Types", - color_discrete_sequence=["#6366f1"]) - fig.update_layout( - plot_bgcolor="rgba(0,0,0,0)", - paper_bgcolor="rgba(0,0,0,0)", - font_color="#94a3b8", - xaxis=dict(showgrid=False), - yaxis=dict(showgrid=True, gridcolor="rgba(99, 102, 241, 0.1)"), - ) - st.plotly_chart(fig, use_container_width=True) - - st.markdown("") - - # Recent 
messages - st.markdown('

Recent Analyses

', unsafe_allow_html=True) - messages = db_client.get_messages(limit=10) - - if messages: - for msg in messages: - with st.expander( - f"📄 {msg.get('company_name', 'Unknown').upper()} — {msg.get('analysis_type', 'N/A')} ({msg.get('timestamp', 'N/A')})" - ): - st.markdown(f"**Model:** `{msg.get('model', 'N/A')}`") - if msg.get("response"): - st.markdown(msg["response"][:500] + "...") - - except Exception as e: - st.error(f"Error fetching analytics: {e}") - - -def render_about(): - """Render about page.""" - st.markdown('

About SPARC

', unsafe_allow_html=True) - - col1, col2 = st.columns([2, 1]) - - with col1: - st.markdown(""" - **SPARC** (Semiconductor Patent & Analytics Report Core) is an AI-powered patent analysis - platform that evaluates company performance by analyzing their patent portfolios - with cutting-edge language models. - """) - - st.markdown("") - st.markdown("**Key Features**") - - features = [ - ("🔍", "Patent Retrieval", "Automated collection via SerpAPI's Google Patents"), - ("📄", "Intelligent Parsing", "Extracts key sections from patent documents"), - ("🤖", "AI Analysis", "Deep analysis powered by Claude 3.5 Sonnet"), - ("⚡", "Batch Processing", "Analyze multiple companies concurrently"), - ("🌐", "REST API", "FastAPI web service for seamless integration"), - ("📊", "Analytics", "Track and visualize historical analysis data"), - ] - - for icon, title, desc in features: - st.markdown(f""" -
- {icon} -
- {title}
- {desc} -
-
- """, unsafe_allow_html=True) - - with col2: - st.markdown("**Technology Stack**") - st.markdown(""" -
-
-
Backend
Python, FastAPI
-
AI Model
Claude 3.5 Sonnet
-
Database
PostgreSQL
-
Dashboard
Streamlit, Plotly
-
Data Source
SerpAPI Patents
-
-
- """, unsafe_allow_html=True) - - st.markdown("") - st.markdown("**API Endpoints**") - st.code("http://localhost:8000/docs", language=None) - st.code("http://localhost:8000/health", language=None) - - st.markdown("") - st.markdown("") - - # System status - st.markdown('

System Status

', unsafe_allow_html=True) - - col1, col2, col3 = st.columns(3) - - with col1: - db_client = get_db_client() - if db_client: - st.markdown(""" -
-
-
Database
-
Connected
-
- """, unsafe_allow_html=True) - else: - st.markdown(""" -
-
-
Database
-
Not Configured
-
- """, unsafe_allow_html=True) - - with col2: - analyzer = get_analyzer() - if analyzer: - st.markdown(""" -
-
-
Analyzer
-
Ready
-
- """, unsafe_allow_html=True) - else: - st.markdown(""" -
-
-
Analyzer
-
Not Initialized
-
- """, unsafe_allow_html=True) - - with col3: - st.markdown(""" -
-
-
Dashboard
-
Online
-
- """, unsafe_allow_html=True) - - -def main(): - """Main dashboard entry point.""" - render_header() - tabs = render_navigation() - - with tabs[0]: - render_company_analysis() - with tabs[1]: - render_batch_analysis() - with tabs[2]: - render_analytics() - with tabs[3]: - render_about() - - -if __name__ == "__main__": - main() diff --git a/docker-compose.yml b/docker-compose.yml index 4d471c8..7aa4e63 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,7 +23,6 @@ services: command: python scripts/init_database.py environment: DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc - USE_DATABASE: "true" depends_on: postgres: condition: service_healthy @@ -37,7 +36,8 @@ services: API_KEY: ${API_KEY} OPENROUTER_API_KEY: ${OPENROUTER_API_KEY} DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc - USE_DATABASE: "true" + USE_CACHE: "true" + JWT_SECRET: ${JWT_SECRET:-sparc-secret-key-change-in-production} ports: - "8000:8000" depends_on: @@ -50,20 +50,12 @@ services: restart: unless-stopped dashboard: - build: . + build: ./frontend container_name: sparc-dashboard - command: streamlit run dashboard.py --server.port 8501 --server.address 0.0.0.0 - environment: - API_KEY: ${API_KEY} - OPENROUTER_API_KEY: ${OPENROUTER_API_KEY} - DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc - USE_DATABASE: "true" ports: - - "8501:8501" + - "8080:80" depends_on: - api - volumes: - - ./patents:/app/patents restart: unless-stopped volumes: diff --git a/docs/DATABASE_MODE.md b/docs/DATABASE_MODE.md index 4beceb0..842c77f 100644 --- a/docs/DATABASE_MODE.md +++ b/docs/DATABASE_MODE.md @@ -1,16 +1,19 @@ -# Database Mode for Testing and Analytics +# Database Storage and Caching -This document explains how to use SPARC's database mode for storing LLM messages for testing and analytics purposes. +This document explains how SPARC uses PostgreSQL for storing LLM messages, enabling response caching and analytics. 
## Overview -SPARC supports two modes of operation: +SPARC stores all LLM interactions in PostgreSQL, providing: -1. **API Mode** (default): Messages are sent to OpenRouter's API and you receive real LLM responses -2. **Database Mode**: Messages are stored in a PostgreSQL database without making API calls, useful for: - - Testing the application without consuming API credits - - Collecting analytics on message patterns and usage - - Development and debugging +- **Response Caching**: Avoid redundant API calls for previously analyzed patents +- **Analytics**: Track usage patterns, token consumption, and analysis history +- **Persistence**: Maintain analysis history across sessions + +SPARC supports two cache modes: + +1. **Cache Mode** (default, `USE_CACHE=true`): Check database for cached responses before making API calls +2. **Fresh Mode** (`USE_CACHE=false`): Always make fresh API calls (still stores results in database) ## Setup @@ -45,43 +48,43 @@ cp .env.example .env Edit `.env` and set: ```env -# For database mode (testing/analytics) -USE_DATABASE=true +# Database connection (required) DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc -# For API mode (production) -USE_DATABASE=false +# Cache mode: use cached responses when available +USE_CACHE=true + +# API key for fresh LLM calls OPENROUTER_API_KEY=your_openrouter_key_here ``` ## Usage -### Running in Database Mode +### Running with Cache Mode (Default) -Set `USE_DATABASE=true` in your `.env` file, then run the application normally: +Set `USE_CACHE=true` in your `.env` file, then run the application normally: ```bash python main.py ``` -Instead of sending messages to OpenRouter, the application will: -- Store all prompts in the database -- Return a placeholder response -- Log metadata (company name, analysis type, timestamps) +The application will: +- Check the database for cached responses matching the request +- If found, return the cached response (no API call) +- If not found, make an API 
call and store the response for future use -### Running in API Mode +### Running with Fresh Mode -Set `USE_DATABASE=false` in your `.env` file, then run the application normally: +Set `USE_CACHE=false` in your `.env` file to always get fresh responses: ```bash python main.py ``` -The application will send messages to OpenRouter and return real LLM responses. - -### Hybrid Mode (Optional) - -You can also enable database logging while still using the API by initializing the database client in your code. The `LLMAnalyzer` will automatically log all API calls to the database if a database client is available. +The application will: +- Always send messages to OpenRouter for real LLM responses +- Store all responses in the database +- Useful when you need the latest analysis or want to refresh cached data ## Viewing Analytics @@ -195,16 +198,16 @@ docker-compose down -v ## Toggling Between Modes -You can easily switch between modes by changing the `USE_DATABASE` environment variable: +You can easily switch between modes by changing the `USE_CACHE` environment variable: -### Quick Toggle (temporary, for testing) +### Quick Toggle (temporary) ```bash -# Run in database mode -USE_DATABASE=true python main.py +# Run with caching enabled +USE_CACHE=true python main.py -# Run in API mode -USE_DATABASE=false python main.py +# Run with fresh API calls +USE_CACHE=false python main.py ``` ### Persistent Toggle @@ -212,38 +215,48 @@ USE_DATABASE=false python main.py Edit your `.env` file: ```env -# For testing/analytics -USE_DATABASE=true +# Use cached responses when available (recommended for most use) +USE_CACHE=true -# For production use -USE_DATABASE=false +# Always make fresh API calls +USE_CACHE=false ``` ## Use Cases -### Testing Without API Costs +### Cost Optimization with Caching -During development, enable database mode to test the full application flow without consuming API credits: +Cache mode reduces API costs by reusing previous analysis results: ```bash 
-USE_DATABASE=true python main.py +USE_CACHE=true python main.py +``` + +If the same company/patent combination was analyzed before, the cached response is returned instantly. + +### Fresh Analysis + +When you need the latest LLM analysis (e.g., after model updates): + +```bash +USE_CACHE=false python main.py ``` ### Collecting Usage Analytics -Enable database mode in a test environment to collect analytics on: +The database stores all interactions, enabling analytics on: - Which companies are analyzed most frequently - Types of analyses performed -- Prompt patterns and lengths -- Usage over time +- Token usage and costs over time +- Response caching hit rates ### Development and Debugging -Database mode is useful for: -- Testing patent parsing logic without API calls +Database storage is useful for: +- Reviewing actual prompts sent to the LLM +- Analyzing response patterns - Debugging the full pipeline end-to-end -- Collecting sample prompts for optimization -- Understanding token usage patterns (when in API mode with logging) +- Understanding token usage patterns ## Troubleshooting diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index ed43b64..bb7bfd9 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -64,7 +64,7 @@ docker-compose ps # You should see: # - sparc-postgres (healthy) # - sparc-api (running on port 8000) -# - sparc-dashboard (running on port 8501) +# - sparc-dashboard (running on port 8080) ``` The database is automatically initialized by the `init-db` service. 
@@ -116,11 +116,13 @@ docker-compose up -d postgres # Wait for database to be healthy, then initialize python scripts/init_database.py -# Terminal 1: Start FastAPI backend +# Start FastAPI backend uvicorn SPARC.api:app --host 0.0.0.0 --port 8000 --reload -# Terminal 2: Start Streamlit dashboard -streamlit run dashboard.py --server.port 8501 --server.address 0.0.0.0 +# For the React frontend (separate terminal) +cd frontend +npm install +npm run dev ``` --- @@ -141,7 +143,7 @@ Access the services: |---------|-----| | REST API | http://localhost:8000 | | API Documentation (Swagger) | http://localhost:8000/docs | -| Dashboard (Web UI) | http://localhost:8501 | +| Dashboard (Web UI) | http://localhost:8080 | --- @@ -149,16 +151,17 @@ Access the services: ### Via Dashboard (Web UI) -1. Open http://localhost:8501 -2. Select **"Company Analysis"** from the sidebar -3. Enter a company name (e.g., "Intel") -4. Click **"Analyze"** +1. Open http://localhost:8080 +2. Register a new account or login (default admin: `admin` / `admin`) +3. Navigate to **"Analysis"** from the sidebar +4. Enter a company name (e.g., "Intel") +5. 
Click **"Analyze"** This will: - Query SerpAPI for recent patents - Download and parse patent PDFs - Send patent content to Claude for analysis -- Store prompt/response in PostgreSQL +- Store prompt/response in PostgreSQL (with caching) - Display results in the dashboard ### Via REST API @@ -233,12 +236,12 @@ docker exec -it sparc-postgres psql -U postgres -d sparc -c \ | Component | Purpose | |-----------|---------| -| **Dashboard** | Streamlit web UI for interactive analysis | -| **FastAPI** | REST API for programmatic access | +| **Dashboard** | React TypeScript web UI with authentication | +| **FastAPI** | REST API with JWT authentication | | **Analyzer** | Orchestrates patent retrieval and LLM analysis | | **SerpAPI** | Retrieves patent data from Google Patents | | **OpenRouter** | Routes requests to Claude for AI analysis | -| **PostgreSQL** | Stores prompts, responses, and analytics | +| **PostgreSQL** | Stores prompts, responses, users, and cached results | --- @@ -248,10 +251,9 @@ docker exec -it sparc-postgres psql -U postgres -d sparc -c \ |----------|----------|---------|-------------| | `API_KEY` | Yes | - | SerpAPI key for patent search | | `OPENROUTER_API_KEY` | Yes | - | OpenRouter API key for Claude access | -| `DATABASE_URL` | Yes* | - | PostgreSQL connection string | -| `USE_DATABASE` | No | `false` | Set to `true` to enable database storage | - -*Required when `USE_DATABASE=true` +| `DATABASE_URL` | Yes | - | PostgreSQL connection string | +| `USE_CACHE` | No | `true` | Check database for cached responses before API calls | +| `JWT_SECRET` | Yes | - | Secret key for JWT authentication (change in production!) 
| ### Database URL Format @@ -273,9 +275,9 @@ The `docker-compose.yml` includes all services needed for production: | Service | Container | Port | Description | |---------|-----------|------|-------------| | `postgres` | sparc-postgres | 5432 | PostgreSQL database | -| `init-db` | sparc-init-db | - | One-time database initialization | -| `api` | sparc-api | 8000 | FastAPI REST API | -| `dashboard` | sparc-dashboard | 8501 | Streamlit web UI | +| `init-db` | sparc-init-db | - | One-time database initialization (seeds admin user) | +| `api` | sparc-api | 8000 | FastAPI REST API with JWT auth | +| `dashboard` | sparc-dashboard | 8080 | React TypeScript web UI | ### Common Docker Compose Commands @@ -382,11 +384,11 @@ cp .env.example .env docker-compose up -d postgres python scripts/init_database.py uvicorn SPARC.api:app --reload & -streamlit run dashboard.py +cd frontend && npm install && npm run dev & # Check status curl http://localhost:8000/health -open http://localhost:8501 +open http://localhost:8080 # View data python scripts/view_analytics.py diff --git a/frontend/.gitignore b/frontend/.gitignore new file mode 100644 index 0000000..e01ce8f --- /dev/null +++ b/frontend/.gitignore @@ -0,0 +1,22 @@ +# Dependencies +node_modules/ + +# Build output +dist/ + +# Local env files +.env.local +.env.*.local + +# Editor directories +.vscode/ +.idea/ + +# OS files +.DS_Store +Thumbs.db + +# Debug logs +npm-debug.log* +yarn-debug.log* +yarn-error.log* diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 0000000..00f8746 --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,29 @@ +# Build stage +FROM node:20-alpine AS build + +WORKDIR /app + +# Copy package files +COPY package.json package-lock.json* ./ + +# Install dependencies +RUN npm install + +# Copy source files +COPY . . 
+ +# Build the application +RUN npm run build + +# Production stage +FROM nginx:alpine + +# Copy built files +COPY --from=build /app/dist /usr/share/nginx/html + +# Copy nginx config +COPY nginx.conf /etc/nginx/conf.d/default.conf + +EXPOSE 80 + +CMD ["nginx", "-g", "daemon off;"] diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..631e457 --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,13 @@ + + + + + + + SPARC Dashboard + + +
+ + + diff --git a/frontend/nginx.conf b/frontend/nginx.conf new file mode 100644 index 0000000..a16abcf --- /dev/null +++ b/frontend/nginx.conf @@ -0,0 +1,34 @@ +server { + listen 80; + server_name localhost; + root /usr/share/nginx/html; + index index.html; + + # Gzip compression + gzip on; + gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript; + + # Handle React Router (SPA) + location / { + try_files $uri $uri/ /index.html; + } + + # Proxy API requests to backend + location /api/ { + proxy_pass http://api:8000/; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_cache_bypass $http_upgrade; + } + + # Cache static assets + location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2)$ { + expires 1y; + add_header Cache-Control "public, immutable"; + } +} diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..b99eee1 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,37 @@ +{ + "name": "sparc-dashboard", + "private": true, + "version": "1.0.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc -b && vite build", + "lint": "eslint .", + "preview": "vite preview" + }, + "dependencies": { + "@tanstack/react-query": "^5.51.0", + "axios": "^1.7.2", + "lucide-react": "^0.400.0", + "react": "^18.3.1", + "react-dom": "^18.3.1", + "react-router-dom": "^6.24.0", + "recharts": "^2.12.7" + }, + "devDependencies": { + "@eslint/js": "^9.6.0", + "@types/react": "^18.3.3", + "@types/react-dom": "^18.3.0", + "@vitejs/plugin-react": "^4.3.1", + "autoprefixer": "^10.4.19", + "eslint": "^9.6.0", + "eslint-plugin-react-hooks": "^5.1.0", + "eslint-plugin-react-refresh": "^0.4.7", + "globals": "^15.8.0", + 
"postcss": "^8.4.39", + "tailwindcss": "^3.4.4", + "typescript": "~5.5.3", + "typescript-eslint": "^8.0.0", + "vite": "^5.3.3" + } +} diff --git a/frontend/postcss.config.js b/frontend/postcss.config.js new file mode 100644 index 0000000..2e7af2b --- /dev/null +++ b/frontend/postcss.config.js @@ -0,0 +1,6 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx new file mode 100644 index 0000000..c3426cd --- /dev/null +++ b/frontend/src/App.tsx @@ -0,0 +1,67 @@ +import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom'; +import { QueryClient, QueryClientProvider } from '@tanstack/react-query'; +import { AuthProvider } from './context/AuthContext'; +import { Layout } from './components/Layout'; +import { ProtectedRoute } from './components/ProtectedRoute'; +import { Login } from './pages/Login'; +import { Register } from './pages/Register'; +import { Analysis } from './pages/Analysis'; +import { Batch } from './pages/Batch'; +import { AnalyticsPage } from './pages/Analytics'; +import { About } from './pages/About'; +import { AdminUsers } from './pages/AdminUsers'; + +const queryClient = new QueryClient({ + defaultOptions: { + queries: { + staleTime: 1000 * 60 * 5, // 5 minutes + retry: 1, + }, + }, +}); + +function App() { + return ( + + + + + {/* Public routes */} + } /> + } /> + + {/* Protected routes */} + + + + } + > + } /> + } /> + } /> + } /> + + {/* Admin routes */} + + + + } + /> + + + {/* Default redirect */} + } /> + } /> + + + + + ); +} + +export default App; diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts new file mode 100644 index 0000000..037d59c --- /dev/null +++ b/frontend/src/api/client.ts @@ -0,0 +1,154 @@ +import axios, { AxiosError, InternalAxiosRequestConfig } from 'axios'; +import type { TokenResponse, User, CompanyAnalysis, BatchAnalysisResult, JobStatus, Analytics } from '../types'; + +const API_BASE_URL = 
import.meta.env.VITE_API_URL || '/api'; + +const api = axios.create({ + baseURL: API_BASE_URL, + headers: { + 'Content-Type': 'application/json', + }, +}); + +// Token management +let accessToken: string | null = localStorage.getItem('access_token'); +let refreshToken: string | null = localStorage.getItem('refresh_token'); + +export const setTokens = (tokens: TokenResponse) => { + accessToken = tokens.access_token; + refreshToken = tokens.refresh_token; + localStorage.setItem('access_token', tokens.access_token); + localStorage.setItem('refresh_token', tokens.refresh_token); +}; + +export const clearTokens = () => { + accessToken = null; + refreshToken = null; + localStorage.removeItem('access_token'); + localStorage.removeItem('refresh_token'); +}; + +export const getAccessToken = () => accessToken; + +// Request interceptor to add auth header +api.interceptors.request.use((config: InternalAxiosRequestConfig) => { + if (accessToken) { + config.headers.Authorization = `Bearer ${accessToken}`; + } + return config; +}); + +// Response interceptor to handle token refresh +api.interceptors.response.use( + (response) => response, + async (error: AxiosError) => { + const originalRequest = error.config as InternalAxiosRequestConfig & { _retry?: boolean }; + + if (error.response?.status === 401 && !originalRequest._retry && refreshToken) { + originalRequest._retry = true; + + try { + const response = await axios.post(`${API_BASE_URL}/auth/refresh`, { + refresh_token: refreshToken, + }); + + setTokens(response.data); + originalRequest.headers.Authorization = `Bearer ${response.data.access_token}`; + + return api(originalRequest); + } catch { + clearTokens(); + window.location.href = '/login'; + } + } + + return Promise.reject(error); + } +); + +// Auth API +export const authApi = { + register: async (email: string, password: string): Promise => { + const response = await api.post('/auth/register', { email, password }); + return response.data; + }, + + login: async (email: 
string, password: string): Promise => { + const response = await api.post('/auth/login', { email, password }); + setTokens(response.data); + return response.data; + }, + + getMe: async (): Promise => { + const response = await api.get('/auth/me'); + return response.data; + }, + + logout: () => { + clearTokens(); + }, +}; + +// Analysis API +export const analysisApi = { + analyzeCompany: async (companyName: string): Promise => { + const response = await api.get(`/analyze/${encodeURIComponent(companyName)}`); + return response.data; + }, + + analyzeBatch: async (companies: string[], maxWorkers = 3): Promise => { + const response = await api.post('/analyze/batch', { + companies, + max_workers: maxWorkers, + }); + return response.data; + }, + + analyzeBatchAsync: async (companies: string[], maxWorkers = 3): Promise => { + const response = await api.post('/analyze/batch/async', { + companies, + max_workers: maxWorkers, + }); + return response.data; + }, + + getJobStatus: async (jobId: string): Promise => { + const response = await api.get(`/jobs/${jobId}`); + return response.data; + }, + + listJobs: async (status?: string, limit = 10): Promise => { + const params = new URLSearchParams(); + if (status) params.append('status', status); + params.append('limit', limit.toString()); + const response = await api.get(`/jobs?${params}`); + return response.data; + }, +}; + +// Analytics API +export const analyticsApi = { + getAnalytics: async (days = 30): Promise => { + const response = await api.get(`/analytics?days=${days}`); + return response.data; + }, +}; + +// Admin API +export const adminApi = { + listUsers: async (limit = 100, offset = 0): Promise => { + const response = await api.get(`/admin/users?limit=${limit}&offset=${offset}`); + return response.data; + }, + + updateUserRole: async (userId: number, role: 'admin' | 'user'): Promise => { + const response = await api.patch(`/admin/users/${userId}/role`, { role }); + return response.data; + }, + + deleteUser: async 
(userId: number): Promise => { + await api.delete(`/admin/users/${userId}`); + }, +}; + +export default api; diff --git a/frontend/src/components/Layout.tsx b/frontend/src/components/Layout.tsx new file mode 100644 index 0000000..501dc1f --- /dev/null +++ b/frontend/src/components/Layout.tsx @@ -0,0 +1,108 @@ +import { Outlet, NavLink, useNavigate } from 'react-router-dom'; +import { useAuth } from '../context/AuthContext'; +import { Search, Layers, BarChart3, Info, Users, LogOut } from 'lucide-react'; + +export function Layout() { + const { user, isAdmin, logout } = useAuth(); + const navigate = useNavigate(); + + const handleLogout = () => { + logout(); + navigate('/login'); + }; + + const navItems = [ + { to: '/analysis', icon: Search, label: 'Analysis' }, + { to: '/batch', icon: Layers, label: 'Batch' }, + { to: '/analytics', icon: BarChart3, label: 'Analytics' }, + { to: '/about', icon: Info, label: 'About' }, + ]; + + if (isAdmin) { + navItems.push({ to: '/admin/users', icon: Users, label: 'Users' }); + } + + return ( +
+ {/* Header */} +
+
+
+ {/* Brand */} +
+ +
+

+ SPARC +

+ + Semiconductor Patent Analytics + +
+
+ + {/* Navigation */} + + + {/* User menu */} +
+
+
{user?.email}
+
{user?.role}
+
+ +
+
+
+
+ + {/* Mobile Navigation */} + + + {/* Main content */} +
+ +
+
+ ); +} diff --git a/frontend/src/components/ProtectedRoute.tsx b/frontend/src/components/ProtectedRoute.tsx new file mode 100644 index 0000000..667057d --- /dev/null +++ b/frontend/src/components/ProtectedRoute.tsx @@ -0,0 +1,30 @@ +import { Navigate, useLocation } from 'react-router-dom'; +import { useAuth } from '../context/AuthContext'; + +interface ProtectedRouteProps { + children: React.ReactNode; + requireAdmin?: boolean; +} + +export function ProtectedRoute({ children, requireAdmin = false }: ProtectedRouteProps) { + const { isAuthenticated, isAdmin, isLoading } = useAuth(); + const location = useLocation(); + + if (isLoading) { + return ( +
+
+
+ ); + } + + if (!isAuthenticated) { + return ; + } + + if (requireAdmin && !isAdmin) { + return ; + } + + return <>{children}; +} diff --git a/frontend/src/context/AuthContext.tsx b/frontend/src/context/AuthContext.tsx new file mode 100644 index 0000000..d40eca8 --- /dev/null +++ b/frontend/src/context/AuthContext.tsx @@ -0,0 +1,81 @@ +import { createContext, useContext, useState, useEffect, ReactNode } from 'react'; +import { authApi, getAccessToken } from '../api/client'; +import type { User } from '../types'; + +interface AuthContextType { + user: User | null; + isLoading: boolean; + isAuthenticated: boolean; + isAdmin: boolean; + login: (email: string, password: string) => Promise; + register: (email: string, password: string) => Promise; + logout: () => void; + refreshUser: () => Promise; +} + +const AuthContext = createContext(undefined); + +export function AuthProvider({ children }: { children: ReactNode }) { + const [user, setUser] = useState(null); + const [isLoading, setIsLoading] = useState(true); + + const refreshUser = async () => { + try { + const userData = await authApi.getMe(); + setUser(userData); + } catch { + setUser(null); + } + }; + + useEffect(() => { + const initAuth = async () => { + if (getAccessToken()) { + await refreshUser(); + } + setIsLoading(false); + }; + initAuth(); + }, []); + + const login = async (email: string, password: string) => { + await authApi.login(email, password); + await refreshUser(); + }; + + const register = async (email: string, password: string) => { + await authApi.register(email, password); + await authApi.login(email, password); + await refreshUser(); + }; + + const logout = () => { + authApi.logout(); + setUser(null); + }; + + return ( + + {children} + + ); +} + +export function useAuth() { + const context = useContext(AuthContext); + if (context === undefined) { + throw new Error('useAuth must be used within an AuthProvider'); + } + return context; +} diff --git a/frontend/src/index.css 
b/frontend/src/index.css new file mode 100644 index 0000000..b94918a --- /dev/null +++ b/frontend/src/index.css @@ -0,0 +1,34 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; +} + +/* Custom scrollbar */ +::-webkit-scrollbar { + width: 8px; + height: 8px; +} + +::-webkit-scrollbar-track { + background: #1e293b; +} + +::-webkit-scrollbar-thumb { + background: #6366f1; + border-radius: 4px; +} + +::-webkit-scrollbar-thumb:hover { + background: #4f46e5; +} + +/* Selection */ +::selection { + background: rgba(99, 102, 241, 0.3); + color: #f8fafc; +} diff --git a/frontend/src/main.tsx b/frontend/src/main.tsx new file mode 100644 index 0000000..a46835a --- /dev/null +++ b/frontend/src/main.tsx @@ -0,0 +1,10 @@ +import { StrictMode } from 'react'; +import { createRoot } from 'react-dom/client'; +import App from './App'; +import './index.css'; + +createRoot(document.getElementById('root')!).render( + + + +); diff --git a/frontend/src/pages/About.tsx b/frontend/src/pages/About.tsx new file mode 100644 index 0000000..4c71bac --- /dev/null +++ b/frontend/src/pages/About.tsx @@ -0,0 +1,171 @@ +import { useQuery } from '@tanstack/react-query'; +import axios from 'axios'; +import { Search, FileText, Bot, Zap, Globe, BarChart3, CheckCircle, AlertTriangle, XCircle } from 'lucide-react'; + +const API_BASE_URL = import.meta.env.VITE_API_URL || '/api'; + +export function About() { + const { data: health } = useQuery({ + queryKey: ['health'], + queryFn: async () => { + const response = await axios.get(`${API_BASE_URL}/health`); + return response.data; + }, + refetchInterval: 30000, + }); + + const features = [ + { + icon: Search, + title: 'Patent Retrieval', + description: 'Automated collection via SerpAPI\'s Google Patents', + }, + { + icon: FileText, + title: 'Intelligent 
Parsing', + description: 'Extracts key sections from patent documents', + }, + { + icon: Bot, + title: 'AI Analysis', + description: 'Deep analysis powered by Claude 3.5 Sonnet', + }, + { + icon: Zap, + title: 'Batch Processing', + description: 'Analyze multiple companies concurrently', + }, + { + icon: Globe, + title: 'REST API', + description: 'FastAPI web service for seamless integration', + }, + { + icon: BarChart3, + title: 'Analytics', + description: 'Track and visualize historical analysis data', + }, + ]; + + const techStack = [ + { label: 'Backend', value: 'Python, FastAPI' }, + { label: 'AI Model', value: 'Claude 3.5 Sonnet' }, + { label: 'Database', value: 'PostgreSQL' }, + { label: 'Frontend', value: 'React, TailwindCSS' }, + { label: 'Data Source', value: 'SerpAPI Patents' }, + ]; + + return ( +
+ {/* Header */} +
+

+ About SPARC +

+
+ +
+ {/* Main Content */} +
+ {/* Description */} +

+ SPARC (Semiconductor Patent & Analytics Report Core) + is an AI-powered patent analysis platform that evaluates company performance by analyzing their + patent portfolios with cutting-edge language models. +

+ + {/* Features */} +
+

Key Features

+
+ {features.map(({ icon: Icon, title, description }) => ( +
+
+ +
+
+
{title}
+
{description}
+
+
+ ))} +
+
+
+ + {/* Sidebar */} +
+ {/* Tech Stack */} +
+

Technology Stack

+
+ {techStack.map(({ label, value }) => ( +
+
{label}
+
{value}
+
+ ))} +
+
+ + {/* API Endpoints */} +
+

API Endpoints

+
+ + http://localhost:8000/docs + + + http://localhost:8000/health + +
+
+
+
+ + {/* System Status */} +
+

+ System Status +

+
+ + + +
+
+
+ ); +} + +function StatusCard({ label, status }: { label: string; status: 'online' | 'offline' | 'configured' }) { + const statusConfig = { + online: { icon: CheckCircle, color: 'text-success', bg: 'bg-success' }, + offline: { icon: XCircle, color: 'text-error', bg: 'bg-error' }, + configured: { icon: AlertTriangle, color: 'text-warning', bg: 'bg-warning' }, + }; + + const { icon: Icon, color, bg } = statusConfig[status]; + + return ( +
+
+ +
+
{label}
+
{status}
+
+ ); +} diff --git a/frontend/src/pages/AdminUsers.tsx b/frontend/src/pages/AdminUsers.tsx new file mode 100644 index 0000000..632905c --- /dev/null +++ b/frontend/src/pages/AdminUsers.tsx @@ -0,0 +1,183 @@ +import { useState } from 'react'; +import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'; +import { adminApi } from '../api/client'; +import { useAuth } from '../context/AuthContext'; +import { Users, Shield, User, Trash2, AlertCircle } from 'lucide-react'; +import type { User as UserType } from '../types'; + +export function AdminUsers() { + const { user: currentUser } = useAuth(); + const queryClient = useQueryClient(); + const [deleteConfirm, setDeleteConfirm] = useState(null); + + const { data: users, isLoading, isError } = useQuery({ + queryKey: ['admin-users'], + queryFn: () => adminApi.listUsers(), + }); + + const updateRoleMutation = useMutation({ + mutationFn: ({ userId, role }: { userId: number; role: 'admin' | 'user' }) => + adminApi.updateUserRole(userId, role), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ['admin-users'] }); + }, + }); + + const deleteMutation = useMutation({ + mutationFn: (userId: number) => adminApi.deleteUser(userId), + onSuccess: () => { + queryClient.invalidateQueries({ queryKey: ['admin-users'] }); + setDeleteConfirm(null); + }, + }); + + const handleRoleChange = (user: UserType) => { + const newRole = user.role === 'admin' ? 'user' : 'admin'; + updateRoleMutation.mutate({ userId: user.id, role: newRole }); + }; + + const handleDelete = (userId: number) => { + deleteMutation.mutate(userId); + }; + + if (isLoading) { + return ( +
+
+
+ ); + } + + if (isError) { + return ( +
+ + Failed to load users. +
+ ); + } + + return ( +
+ {/* Header */} +
+
+

+ User Management +

+

Manage user accounts and permissions.

+
+
+ + {users?.length || 0} Users +
+
+ + {/* Users Table */} +
+
+ + + + + + + + + + + {users?.map((user) => ( + + + + + + + ))} + +
+ User + + Role + + Created + + Actions +
+
+
+ {user.role === 'admin' ? ( + + ) : ( + + )} +
+
+
{user.email}
+ {user.id === currentUser?.id && ( + (You) + )} +
+
+
+ + {user.role === 'admin' ? : } + {user.role} + + + {new Date(user.created_at).toLocaleDateString()} + +
+ {user.id !== currentUser?.id && ( + <> + + + {deleteConfirm === user.id ? ( +
+ + +
+ ) : ( + + )} + + )} +
+
+
+
+
+ ); +} diff --git a/frontend/src/pages/Analysis.tsx b/frontend/src/pages/Analysis.tsx new file mode 100644 index 0000000..2dfd2f5 --- /dev/null +++ b/frontend/src/pages/Analysis.tsx @@ -0,0 +1,135 @@ +import { useState } from 'react'; +import { useMutation } from '@tanstack/react-query'; +import { analysisApi } from '../api/client'; +import { Search, CheckCircle, AlertCircle, Clock, FileText } from 'lucide-react'; +import type { CompanyAnalysis } from '../types'; + +export function Analysis() { + const [companyName, setCompanyName] = useState(''); + const [result, setResult] = useState(null); + + const mutation = useMutation({ + mutationFn: (name: string) => analysisApi.analyzeCompany(name), + onSuccess: (data) => setResult(data), + }); + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault(); + if (companyName.trim()) { + mutation.mutate(companyName.trim()); + } + }; + + return ( +
+ {/* Header */} +
+

+ Single Company Analysis +

+

+ Analyze a company's patent portfolio using AI-powered insights. +

+
+ + {/* Search Form */} +
+
+ + setCompanyName(e.target.value)} + placeholder="Enter company name (e.g., nvidia, intel, amd)" + className="w-full bg-bg-card/80 border border-primary/30 rounded-xl pl-12 pr-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all" + /> +
+ +
+ + {/* Error */} + {mutation.isError && ( +
+ + Analysis failed. Please try again. +
+ )} + + {/* Results */} + {result && ( +
+ {/* Success/Failure Status */} + {result.success ? ( +
+ + Analysis complete for {result.company_name.toUpperCase()} +
+ ) : ( +
+ + Analysis failed: {result.error} +
+ )} + + {/* Metrics */} +
+ + + +
+ + {/* Analysis Content */} + {result.success && result.analysis && ( +
+

+ AI Analysis Results +

+
+
+ {result.analysis} +
+
+
+ )} +
+ )} +
+ ); +} + +function MetricCard({ icon: Icon, label, value }: { icon: typeof FileText; label: string; value: string }) { + return ( +
+ +
+ {value} +
+
{label}
+
+ ); +} diff --git a/frontend/src/pages/Analytics.tsx b/frontend/src/pages/Analytics.tsx new file mode 100644 index 0000000..19f4aff --- /dev/null +++ b/frontend/src/pages/Analytics.tsx @@ -0,0 +1,179 @@ +import { useState } from 'react'; +import { useQuery } from '@tanstack/react-query'; +import { analyticsApi } from '../api/client'; +import { AlertCircle, Database } from 'lucide-react'; +import { PieChart, Pie, Cell, BarChart, Bar, XAxis, YAxis, Tooltip, ResponsiveContainer, Legend } from 'recharts'; + +const COLORS = ['#6366f1', '#0ea5e9', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6', '#ec4899', '#14b8a6']; + +export function AnalyticsPage() { + const [days, setDays] = useState(30); + + const { data, isLoading, isError } = useQuery({ + queryKey: ['analytics', days], + queryFn: () => analyticsApi.getAnalytics(days), + }); + + if (isLoading) { + return ( +
+
+
+ ); + } + + if (isError) { + return ( +
+
+

+ Analytics Dashboard +

+
+
+
+ + Database Not Connected +
+

+ Make sure DATABASE_URL is set in your .env file and PostgreSQL is running to enable analytics tracking.

+
+
+ + Analytics features require storing analysis results in PostgreSQL for historical tracking. +
+
+ ); + } + + if (!data || (data.total_messages === 0 && data.by_company.length === 0)) { + return ( +
+
+

+ Analytics Dashboard +

+

Track historical analysis data and view insights.

+
+
+ + No analytics data available yet. Run some analyses first! +
+
+ ); + } + + const companyData = data.by_company.map((c) => ({ + name: (c.company_name || 'Unknown').toUpperCase(), + value: c.count, + })); + + const typeData = data.by_type.map((t) => ({ + name: t.analysis_type || 'Unknown', + count: t.count, + })); + + return ( +
+ {/* Header */} +
+
+

+ Analytics Dashboard +

+

Track historical analysis data and view insights.

+
+ + {/* Time Range Selector */} + +
+ + {/* Summary Metrics */} +
+ + + +
+ + {/* Charts */} +
+ {/* Pie Chart - Distribution by Company */} + {companyData.length > 0 && ( +
+

Distribution by Company

+ + + `${name} ${(percent * 100).toFixed(0)}%`} + labelLine={false} + > + {companyData.map((_, index) => ( + + ))} + + + + + +
+ )} + + {/* Bar Chart - Analysis Types */} + {typeData.length > 0 && ( +
+

Analysis Types

+ + + + + + + + +
+ )} +
+
+ ); +} + +function MetricCard({ label, value }: { label: string; value: number }) { + return ( +
+
+ {value} +
+
{label}
+
+ ); +} diff --git a/frontend/src/pages/Batch.tsx b/frontend/src/pages/Batch.tsx new file mode 100644 index 0000000..9b9b351 --- /dev/null +++ b/frontend/src/pages/Batch.tsx @@ -0,0 +1,248 @@ +import { useState } from 'react'; +import { useMutation } from '@tanstack/react-query'; +import { analysisApi } from '../api/client'; +import { Rocket, CheckCircle, AlertCircle, ChevronDown, ChevronUp } from 'lucide-react'; +import { BarChart, Bar, XAxis, YAxis, Tooltip, ResponsiveContainer, Cell } from 'recharts'; +import type { BatchAnalysisResult } from '../types'; + +export function Batch() { + const [companiesInput, setCompaniesInput] = useState(''); + const [maxWorkers, setMaxWorkers] = useState(3); + const [result, setResult] = useState(null); + const [expandedItems, setExpandedItems] = useState>(new Set()); + + const mutation = useMutation({ + mutationFn: ({ companies, workers }: { companies: string[]; workers: number }) => + analysisApi.analyzeBatch(companies, workers), + onSuccess: (data) => setResult(data), + }); + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault(); + const companies = companiesInput + .split(/[,\n]/) + .map((c) => c.trim()) + .filter((c) => c.length > 0); + + if (companies.length > 0) { + mutation.mutate({ companies, workers: maxWorkers }); + } + }; + + const toggleExpand = (company: string) => { + const newExpanded = new Set(expandedItems); + if (newExpanded.has(company)) { + newExpanded.delete(company); + } else { + newExpanded.add(company); + } + setExpandedItems(newExpanded); + }; + + const chartData = result?.results.map((r) => ({ + name: r.company_name.toUpperCase(), + patents: r.patent_count, + success: r.success, + })); + + return ( +
+ {/* Header */} +
+

+ Batch Company Analysis +

+

+ Analyze multiple companies simultaneously for comparative insights. +

+
+ + {/* Input Form */} +
+
+