Compare commits
153 Commits
d7cf80f02f
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
68ee19025a
|
|||
|
ef97710d1c
|
|||
|
88812b5967
|
|||
|
90e58949fc
|
|||
|
bd10925c97
|
|||
|
89fec43aa2
|
|||
|
02e1c41126
|
|||
|
c17a0d006a
|
|||
|
c6760a39a1
|
|||
|
2ae6280566
|
|||
|
9745ed75a8
|
|||
|
c649eaf343
|
|||
| 7e66d0e7e0 | |||
| 71465401c6 | |||
| 97048917f2 | |||
| 88abd9574b | |||
| e0ed39908e | |||
| 87e09b365b | |||
| 5d11f514c0 | |||
| cbc8f449a1 | |||
| 44620614b6 | |||
| c72a44aa56 | |||
| 6aa71eb17e | |||
| fb52d08387 | |||
| 223d5f7e5d | |||
| 595516e330 | |||
| 514e274fdb | |||
| 3d2c0ea27d | |||
| f611e3a30c | |||
| 2bbf2d70bb | |||
| f8ca1b80b1 | |||
| 338ac86086 | |||
| ce31a32322 | |||
| 449055b026 | |||
| 70925fbf04 | |||
| 9b2b2c75db | |||
| 730f455e2b | |||
| 03f8f7fa79 | |||
| f0edc5a3ae | |||
| f64d1b745f | |||
| 513b682dad | |||
| a6c92fde9f | |||
| a4db9439f5 | |||
| bbea16387d | |||
| 4e2bcae18a | |||
| b66b8332b6 | |||
| c42bf5bf71 | |||
| 02991b6648 | |||
| ab74904845 | |||
| 92197440bf | |||
| 301a773622 | |||
| 2e6b8c7445 | |||
| f33447eef8 | |||
| 04f4d36307 | |||
| 7a364e6736 | |||
| 52972bbff0 | |||
| c738f785c3 | |||
| 1bd9dccdb8 | |||
| 3b6411869d | |||
| 9a43f85259 | |||
| a4aa968434 | |||
| 153eb3b968 | |||
| ecc2c37bcd | |||
| 0b4d712fc5 | |||
| 4696838fb8 | |||
| 55c131cb32 | |||
| fbb72fe2a5 | |||
| e484baaf5f | |||
| 069f1c343c | |||
| d366443b38 | |||
| b000146585 | |||
| 35d105b14e | |||
| 6fcf170d93 | |||
| 5a42e216ba | |||
| 24ab341d9b | |||
| 878fedfbb8 | |||
| ae9f257dcb | |||
| 96d5d27b17 | |||
| 3dac88ec90 | |||
| e2d750146c | |||
| 47cddcbeaf | |||
| 6105ba7793 | |||
| e8cdc089fa | |||
| 9c971dac72 | |||
| 6f0b448044 | |||
| 1a297eb60b | |||
| 3154f6b732 | |||
| b9bb3dc1cd | |||
| 90f9cfc826 | |||
| d387bbbdf3 | |||
| fa564e5e1e | |||
| 2815deb221 | |||
| 56e8287720 | |||
| b969423957 | |||
| 0dee4c5099 | |||
| 03105a2f87 | |||
| 28e2ded501 | |||
| f87572ab7e | |||
| 44b6c79713 | |||
| 13fe383116 | |||
| dee3cbefbd | |||
| 6acad4cff7 | |||
| 45ccd0b4e1 | |||
| d108d4c7ea | |||
| 068aecce61 | |||
| 8790abfbf7 | |||
| fe0c5ca280 | |||
| ed81ae4569 | |||
| ebba983a1d | |||
| 258b349e98 | |||
| fc99173028 | |||
| 4405f199ba | |||
| 874f60f0d9 | |||
| cb7d7121c5 | |||
| 9c98b948d3 | |||
| af52107ed8 | |||
| 0107691c90 | |||
| 3424384088 | |||
| 5141d9dd47 | |||
| 4e419166e8 | |||
| 7eb72ab549 | |||
| d371ceeec8 | |||
| 490850d7a6 | |||
| d4ba13846a | |||
| 3479ba8a46 | |||
| 1c6d903301 | |||
| 84fd0bef32 | |||
| 4640106530 | |||
| 44456cb073 | |||
| 11a4aba46f | |||
| 5fab53e0a7 | |||
| 1067ffa35a | |||
| 08444b41a8 | |||
| 7b61be1a4a | |||
| 2d37b35d1f | |||
| 19f2de4228 | |||
| 76de945acc | |||
| b63641b36c | |||
| 1f3196b317 | |||
| 50adb4435c | |||
| 7a317a0acd | |||
| 19b97f7f6d | |||
| ead0867f4d | |||
| c6843ac115 | |||
| 56892ebbdc | |||
| dc7eedd902 | |||
| a65c267687 | |||
| a498b6f525 | |||
| af4114969a | |||
| 8971ebc913 | |||
| 6882e53280 | |||
| b8566fc2af | |||
| a91c3badab |
@@ -0,0 +1,75 @@
|
||||
# SPARC Configuration
|
||||
|
||||
# ---- Application Environment ----
|
||||
# Set to "production" or "staging" in deployed environments.
|
||||
# The API will refuse to start with the default JWT secret unless APP_ENV=development.
|
||||
APP_ENV=development
|
||||
|
||||
# ---- API Keys ----
|
||||
|
||||
# SerpAPI key for patent search
|
||||
API_KEY=your_serpapi_key_here
|
||||
|
||||
# OpenRouter API key for LLM analysis
|
||||
OPENROUTER_API_KEY=your_openrouter_key_here
|
||||
|
||||
# ---- Database ----
|
||||
|
||||
# PostgreSQL credentials (used by docker-compose)
|
||||
POSTGRES_USER=postgres
|
||||
POSTGRES_PASSWORD=change-me-to-a-secure-password
|
||||
POSTGRES_DB=sparc
|
||||
|
||||
# Full database URL (must match the credentials above)
|
||||
DATABASE_URL=postgresql://postgres:change-me-to-a-secure-password@localhost:5432/sparc
|
||||
|
||||
# ---- Authentication ----
|
||||
|
||||
# JWT Secret for signing tokens
|
||||
# IMPORTANT: Change this to a secure random string in production
|
||||
JWT_SECRET=your-secure-jwt-secret-change-in-production
|
||||
|
||||
# ---- CORS ----
|
||||
|
||||
# Comma-separated list of allowed origins for CORS
|
||||
# Defaults to http://localhost:3000,http://localhost:5173 when unset
|
||||
# CORS_ORIGINS=https://sparc.example.com,https://app.example.com
|
||||
|
||||
# ---- Storage ----
|
||||
|
||||
# Backend for patent PDF storage: "local" (default) or "s3"
|
||||
STORAGE_BACKEND=local
|
||||
|
||||
# S3/MinIO settings (only used when STORAGE_BACKEND=s3)
|
||||
# S3_BUCKET=sparc-patents
|
||||
# S3_ENDPOINT_URL=http://localhost:9000
|
||||
# AWS_ACCESS_KEY_ID=minioadmin
|
||||
# AWS_SECRET_ACCESS_KEY=minioadmin
|
||||
# To start MinIO locally: docker compose --profile s3 up -d minio
|
||||
|
||||
# ---- LLM ----
|
||||
|
||||
# LLM model to use via OpenRouter
|
||||
# Supported: anthropic/claude-3.5-sonnet, openai/gpt-4o, openai/gpt-4o-mini,
|
||||
# google/gemini-pro-1.5, meta-llama/llama-3.1-70b-instruct
|
||||
# MODEL=anthropic/claude-3.5-sonnet
|
||||
|
||||
# ---- Cache ----
|
||||
|
||||
# When USE_CACHE=true: check database for cached responses before making API calls
|
||||
# When USE_CACHE=false: always make fresh API calls (still stores results in database)
|
||||
USE_CACHE=true
|
||||
|
||||
# SERP API cache TTL in hours (how long cached search results are considered fresh)
|
||||
# SERP_CACHE_TTL_HOURS=24
|
||||
|
||||
# ---- Logging ----
|
||||
|
||||
# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
|
||||
# LOG_LEVEL=INFO
|
||||
|
||||
# ---- Webhooks ----
|
||||
|
||||
# Comma-separated list of webhook URLs for job completion and alert notifications
|
||||
# Supports generic HTTP POST and Slack/Discord incoming webhooks
|
||||
# WEBHOOK_URLS=https://hooks.slack.com/services/XXX,https://example.com/webhook
|
||||
@@ -0,0 +1,203 @@
|
||||
name: Build and Push Docker Images
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
tags:
|
||||
- '*'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Install system dependencies
|
||||
shell: sh
|
||||
run: |
|
||||
apt-get update && apt-get install -y git python3 python3-pip gcc libpq-dev python3-dev
|
||||
|
||||
- name: Checkout code
|
||||
shell: sh
|
||||
run: |
|
||||
git clone http://gitea.gitea.svc.cluster.local/${{ gitea.repository }}.git .
|
||||
git checkout ${{ gitea.sha }}
|
||||
|
||||
- name: Install Python dependencies
|
||||
shell: sh
|
||||
run: |
|
||||
pip3 install -r requirements.txt ruff
|
||||
|
||||
# - name: Run ruff linter
|
||||
# shell: sh
|
||||
# run: |
|
||||
# ruff check SPARC/ tests/
|
||||
|
||||
- name: Install Node.js and check TypeScript types
|
||||
shell: sh
|
||||
run: |
|
||||
apt-get install -y nodejs npm
|
||||
cd frontend
|
||||
npm ci
|
||||
npm run generate:local
|
||||
if ! git diff --quiet src/api/schema.d.ts; then
|
||||
echo "ERROR: src/api/schema.d.ts is out of date. Run 'npm run generate:local' and commit the result."
|
||||
git diff src/api/schema.d.ts
|
||||
exit 1
|
||||
fi
|
||||
npx tsc --noEmit
|
||||
|
||||
# - name: Run pytest
|
||||
# shell: sh
|
||||
# env:
|
||||
# DATABASE_URL: "sqlite://"
|
||||
# API_KEY: "test-key"
|
||||
# OPENROUTER_API_KEY: "test-key"
|
||||
# JWT_SECRET: "test-secret-for-ci"
|
||||
# APP_ENV: "development"
|
||||
# run: |
|
||||
# python3 -m pytest tests/ -v --tb=short -x
|
||||
|
||||
build-api:
|
||||
needs: test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
shell: sh
|
||||
run: |
|
||||
apt-get update && apt-get install -y git docker.io
|
||||
|
||||
- name: Checkout code
|
||||
shell: sh
|
||||
run: |
|
||||
git clone http://gitea.gitea.svc.cluster.local/${{ gitea.repository }}.git .
|
||||
git checkout ${{ gitea.sha }}
|
||||
|
||||
- name: Determine image tags
|
||||
id: tags
|
||||
shell: sh
|
||||
run: |
|
||||
REGISTRY="gitea.gitea.svc.cluster.local:80"
|
||||
REPO_OWNER="${{ gitea.repository_owner }}"
|
||||
REPO_NAME="${{ gitea.repository }}"
|
||||
|
||||
REPO_NAME_ONLY=$(echo "$REPO_NAME" | cut -d'/' -f2)
|
||||
REPO_OWNER_LOWER=$(echo "$REPO_OWNER" | tr '[:upper:]' '[:lower:]')
|
||||
REPO_NAME_LOWER=$(echo "$REPO_NAME_ONLY" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
IMAGE_BASE="${REGISTRY}/${REPO_OWNER_LOWER}/${REPO_NAME_LOWER}"
|
||||
|
||||
case "${{ gitea.ref }}" in
|
||||
refs/tags/*)
|
||||
TAG_NAME="${{ gitea.ref_name }}"
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:${TAG_NAME}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
refs/heads/main)
|
||||
TIMESTAMP=$(date -u +%Y%m%d%H%M%S)
|
||||
SHORT_SHA=$(echo "${{ gitea.sha }}" | cut -c1-7)
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:${TIMESTAMP}-${SHORT_SHA}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
*)
|
||||
BRANCH_TAG=$(echo "${{ gitea.ref_name }}" | sed 's/\//-/g')
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:${BRANCH_TAG}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=false" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "IMAGE_LATEST=${IMAGE_BASE}:latest" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Login to registry
|
||||
shell: sh
|
||||
run: |
|
||||
echo "${{ secrets.PERSONAL_TOKEN }}" | docker login gitea.gitea.svc.cluster.local:80 -u "${{ gitea.actor }}" --password-stdin
|
||||
|
||||
- name: Build and push API image
|
||||
shell: sh
|
||||
run: |
|
||||
echo "Building API image..."
|
||||
docker build -t ${{ steps.tags.outputs.IMAGE_TAG }} .
|
||||
|
||||
echo "Pushing API image..."
|
||||
docker push ${{ steps.tags.outputs.IMAGE_TAG }}
|
||||
|
||||
if [ "${{ steps.tags.outputs.PUSH_LATEST }}" = "true" ]; then
|
||||
echo "Tagging and pushing latest..."
|
||||
docker tag ${{ steps.tags.outputs.IMAGE_TAG }} ${{ steps.tags.outputs.IMAGE_LATEST }}
|
||||
docker push ${{ steps.tags.outputs.IMAGE_LATEST }}
|
||||
fi
|
||||
|
||||
echo "API image available at ${{ steps.tags.outputs.IMAGE_TAG }}"
|
||||
|
||||
build-frontend:
|
||||
needs: test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
shell: sh
|
||||
run: |
|
||||
apt-get update && apt-get install -y git docker.io
|
||||
|
||||
- name: Checkout code
|
||||
shell: sh
|
||||
run: |
|
||||
git clone http://gitea.gitea.svc.cluster.local/${{ gitea.repository }}.git .
|
||||
git checkout ${{ gitea.sha }}
|
||||
|
||||
- name: Determine image tags
|
||||
id: tags
|
||||
shell: sh
|
||||
run: |
|
||||
REGISTRY="gitea.gitea.svc.cluster.local:80"
|
||||
REPO_OWNER="${{ gitea.repository_owner }}"
|
||||
REPO_NAME="${{ gitea.repository }}"
|
||||
|
||||
REPO_NAME_ONLY=$(echo "$REPO_NAME" | cut -d'/' -f2)
|
||||
REPO_OWNER_LOWER=$(echo "$REPO_OWNER" | tr '[:upper:]' '[:lower:]')
|
||||
REPO_NAME_LOWER=$(echo "$REPO_NAME_ONLY" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
IMAGE_BASE="${REGISTRY}/${REPO_OWNER_LOWER}/${REPO_NAME_LOWER}"
|
||||
|
||||
case "${{ gitea.ref }}" in
|
||||
refs/tags/*)
|
||||
TAG_NAME="${{ gitea.ref_name }}"
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${TAG_NAME}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
refs/heads/main)
|
||||
TIMESTAMP=$(date -u +%Y%m%d%H%M%S)
|
||||
SHORT_SHA=$(echo "${{ gitea.sha }}" | cut -c1-7)
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${TIMESTAMP}-${SHORT_SHA}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
*)
|
||||
BRANCH_TAG=$(echo "${{ gitea.ref_name }}" | sed 's/\//-/g')
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${BRANCH_TAG}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=false" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "IMAGE_LATEST=${IMAGE_BASE}:frontend-latest" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Login to registry
|
||||
shell: sh
|
||||
run: |
|
||||
echo "${{ secrets.PERSONAL_TOKEN }}" | docker login gitea.gitea.svc.cluster.local:80 -u "${{ gitea.actor }}" --password-stdin
|
||||
|
||||
- name: Build and push frontend image
|
||||
shell: sh
|
||||
run: |
|
||||
echo "Building frontend image..."
|
||||
docker build -t ${{ steps.tags.outputs.IMAGE_TAG }} ./frontend
|
||||
|
||||
echo "Pushing frontend image..."
|
||||
docker push ${{ steps.tags.outputs.IMAGE_TAG }}
|
||||
|
||||
if [ "${{ steps.tags.outputs.PUSH_LATEST }}" = "true" ]; then
|
||||
echo "Tagging and pushing frontend-latest..."
|
||||
docker tag ${{ steps.tags.outputs.IMAGE_TAG }} ${{ steps.tags.outputs.IMAGE_LATEST }}
|
||||
docker push ${{ steps.tags.outputs.IMAGE_LATEST }}
|
||||
fi
|
||||
|
||||
echo "Frontend image available at ${{ steps.tags.outputs.IMAGE_TAG }}"
|
||||
@@ -0,0 +1,67 @@
|
||||
name: Test and Lint
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Install system dependencies
|
||||
shell: sh
|
||||
run: |
|
||||
apt-get update && apt-get install -y git python3 python3-pip gcc libpq-dev python3-dev
|
||||
|
||||
- name: Checkout code
|
||||
shell: sh
|
||||
run: |
|
||||
git clone http://gitea.gitea.svc.cluster.local/${{ gitea.repository }}.git .
|
||||
git checkout ${{ gitea.sha }}
|
||||
|
||||
- name: Install Python dependencies
|
||||
shell: sh
|
||||
run: |
|
||||
pip3 install -r requirements.txt ruff
|
||||
|
||||
- name: Run ruff linter
|
||||
shell: sh
|
||||
run: |
|
||||
ruff check SPARC/ tests/
|
||||
|
||||
- name: Install Node.js and frontend dependencies
|
||||
shell: sh
|
||||
run: |
|
||||
apt-get install -y nodejs npm
|
||||
cd frontend && npm ci
|
||||
|
||||
- name: Verify generated API types are up to date
|
||||
shell: sh
|
||||
run: |
|
||||
cd frontend && npm run generate:local
|
||||
if ! git diff --quiet src/api/schema.d.ts; then
|
||||
echo "ERROR: src/api/schema.d.ts is out of date. Run 'npm run generate:local' and commit the result."
|
||||
git diff src/api/schema.d.ts
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Run TypeScript type check
|
||||
shell: sh
|
||||
run: |
|
||||
cd frontend && npx tsc --noEmit
|
||||
|
||||
- name: Run pytest
|
||||
shell: sh
|
||||
env:
|
||||
DATABASE_URL: "sqlite://"
|
||||
API_KEY: "test-key"
|
||||
OPENROUTER_API_KEY: "test-key"
|
||||
JWT_SECRET: "test-secret-for-ci"
|
||||
APP_ENV: "development"
|
||||
run: |
|
||||
python3 -m pytest tests/ -v --tb=short -x
|
||||
+2
-1
@@ -2,4 +2,5 @@
|
||||
.pyenv
|
||||
__pycache__
|
||||
.venv
|
||||
patents
|
||||
patents
|
||||
tmp/
|
||||
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies for pdfplumber and psycopg2
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libpq-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt .
|
||||
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
|
||||
# Create patents directory for PDF storage
|
||||
RUN mkdir -p /app/patents
|
||||
|
||||
# Expose ports for API and Dashboard
|
||||
EXPOSE 8000 8501
|
||||
|
||||
# Default command runs the API (can be overridden in docker-compose)
|
||||
CMD ["uvicorn", "SPARC.api:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
||||
@@ -1,28 +1,301 @@
|
||||
# SPARC
|
||||
|
||||
## Name
|
||||
Semiconductor Patent & Analytics Report Core
|
||||
**Semiconductor Patent & Analytics Report Core**
|
||||
|
||||
## Description
|
||||
A patent analysis system that estimates company performance by analyzing their patent portfolios using LLM-powered insights.
|
||||
|
||||
## Installation
|
||||
### NixOS Installation
|
||||
`nix develop` to build and configure nix dev environment
|
||||
## Overview
|
||||
|
||||
## Usage
|
||||
```bash
|
||||
docker compose up -d
|
||||
SPARC automatically collects, parses, and analyzes patents from companies to provide performance estimations. It uses Claude AI to evaluate innovation quality, strategic direction, and competitive positioning based on patent content.
|
||||
|
||||
## Features
|
||||
|
||||
- **Patent Retrieval**: Automated collection via SerpAPI's Google Patents engine
|
||||
- **Intelligent Parsing**: Extracts key sections (abstract, claims, summary) from patent PDFs
|
||||
- **Content Minimization**: Removes verbose descriptions to reduce LLM token usage
|
||||
- **AI Analysis**: Uses Claude 3.5 Sonnet via OpenRouter to analyze innovation quality and market potential
|
||||
- **Portfolio Analysis**: Evaluates multiple patents holistically for comprehensive insights
|
||||
- **Batch Processing**: Analyze multiple companies concurrently with progress tracking
|
||||
- **REST API**: FastAPI web service with async job support
|
||||
- **Dashboard**: React TypeScript web dashboard with authentication
|
||||
- **Robust Testing**: 40 tests covering all major functionality
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
SPARC/
|
||||
├── serp_api.py # Patent retrieval and PDF parsing
|
||||
├── llm.py # Claude AI integration via OpenRouter
|
||||
├── analyzer.py # High-level orchestration
|
||||
├── api.py # FastAPI web service with auth endpoints
|
||||
├── auth.py # JWT authentication module
|
||||
├── database.py # PostgreSQL storage with caching
|
||||
├── types.py # Data models
|
||||
└── config.py # Environment configuration
|
||||
```
|
||||
|
||||
## Roadmap
|
||||
- [X] Retrieve `publicationID` from SERP API
|
||||
- [ ] Retrieve data from Google's patent API based on those `publicationID`s
|
||||
- This may not be needed; looking to parse the patents based solely on the PDF retrieved from SERP
|
||||
- [ ] Wrap this into a python fastAPI, then bundle with docker
|
||||
## Installation
|
||||
|
||||
### Docker (Recommended)
|
||||
|
||||
```bash
|
||||
# Clone and configure
|
||||
git clone <repository-url>
|
||||
cd SPARC
|
||||
cp .env.example .env
|
||||
# Edit .env with your API keys
|
||||
|
||||
# Start all services (API, Dashboard, PostgreSQL)
|
||||
docker-compose up -d
|
||||
|
||||
# Access the services
|
||||
# - API: http://localhost:8000
|
||||
# - Dashboard: http://localhost:8080
|
||||
# - API Docs: http://localhost:8000/docs
|
||||
```
|
||||
|
||||
#### Patent PDF Storage
|
||||
|
||||
The API stores downloaded patent PDFs in a `patents/` directory. In Docker,
|
||||
this is mounted as a bind mount (`./patents:/app/patents`) so that PDFs persist
|
||||
across container restarts.
|
||||
|
||||
If you deploy to a different environment, ensure the `patents/` directory is a
|
||||
persistent volume. Without it, PDFs will be re-downloaded on every analysis.
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml excerpt
|
||||
volumes:
|
||||
- ./patents:/app/patents
|
||||
```
|
||||
|
||||
### NixOS
|
||||
|
||||
```bash
|
||||
nix develop
|
||||
```
|
||||
|
||||
This automatically creates a virtual environment and installs all dependencies.
|
||||
|
||||
### Manual Installation
|
||||
|
||||
```bash
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Create a `.env` file in the project root:
|
||||
|
||||
```bash
|
||||
# SerpAPI key for patent search
|
||||
API_KEY=your_serpapi_key_here
|
||||
|
||||
# OpenRouter API key for Claude AI analysis
|
||||
OPENROUTER_API_KEY=your_openrouter_key_here
|
||||
```
|
||||
|
||||
Get your API keys:
|
||||
- SerpAPI: https://serpapi.com/
|
||||
- OpenRouter: https://openrouter.ai/
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
|
||||
# Initialize the analyzer
|
||||
analyzer = CompanyAnalyzer()
|
||||
|
||||
# Analyze a company's patent portfolio
|
||||
analysis = analyzer.analyze_company("nvidia")
|
||||
print(analysis)
|
||||
```
|
||||
|
||||
### Run the Example
|
||||
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
This will:
|
||||
1. Retrieve recent NVIDIA patents
|
||||
2. Parse and minimize content
|
||||
3. Analyze with Claude AI
|
||||
4. Print comprehensive performance assessment
|
||||
|
||||
### Single Patent Analysis
|
||||
|
||||
```python
|
||||
# Analyze a specific patent
|
||||
result = analyzer.analyze_single_patent(
|
||||
patent_id="US11322171B1",
|
||||
company_name="nvidia"
|
||||
)
|
||||
```
|
||||
|
||||
### Multi-Company Batch Analysis
|
||||
|
||||
```python
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
|
||||
analyzer = CompanyAnalyzer()
|
||||
|
||||
# Analyze multiple companies concurrently (default 3 workers)
|
||||
batch_result = analyzer.analyze_companies(
|
||||
["nvidia", "amd", "intel", "qualcomm"],
|
||||
max_workers=3
|
||||
)
|
||||
|
||||
# Access results
|
||||
print(f"Analyzed: {batch_result.total_companies}")
|
||||
print(f"Successful: {batch_result.successful}")
|
||||
print(f"Failed: {batch_result.failed}")
|
||||
|
||||
for result in batch_result.results:
|
||||
if result.success:
|
||||
print(f"{result.company_name}: {result.patent_count} patents")
|
||||
print(result.analysis)
|
||||
|
||||
# Or use sequential processing (safer for rate limits)
|
||||
batch_result = analyzer.analyze_companies_sequential(["nvidia", "amd"])
|
||||
```
|
||||
|
||||
### REST API
|
||||
|
||||
Start the FastAPI server:
|
||||
|
||||
```bash
|
||||
uvicorn SPARC.api:app --reload
|
||||
```
|
||||
|
||||
API endpoints:
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `/health` | GET | Health check |
|
||||
| `/analyze/{company}` | GET | Analyze single company |
|
||||
| `/analyze/batch` | POST | Analyze multiple companies |
|
||||
| `/analyze/batch/async` | POST | Start async batch job |
|
||||
| `/jobs/{job_id}` | GET | Get job status |
|
||||
| `/jobs` | GET | List all jobs |
|
||||
|
||||
Interactive docs available at `http://localhost:8000/docs`
|
||||
|
||||
Example API usage:
|
||||
|
||||
```bash
|
||||
# Single company
|
||||
curl http://localhost:8000/analyze/nvidia
|
||||
|
||||
# Batch analysis
|
||||
curl -X POST http://localhost:8000/analyze/batch \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"companies": ["nvidia", "amd", "intel"]}'
|
||||
|
||||
# Async batch (for long-running jobs)
|
||||
curl -X POST http://localhost:8000/analyze/batch/async \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"companies": ["nvidia", "amd", "intel", "qualcomm"]}'
|
||||
```
|
||||
|
||||
### Web Dashboard
|
||||
|
||||
The React dashboard is included in Docker Compose:
|
||||
|
||||
```bash
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
Dashboard features:
|
||||
- **Authentication**: User registration, login, and JWT-based sessions
|
||||
- **Company Analysis**: Analyze individual companies with real-time results
|
||||
- **Batch Analysis**: Process multiple companies with progress tracking
|
||||
- **Analytics**: View historical analysis data and trends
|
||||
- **Admin Panel**: User management for administrators
|
||||
|
||||
The dashboard runs at `http://localhost:8080` when using Docker Compose.
|
||||
|
||||
## Running Tests
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
pytest tests/ -v
|
||||
|
||||
# Run specific test modules
|
||||
pytest tests/test_analyzer.py -v
|
||||
pytest tests/test_llm.py -v
|
||||
pytest tests/test_serp_api.py -v
|
||||
|
||||
# Run with coverage
|
||||
pytest tests/ --cov=SPARC --cov-report=term-missing
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Patent Collection**: Queries SerpAPI for company patents
|
||||
2. **PDF Download**: Retrieves patent PDF files
|
||||
3. **Section Extraction**: Parses abstract, claims, summary, and description
|
||||
4. **Content Minimization**: Keeps essential sections, removes bloated descriptions
|
||||
5. **LLM Analysis**: Sends minimized content to Claude for analysis
|
||||
6. **Performance Estimation**: Returns insights on innovation quality and outlook
|
||||
|
||||
## Roadmap
|
||||
|
||||
- [X] Retrieve `publicationID` from SERP API
|
||||
- [X] Parse patents from PDFs (no need for Google Patent API)
|
||||
- [X] Extract and minimize patent content
|
||||
- [X] LLM integration for analysis
|
||||
- [X] Company performance estimation
|
||||
- [X] Multi-company batch processing
|
||||
- [X] FastAPI web service wrapper
|
||||
- [X] Docker containerization
|
||||
- [X] Results persistence (database)
|
||||
- [X] Visualization dashboard
|
||||
|
||||
## Development
|
||||
|
||||
### Code Style
|
||||
|
||||
- Type hints throughout
|
||||
- Comprehensive docstrings
|
||||
- Small, testable functions
|
||||
- Conventional commits
|
||||
|
||||
### Testing Philosophy
|
||||
|
||||
- Unit tests for core logic
|
||||
- Integration tests for orchestration
|
||||
- Mock external APIs
|
||||
- Aim for high coverage
|
||||
|
||||
### Making Changes
|
||||
|
||||
1. Write tests first
|
||||
2. Implement feature
|
||||
3. Verify all tests pass
|
||||
4. Commit with conventional format: `type: description`
|
||||
|
||||
Types: `feat`, `fix`, `docs`, `test`, `refactor`, `chore`
|
||||
|
||||
## Documentation
|
||||
|
||||
Additional documentation is available in the `docs/` directory:
|
||||
|
||||
- **[Deployment Guide](docs/DEPLOYMENT.md)** - Complete deployment instructions for Docker, database setup, and production configuration
|
||||
- **[Database Mode](docs/DATABASE_MODE.md)** - Database storage for prompts, responses, and analytics
|
||||
- **[Container Registry](docs/CONTAINER_REGISTRY.md)** - CI/CD and container registry setup with Gitea Actions
|
||||
|
||||
## License
|
||||
|
||||
For open source projects, say how it is licensed.
|
||||
|
||||
## Project status
|
||||
Heavy development for the limited time available to me
|
||||
## Project Status
|
||||
|
||||
Core functionality complete. Ready for production use with API keys configured.
|
||||
|
||||
All major features implemented: REST API, React dashboard with authentication, Docker containerization, database storage with caching, and multi-company batch processing.
|
||||
|
||||
+122
@@ -0,0 +1,122 @@
|
||||
# SPARC Roadmap
|
||||
|
||||
Semiconductor Patent & Analytics Report Core -- development priorities.
|
||||
|
||||
## Current State
|
||||
|
||||
SPARC is a patent analysis platform with a working end-to-end pipeline:
|
||||
Python/FastAPI backend, React/TypeScript frontend, PostgreSQL for persistence
|
||||
and caching, Docker Compose for local development, and Gitea Actions CI/CD for
|
||||
image builds. Core features (patent retrieval via SerpAPI, PDF parsing, LLM
|
||||
analysis via OpenRouter/Claude, batch processing, JWT authentication, analytics
|
||||
dashboard) are all implemented and functional.
|
||||
|
||||
---
|
||||
|
||||
## P1 -- High Priority
|
||||
|
||||
These items address correctness, security, and reliability gaps that should be
|
||||
resolved before broader production use.
|
||||
|
||||
### Security hardening
|
||||
|
||||
- **Rotate default JWT secret.** `auth.py` ships a fallback
|
||||
`sparc-secret-key-change-in-production` that will be used if `JWT_SECRET` is
|
||||
unset. Add a startup check that refuses to start with the default secret in
|
||||
non-development environments.
|
||||
- **CORS allow-origins are hardcoded.** `api.py` only permits
|
||||
`localhost:3000` and `localhost:5173`. Make the allowed origins configurable
|
||||
via environment variable so the dashboard works when deployed behind a real
|
||||
domain.
|
||||
- **Database credentials in docker-compose.yml.** The compose file embeds
|
||||
`postgres:postgres` in plain text. Reference a `.env` file or Docker secrets
|
||||
instead.
|
||||
|
||||
### Error handling and resilience
|
||||
|
||||
- **`get_db_client()` in `auth.py` creates a new `DatabaseClient` on every
|
||||
call.** This bypasses the connection pool and can exhaust database
|
||||
connections under load. Refactor to share a single pooled client.
|
||||
- **`_jobs` dict is in-memory only.** Job state is lost on API restart. Persist
|
||||
job status in PostgreSQL or Redis so async batch results survive restarts.
|
||||
- **No rate limiting on auth endpoints.** `/auth/login` and `/auth/register`
|
||||
are unprotected against brute-force or abuse. Add rate limiting middleware.
|
||||
|
||||
### Test coverage for auth and admin
|
||||
|
||||
- The existing API tests (`tests/test_api.py`) bypass authentication entirely.
|
||||
Add tests that exercise the JWT flow: registration, login, protected-route
|
||||
access, token refresh, and admin-only endpoints.
|
||||
|
||||
---
|
||||
|
||||
## P2 -- Medium Priority
|
||||
|
||||
Improvements to usability, performance, and developer experience.
|
||||
|
||||
### Backend
|
||||
|
||||
- **Add structured logging.** Replace `print()` calls throughout `analyzer.py`,
|
||||
`serp_api.py`, and `llm.py` with Python `logging` so log levels and
|
||||
formatting are consistent.
|
||||
- **Make LLM model configurable.** `llm.py` hardcodes
|
||||
`anthropic/claude-3.5-sonnet`. Accept a `MODEL` environment variable to allow
|
||||
switching models without code changes.
|
||||
- **SERP cache TTL is hardcoded to 24 hours.** Expose `SERP_CACHE_TTL_HOURS`
|
||||
as an environment variable in `config.py`.
|
||||
- **Patent PDF storage.** PDFs are saved to a local `patents/` directory. For
|
||||
containerized deployments, consider object storage (S3/MinIO) or at minimum
|
||||
document the volume mount requirement more prominently.
|
||||
- **`analyze_single_patent` assumes local file path.** The method constructs
|
||||
`patents/{patent_id}.pdf` and reads from disk, but does not download the PDF
|
||||
first. Either integrate the download step or document the prerequisite.
|
||||
- **`Patent.patent_id` typed as `int` in `types.py` but used as `str`
|
||||
everywhere.** Fix the type annotation to `str`.
|
||||
|
||||
### Frontend
|
||||
|
||||
- **No loading/error states on several pages.** The Batch and Analytics pages
|
||||
would benefit from skeleton loaders and user-friendly error messages.
|
||||
- **No dark mode.** Tailwind is configured but no dark variant is applied.
|
||||
- **Missing `package-lock.json` or `pnpm-lock.yaml`.** The frontend has no
|
||||
lockfile committed, leading to non-reproducible builds.
|
||||
|
||||
### CI/CD
|
||||
|
||||
- **No test stage in the Gitea Actions workflow.** `build.yaml` builds and
|
||||
pushes images but never runs `pytest`. Add a test job that gates the build.
|
||||
- **No linting or type checking.** Add `ruff` (Python) and `tsc --noEmit`
|
||||
(TypeScript) to CI.
|
||||
|
||||
---
|
||||
|
||||
## P3 -- Nice to Have
|
||||
|
||||
Lower-urgency enhancements and future features.
|
||||
|
||||
- **Export analysis reports.** Allow users to download analysis results as PDF
|
||||
or CSV from the dashboard.
|
||||
- **Comparison view.** Side-by-side comparison of two companies' patent
|
||||
portfolios.
|
||||
- **Scheduled/recurring analysis.** Periodically re-analyze tracked companies
|
||||
and alert on significant changes.
|
||||
- **Webhook/notification support.** Send alerts (Slack, Discord, email) when
|
||||
batch jobs complete or when a company's innovation score changes
|
||||
significantly.
|
||||
- **Multi-model support.** Let users choose between LLM providers per analysis
|
||||
(e.g., GPT-4o, Gemini, Claude) and compare outputs.
|
||||
- **Patent trend charts.** Visualize patent filing frequency and technology
|
||||
category distribution over time in the Analytics page.
|
||||
- **API pagination.** The `/analyze/batch` and `/jobs` endpoints could benefit
|
||||
from cursor-based pagination for large result sets.
|
||||
- **OpenAPI client generation.** Auto-generate the TypeScript API client from
|
||||
the FastAPI OpenAPI spec to keep frontend types in sync.
|
||||
|
||||
---
|
||||
|
||||
## Infrastructure and Deployment
|
||||
|
||||
Kubernetes manifests, Helm charts, and cluster-level concerns (MetalLB,
|
||||
storage, FluxCD sync) are tracked in the
|
||||
[Talos](https://10.0.1.10/leeworks-agents/Talos) repository. File
|
||||
infrastructure-related issues there, not here.
|
||||
+3
-2
@@ -1,3 +1,4 @@
|
||||
from .types import Patents, Patent
|
||||
from .types import Patent as Patent
|
||||
from .types import Patents as Patents
|
||||
|
||||
all = ["Patents", "Patent"]
|
||||
__all__ = ["Patents", "Patent"]
|
||||
|
||||
@@ -0,0 +1,361 @@
|
||||
"""High-level patent analysis orchestration.
|
||||
|
||||
This module ties together patent retrieval, parsing, and LLM analysis
|
||||
to provide company performance estimation based on patent portfolios.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import Callable
|
||||
|
||||
from SPARC import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from SPARC.database import DatabaseClient
|
||||
from SPARC.llm import LLMAnalyzer
|
||||
from SPARC.serp_api import SERP
|
||||
from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult, Patent, Patents
|
||||
|
||||
|
||||
class CompanyAnalyzer:
    """Orchestrates end-to-end company performance analysis via patents.

    Ties together SERP patent search, PDF download/parsing, database caching
    and LLM analysis behind a small set of entry points.
    """

    # Prefixes of the plain-string messages analyze_company() returns when it
    # could not produce an analysis; used to map them onto a failed result.
    _FAILURE_PREFIXES = ("No patents found", "Failed to process")

    def __init__(self, openrouter_api_key: str | None = None, db_client: DatabaseClient | None = None):
        """Initialize the company analyzer.

        Args:
            openrouter_api_key: Optional OpenRouter API key. If None, loads from config.
            db_client: Optional DatabaseClient for patent caching. Created
                from ``config.database_url`` if None.
        """
        self.llm_analyzer = LLMAnalyzer(api_key=openrouter_api_key)
        self.db = db_client or DatabaseClient(config.database_url)
        self.db.connect()
        self.db.initialize_schema()

    @staticmethod
    def _query_hash(company_name: str) -> str:
        """Return the SERP-cache key (SHA-256 of the lower-cased company name)."""
        return hashlib.sha256(company_name.lower().encode()).hexdigest()

    def _load_patents(self, company_name: str) -> Patents:
        """Return the patents for a company, preferring the SERP query cache.

        On a cache miss the SERP API is queried and a non-empty result is
        stored in the cache with the configured TTL.
        """
        query_hash = self._query_hash(company_name)
        cached_ids = self.db.get_cached_serp_query(query_hash)
        if cached_ids is not None:
            logger.info("Using cached SERP results for %s (%d patents)", company_name, len(cached_ids))
            # Only IDs are cached, so the PDF link is unknown at this point.
            return Patents(patents=[Patent(patent_id=pid, pdf_link="") for pid in cached_ids])

        logger.info("Retrieving patents for %s...", company_name)
        patents = SERP.query(company_name)
        if patents.patents:
            self.db.store_serp_query(
                company_name=company_name,
                query_hash=query_hash,
                patent_ids=[p.patent_id for p in patents.patents],
                ttl_hours=config.serp_cache_ttl_hours,
            )
        return patents

    def analyze_company(self, company_name: str, patents: "Patents | None" = None, model: str | None = None) -> str:
        """Analyze a company's performance based on their patent portfolio.

        This is the main entry point that orchestrates the full pipeline:
        1. Retrieve patents from SERP API (or the SERP query cache)
        2. Download and parse each patent PDF
        3. Minimize patent content (remove bloat)
        4. Analyze portfolio with LLM
        5. Return performance estimation

        Args:
            company_name: Name of the company to analyze
            patents: Optional pre-fetched Patents result to avoid duplicate API calls
            model: Optional LLM model override (e.g. 'openai/gpt-4o')

        Returns:
            The LLM analysis text, or a message starting with
            "No patents found" / "Failed to process" when nothing could be
            analyzed.
        """
        if patents is None:
            patents = self._load_patents(company_name)

        if not patents.patents:
            return f"No patents found for {company_name}"

        logger.info("Found %d patents. Processing...", len(patents.patents))

        # Download, parse, and minimize patents in parallel. Results are
        # gathered in submission order rather than completion order so the
        # portfolio handed to the LLM is identical from run to run
        # (presumably this also keeps prompt-hash caching effective - confirm
        # against LLMAnalyzer).
        processed_patents = []
        with ThreadPoolExecutor(max_workers=config.patent_thread_workers) as executor:
            submitted = [
                (patent, executor.submit(self._process_single_patent, patent, company_name, self.db))
                for patent in patents.patents
            ]
            for patent, future in submitted:
                try:
                    result = future.result()
                except Exception as e:
                    logger.warning("Failed to process %s: %s", patent.patent_id, e)
                    continue
                if result:
                    processed_patents.append(result)

        if not processed_patents:
            return f"Failed to process any patents for {company_name}"

        logger.info("Analyzing portfolio with LLM...")
        return self.llm_analyzer.analyze_patent_portfolio(
            patents_data=processed_patents, company_name=company_name, model=model
        )

    def analyze_single_patent(self, patent_id: str, company_name: str, model: str | None = None) -> str:
        """Analyze a single patent by ID.

        If the patent PDF is not already on disk, this method attempts to
        download it automatically by looking up the PDF link in the database
        cache. If the link is not cached either, a ``FileNotFoundError`` is
        raised with instructions on how to obtain the PDF.

        Args:
            patent_id: Publication ID of the patent (e.g. "US-11234567-B2")
            company_name: Name of the company (for context)
            model: Optional LLM model override (e.g. 'openai/gpt-4o')

        Returns:
            Analysis of the specific patent's innovation quality, or a
            "Failed to analyze patent ..." message if parsing or the LLM call
            raised an error other than ``FileNotFoundError``.

        Raises:
            FileNotFoundError: If the patent PDF cannot be found or downloaded.
        """
        import os

        logger.info("Analyzing patent %s for %s...", patent_id, company_name)

        patent_path = f"patents/{patent_id}.pdf"

        if not os.path.exists(patent_path):
            # Attempt to download the PDF automatically from cached metadata
            cached = self.db.get_cached_patent(patent_id)
            pdf_link = cached.get("pdf_link") if cached else None
            if not pdf_link:
                raise FileNotFoundError(
                    f"Patent PDF not found at '{patent_path}' and no download link is "
                    f"cached for '{patent_id}'. Run a company analysis first to populate "
                    f"the cache, or call SERP.save_patents() with the patent's PDF link."
                )
            logger.info("PDF not on disk; downloading %s from cached link", patent_id)
            patent = SERP.save_patents(Patent(patent_id=patent_id, pdf_link=pdf_link))
            patent_path = patent.pdf_path

        try:
            sections = SERP.parse_patent_pdf(patent_path)
            minimized_content = SERP.minimize_patent_for_llm(sections)
            return self.llm_analyzer.analyze_patent_content(
                patent_content=minimized_content, company_name=company_name, model=model
            )
        except FileNotFoundError:
            # Documented contract: a missing PDF is raised, not reported as text.
            raise
        except Exception as e:
            return f"Failed to analyze patent {patent_id}: {e}"

    @staticmethod
    def _process_single_patent(
        patent: Patent,
        company_name: str = "",
        db: DatabaseClient | None = None,
    ) -> dict | None:
        """Download, parse, and minimize a single patent. Thread-safe.

        Checks DB cache before downloading. Stores results after processing.

        Args:
            patent: Patent to process.
            company_name: Company the patent is stored under in the cache.
            db: Optional database client used as the patent cache.

        Returns:
            Dict with ``patent_id`` and minimized ``content``, or None on failure.
        """
        try:
            # Check DB cache first
            cached = db.get_cached_patent(patent.patent_id) if db else None
            if cached and cached.get("minimized_content"):
                return {"patent_id": patent.patent_id, "content": cached["minimized_content"]}

            # Patents rebuilt from the SERP query cache carry an empty
            # pdf_link; fall back to the link stored with the cached patent
            # row (when there is one) so the download below can succeed.
            if not patent.pdf_link and cached and cached.get("pdf_link"):
                patent = Patent(patent_id=patent.patent_id, pdf_link=cached["pdf_link"])

            # Full processing: download, parse, minimize
            patent = SERP.save_patents(patent)
            sections = SERP.parse_patent_pdf(patent.pdf_path)
            minimized_content = SERP.minimize_patent_for_llm(sections)

            # Store in DB cache
            if db:
                db.store_patent(
                    patent_id=patent.patent_id,
                    company_name=company_name,
                    pdf_link=patent.pdf_link,
                    raw_sections=sections,
                    minimized_content=minimized_content,
                )

            return {"patent_id": patent.patent_id, "content": minimized_content}
        except Exception as e:
            # Best-effort: one bad patent must not abort the whole portfolio.
            logger.warning("Failed to process %s: %s", patent.patent_id, e)
            return None

    def _analyze_company_safe(self, company_name: str, model: str | None = None) -> CompanyAnalysisResult:
        """Internal wrapper that catches exceptions and returns structured result.

        Args:
            company_name: Name of the company to analyze
            model: Optional LLM model override (e.g. 'openai/gpt-4o')

        Returns:
            CompanyAnalysisResult with success/failure status
        """
        try:
            # Delegate to analyze_company which handles SERP/patent caching
            analysis = self.analyze_company(company_name, model=model)

            # Determine patent count from cached SERP query
            cached_ids = self.db.get_cached_serp_query(self._query_hash(company_name))
            patent_count = len(cached_ids) if cached_ids else 0

            # analyze_company reports "nothing to analyze" as a message string
            failed = analysis.startswith(self._FAILURE_PREFIXES)
            if failed:
                return CompanyAnalysisResult(
                    company_name=company_name,
                    analysis=analysis,
                    patent_count=patent_count,
                    success=False,
                    error=analysis,
                )
            return CompanyAnalysisResult(
                company_name=company_name,
                analysis=analysis,
                patent_count=patent_count,
                success=True,
            )
        except Exception as e:
            return CompanyAnalysisResult(
                company_name=company_name,
                analysis="",
                patent_count=0,
                success=False,
                error=str(e),
            )

    @staticmethod
    def _summarize(results: list[CompanyAnalysisResult], total: int) -> BatchAnalysisResult:
        """Log the batch outcome and wrap the results in a BatchAnalysisResult."""
        successful = sum(1 for r in results if r.success)
        failed = total - successful
        logger.info("Batch complete: %d succeeded, %d failed", successful, failed)
        return BatchAnalysisResult(
            results=results,
            total_companies=total,
            successful=successful,
            failed=failed,
        )

    def analyze_companies(
        self,
        companies: list[str],
        max_workers: int = 3,
        progress_callback: Callable[[str, int, int], None] | None = None,
        model: str | None = None,
    ) -> BatchAnalysisResult:
        """Analyze multiple companies' patent portfolios in batch.

        Processes companies concurrently for improved performance while
        respecting API rate limits.

        Args:
            companies: List of company names to analyze
            max_workers: Maximum concurrent analyses (default 3 to avoid rate limits)
            progress_callback: Optional callback(company_name, completed, total),
                invoked once per finished company. Exceptions raised by the
                callback are logged and otherwise ignored.
            model: Optional LLM model override (e.g. 'openai/gpt-4o')

        Returns:
            BatchAnalysisResult containing all individual results and summary stats
        """
        results: list[CompanyAnalysisResult] = []
        total = len(companies)

        logger.info("Starting batch analysis of %d companies...", total)

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_company = {
                executor.submit(self._analyze_company_safe, company, model): company
                for company in companies
            }

            for completed, future in enumerate(as_completed(future_to_company), 1):
                company = future_to_company[future]
                try:
                    result = future.result()
                except Exception as e:
                    result = CompanyAnalysisResult(
                        company_name=company,
                        analysis="",
                        patent_count=0,
                        success=False,
                        error=str(e),
                    )
                    logger.error("[%d/%d] FAIL %s: %s", completed, total, company, e)
                else:
                    status = "OK" if result.success else "FAIL"
                    logger.info("[%d/%d] %s %s", completed, total, status, company)

                # Exactly one result per company, recorded before the callback
                # runs, so a misbehaving callback cannot add a duplicate entry.
                results.append(result)

                if progress_callback:
                    try:
                        progress_callback(company, completed, total)
                    except Exception as e:
                        logger.warning("Progress callback failed for %s: %s", company, e)

        return self._summarize(results, total)

    def analyze_companies_sequential(
        self, companies: list[str], model: str | None = None
    ) -> BatchAnalysisResult:
        """Analyze multiple companies sequentially (safer for rate limits).

        Use this when you want more control over API rate limiting or
        when debugging issues.

        Args:
            companies: List of company names to analyze
            model: Optional LLM model override (e.g. 'openai/gpt-4o')

        Returns:
            BatchAnalysisResult containing all individual results
        """
        results: list[CompanyAnalysisResult] = []
        total = len(companies)

        logger.info("Starting sequential analysis of %d companies...", total)

        for idx, company in enumerate(companies, 1):
            logger.info("[%d/%d] Analyzing %s...", idx, total, company)
            result = self._analyze_company_safe(company, model)
            results.append(result)

            status = "OK" if result.success else "FAIL"
            logger.info("[%d/%d] %s %s", idx, total, status, company)

        return self._summarize(results, total)
|
||||
+1087
File diff suppressed because it is too large
Load Diff
+249
@@ -0,0 +1,249 @@
|
||||
"""JWT authentication utilities for SPARC API."""
|
||||
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional
|
||||
|
||||
import jwt
|
||||
from fastapi import Depends, HTTPException, status
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from pydantic import BaseModel
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.database import DatabaseClient
|
||||
|
||||
# JWT Configuration
|
||||
_DEFAULT_JWT_SECRET = "sparc-secret-key-change-in-production"
|
||||
JWT_SECRET = os.getenv("JWT_SECRET", _DEFAULT_JWT_SECRET)
|
||||
JWT_ALGORITHM = "HS256"
|
||||
ACCESS_TOKEN_EXPIRE_MINUTES = 30
|
||||
REFRESH_TOKEN_EXPIRE_DAYS = 7
|
||||
|
||||
|
||||
def check_jwt_secret() -> None:
    """Abort startup when the built-in JWT secret is used outside development.

    Raises:
        RuntimeError: If JWT_SECRET still equals the default value while
            APP_ENV is anything other than 'development'.
    """
    using_default_secret = JWT_SECRET == _DEFAULT_JWT_SECRET
    if not using_default_secret or config.app_env == "development":
        return

    raise RuntimeError(
        f"FATAL: JWT_SECRET is set to the default value and APP_ENV={config.app_env!r}. "
        "Set a secure JWT_SECRET environment variable before running in non-development environments."
    )
|
||||
|
||||
security = HTTPBearer()
|
||||
|
||||
|
||||
class TokenPayload(BaseModel):
    """JWT token payload.

    Mirrors the claims written by create_access_token() and
    create_refresh_token().
    """

    sub: str  # user_id as string (JWT RFC 7519 requires sub to be a string)
    email: str  # email claim copied from the user at token creation
    role: str  # role claim copied from the user at token creation
    exp: datetime  # expiry timestamp
    type: str  # "access" or "refresh"

    @property
    def user_id(self) -> int:
        """Get user_id as integer (``int(sub)``; raises ValueError if sub is not numeric)."""
        return int(self.sub)
|
||||
|
||||
|
||||
class TokenResponse(BaseModel):
    """Token response model: an access/refresh token pair."""

    access_token: str  # encoded JWT with type "access"
    refresh_token: str  # encoded JWT with type "refresh"
    token_type: str = "bearer"  # defaults to the HTTP bearer scheme
|
||||
|
||||
|
||||
class UserResponse(BaseModel):
    """User response model.

    Carries only id, email, role and creation time; no password field.
    """

    id: int  # user id
    email: str  # user email
    role: str  # compared against "admin" by get_current_admin
    created_at: datetime  # creation timestamp of the user record
|
||||
|
||||
|
||||
def create_access_token(user_id: int, email: str, role: str) -> str:
    """Build a signed access token for a user.

    The token expires ACCESS_TOKEN_EXPIRE_MINUTES after creation (UTC).

    Args:
        user_id: User ID (stored as the string ``sub`` claim)
        email: User email
        role: User role

    Returns:
        Encoded JWT token
    """
    lifetime = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    claims = dict(
        sub=str(user_id),
        email=email,
        role=role,
        exp=datetime.now(timezone.utc) + lifetime,
        type="access",
    )
    return jwt.encode(claims, JWT_SECRET, algorithm=JWT_ALGORITHM)
|
||||
|
||||
|
||||
def create_refresh_token(user_id: int, email: str, role: str) -> str:
    """Build a signed refresh token for a user.

    The token expires REFRESH_TOKEN_EXPIRE_DAYS after creation (UTC).

    Args:
        user_id: User ID (stored as the string ``sub`` claim)
        email: User email
        role: User role

    Returns:
        Encoded JWT token
    """
    lifetime = timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
    claims = dict(
        sub=str(user_id),
        email=email,
        role=role,
        exp=datetime.now(timezone.utc) + lifetime,
        type="refresh",
    )
    return jwt.encode(claims, JWT_SECRET, algorithm=JWT_ALGORITHM)
|
||||
|
||||
|
||||
def create_tokens(user_id: int, email: str, role: str) -> TokenResponse:
    """Issue an access token and a refresh token for the same user.

    Args:
        user_id: User ID
        email: User email
        role: User role

    Returns:
        TokenResponse with both tokens
    """
    access = create_access_token(user_id, email, role)
    refresh = create_refresh_token(user_id, email, role)
    return TokenResponse(access_token=access, refresh_token=refresh)
|
||||
|
||||
|
||||
def decode_token(token: str) -> Optional[TokenPayload]:
    """Decode and validate a JWT token.

    Args:
        token: JWT token string

    Returns:
        TokenPayload if the token verifies and carries all expected claims,
        None otherwise (bad signature, expired, malformed, or missing claims).
    """
    try:
        payload = jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
        return TokenPayload(**payload)
    except jwt.InvalidTokenError:
        # Base class of PyJWT's validation errors, including
        # ExpiredSignatureError, so one clause covers both cases.
        return None
    except ValueError:
        # pydantic's ValidationError derives from ValueError: a correctly
        # signed token whose claims do not match TokenPayload is treated as
        # invalid instead of surfacing as an unhandled server error.
        return None
|
||||
|
||||
|
||||
# Shared database client singleton, initialized at startup via init_db_client()
|
||||
_db_client: DatabaseClient | None = None
|
||||
|
||||
|
||||
def init_db_client() -> None:
    """Create and connect the module-level database client. Call once at app startup."""
    global _db_client
    client = DatabaseClient(config.database_url)
    # Publish the client before connecting, as the original ordering does.
    _db_client = client
    client.connect()
|
||||
|
||||
|
||||
def close_db_client() -> None:
    """Close and discard the module-level database client. Call at app shutdown.

    Does nothing when no client is currently set.
    """
    global _db_client
    if not _db_client:
        return
    _db_client.close()
    _db_client = None
|
||||
|
||||
|
||||
def get_db_client() -> DatabaseClient:
    """Return the module-level DatabaseClient used for auth operations.

    When the singleton has not been initialized yet (e.g., during tests), it
    is created and connected on first use.
    """
    if _db_client is None:
        # Same construction + connect sequence as the startup hook.
        init_db_client()
    return _db_client
|
||||
|
||||
|
||||
async def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security),
) -> UserResponse:
    """Resolve the authenticated user from the request's bearer token.

    Args:
        credentials: Bearer token from request

    Returns:
        UserResponse with user details

    Raises:
        HTTPException: 401 if the token is invalid or expired, is not an
            access token, or refers to a user that does not exist.
    """

    def unauthorized(detail: str) -> HTTPException:
        # All rejections share the same status code and challenge header.
        return HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=detail,
            headers={"WWW-Authenticate": "Bearer"},
        )

    payload = decode_token(credentials.credentials)
    if not payload:
        raise unauthorized("Invalid or expired token")

    # Refresh tokens must not be accepted as API credentials.
    if payload.type != "access":
        raise unauthorized("Invalid token type")

    user = get_db_client().get_user_by_id(payload.user_id)
    if not user:
        raise unauthorized("User not found")

    return UserResponse(
        id=user["id"],
        email=user["email"],
        role=user["role"],
        created_at=user["created_at"],
    )
|
||||
|
||||
|
||||
async def get_current_admin(
    current_user: UserResponse = Depends(get_current_user),
) -> UserResponse:
    """Allow the request only when the authenticated user is an admin.

    Args:
        current_user: Current authenticated user

    Returns:
        The same UserResponse when its role is "admin"

    Raises:
        HTTPException: 403 if the user is not an admin
    """
    if current_user.role == "admin":
        return current_user

    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Admin access required",
    )
|
||||
+59
-3
@@ -2,13 +2,69 @@
|
||||
|
||||
Loads environment variables from .env file for API keys and other secrets.
|
||||
"""
|
||||
from dotenv import load_dotenv
|
||||
import logging
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Logging configuration
|
||||
log_level = os.getenv("LOG_LEVEL", "INFO").upper()
|
||||
logging.basicConfig(
|
||||
level=getattr(logging, log_level, logging.INFO),
|
||||
format="%(asctime)s %(levelname)s %(name)s %(message)s",
|
||||
)
|
||||
|
||||
# SerpAPI key for patent search
|
||||
api_key = os.getenv("API_KEY")
|
||||
|
||||
# Anthropic API key for LLM analysis
|
||||
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
|
||||
# OpenRouter API key for LLM analysis
|
||||
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
|
||||
|
||||
# Database configuration - all messages are stored in the database
|
||||
# The database serves as both a persistent store and a cache layer
|
||||
database_url = os.getenv("DATABASE_URL", "postgresql://postgres:postgres@localhost:5432/sparc")
|
||||
|
||||
# Cache configuration
|
||||
# When enabled (default), the system checks the database for cached responses
|
||||
# before making API calls, saving tokens and reducing latency
|
||||
use_cache = os.getenv("USE_CACHE", "true").lower() in ("true", "1", "yes")
|
||||
|
||||
# Legacy compatibility - USE_DATABASE is deprecated, database is always used
|
||||
# This variable is kept for backwards compatibility but has no effect
|
||||
use_database = os.getenv("USE_DATABASE", "false").lower() in ("true", "1", "yes")
|
||||
|
||||
# Patent search configuration
|
||||
patent_search_days = int(os.getenv("PATENT_SEARCH_DAYS", "90"))
|
||||
patent_thread_workers = int(os.getenv("PATENT_THREAD_WORKERS", "5"))
|
||||
|
||||
# LLM model to use via OpenRouter (e.g. "anthropic/claude-3.5-sonnet", "openai/gpt-4o")
|
||||
model = os.getenv("MODEL", "anthropic/claude-3.5-sonnet")
|
||||
|
||||
# SERP cache TTL in hours (how long cached search results are considered fresh)
|
||||
serp_cache_ttl_hours = int(os.getenv("SERP_CACHE_TTL_HOURS", "24"))
|
||||
|
||||
# Root path for running behind a reverse proxy (e.g., "/api" when served at /api/)
|
||||
# This ensures OpenAPI docs work correctly when accessed via the proxy
|
||||
root_path = os.getenv("ROOT_PATH", "")
|
||||
|
||||
# Application environment: "development", "staging", or "production"
|
||||
# Used for safety checks (e.g., refusing default JWT secret in production)
|
||||
app_env = os.getenv("APP_ENV", "development")
|
||||
|
||||
# Storage backend: "local" (default) or "s3" for S3/MinIO object storage
|
||||
storage_backend = os.getenv("STORAGE_BACKEND", "local")
|
||||
s3_bucket = os.getenv("S3_BUCKET", "sparc-patents")
|
||||
s3_endpoint_url = os.getenv("S3_ENDPOINT_URL", "")
|
||||
s3_access_key = os.getenv("AWS_ACCESS_KEY_ID", "")
|
||||
s3_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY", "")
|
||||
|
||||
# CORS allowed origins (comma-separated)
|
||||
# Defaults to localhost dev origins when unset
|
||||
_cors_origins_raw = os.getenv("CORS_ORIGINS", "")
|
||||
cors_origins: list[str] = (
|
||||
[o.strip() for o in _cors_origins_raw.split(",") if o.strip()]
|
||||
if _cors_origins_raw
|
||||
else ["http://localhost:3000", "http://localhost:5173"]
|
||||
)
|
||||
|
||||
@@ -0,0 +1,937 @@
|
||||
"""Database client for storing and retrieving LLM messages and user authentication."""
|
||||
|
||||
import contextlib
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import bcrypt
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from psycopg2.pool import ThreadedConnectionPool
|
||||
|
||||
|
||||
class DatabaseClient:
|
||||
"""Handles database operations for message storage and retrieval."""
|
||||
|
||||
def __init__(self, database_url: str, minconn: int = 2, maxconn: int = 10):
|
||||
"""Initialize the database client.
|
||||
|
||||
Args:
|
||||
database_url: PostgreSQL connection string
|
||||
minconn: Minimum connections in the pool
|
||||
maxconn: Maximum connections in the pool
|
||||
"""
|
||||
self.database_url = database_url
|
||||
self._pool: ThreadedConnectionPool | None = None
|
||||
self._minconn = minconn
|
||||
self._maxconn = maxconn
|
||||
# Legacy single connection kept for backwards compatibility
|
||||
self.conn = None
|
||||
|
||||
def _ensure_pool(self):
|
||||
"""Create the connection pool if it doesn't exist yet."""
|
||||
if self._pool is None or self._pool.closed:
|
||||
self._pool = ThreadedConnectionPool(
|
||||
self._minconn, self._maxconn, self.database_url
|
||||
)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def get_conn(self):
|
||||
"""Check out a connection from the pool. Returns it on exit."""
|
||||
self._ensure_pool()
|
||||
conn = self._pool.getconn()
|
||||
try:
|
||||
yield conn
|
||||
finally:
|
||||
self._pool.putconn(conn)
|
||||
|
||||
def connect(self):
|
||||
"""Establish database connection (legacy single-connection path)."""
|
||||
if not self.conn or self.conn.closed:
|
||||
self.conn = psycopg2.connect(self.database_url)
|
||||
|
||||
def close(self):
|
||||
"""Close database connection and pool."""
|
||||
if self.conn and not self.conn.closed:
|
||||
self.conn.close()
|
||||
if self._pool and not self._pool.closed:
|
||||
self._pool.closeall()
|
||||
|
||||
def initialize_schema(self):
|
||||
"""Create database tables if they don't exist."""
|
||||
self.connect()
|
||||
|
||||
with self.conn.cursor() as cursor:
|
||||
# Create messages table
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS llm_messages (
|
||||
id SERIAL PRIMARY KEY,
|
||||
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
company_name VARCHAR(255),
|
||||
analysis_type VARCHAR(50),
|
||||
model VARCHAR(100),
|
||||
prompt TEXT NOT NULL,
|
||||
prompt_hash VARCHAR(64),
|
||||
response TEXT,
|
||||
metadata JSONB,
|
||||
token_usage JSONB,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
is_cached BOOLEAN DEFAULT FALSE
|
||||
)
|
||||
""")
|
||||
|
||||
# Create index on timestamp for analytics queries
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_messages_timestamp
|
||||
ON llm_messages(timestamp)
|
||||
""")
|
||||
|
||||
# Create index on company_name for filtering
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_messages_company
|
||||
ON llm_messages(company_name)
|
||||
""")
|
||||
|
||||
# Add prompt_hash and is_cached columns if they don't exist (for existing tables)
|
||||
# This must run BEFORE creating the index on prompt_hash
|
||||
cursor.execute("""
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_name = 'llm_messages' AND column_name = 'prompt_hash'
|
||||
) THEN
|
||||
ALTER TABLE llm_messages ADD COLUMN prompt_hash VARCHAR(64);
|
||||
END IF;
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_name = 'llm_messages' AND column_name = 'is_cached'
|
||||
) THEN
|
||||
ALTER TABLE llm_messages ADD COLUMN is_cached BOOLEAN DEFAULT FALSE;
|
||||
END IF;
|
||||
END $$;
|
||||
""")
|
||||
|
||||
# Create index on prompt_hash for cache lookups
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_messages_prompt_hash
|
||||
ON llm_messages(prompt_hash)
|
||||
""")
|
||||
|
||||
# Create users table for authentication
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS users (
|
||||
id SERIAL PRIMARY KEY,
|
||||
email VARCHAR(255) UNIQUE NOT NULL,
|
||||
password_hash VARCHAR(255) NOT NULL,
|
||||
role VARCHAR(20) DEFAULT 'user' CHECK (role IN ('admin', 'user')),
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""")
|
||||
|
||||
# Create index on email for fast lookups
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_users_email
|
||||
ON users(email)
|
||||
""")
|
||||
|
||||
# Create patents cache table
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS patents (
|
||||
patent_id VARCHAR(64) PRIMARY KEY,
|
||||
company_name VARCHAR(255),
|
||||
pdf_link TEXT,
|
||||
raw_sections JSONB,
|
||||
minimized_content TEXT,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_patents_company
|
||||
ON patents(company_name)
|
||||
""")
|
||||
|
||||
# Create SERP query cache table
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS serp_queries (
|
||||
id SERIAL PRIMARY KEY,
|
||||
company_name VARCHAR(255),
|
||||
query_hash VARCHAR(64) UNIQUE,
|
||||
result_patent_ids TEXT[],
|
||||
expires_at TIMESTAMP NOT NULL,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_serp_queries_hash
|
||||
ON serp_queries(query_hash)
|
||||
""")
|
||||
|
||||
# Create jobs table for persisting async batch job state
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS jobs (
|
||||
job_id VARCHAR(128) PRIMARY KEY,
|
||||
status VARCHAR(20) NOT NULL DEFAULT 'pending',
|
||||
progress INTEGER NOT NULL DEFAULT 0,
|
||||
total_companies INTEGER NOT NULL DEFAULT 0,
|
||||
completed_companies INTEGER NOT NULL DEFAULT 0,
|
||||
result_json JSONB,
|
||||
error TEXT,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_jobs_status
|
||||
ON jobs(status)
|
||||
""")
|
||||
|
||||
# Create tracked companies table for scheduled analysis
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS tracked_companies (
|
||||
id SERIAL PRIMARY KEY,
|
||||
company_name VARCHAR(255) UNIQUE NOT NULL,
|
||||
last_patent_count INTEGER DEFAULT 0,
|
||||
last_analysis_at TIMESTAMP,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""")
|
||||
|
||||
# Create alerts table for significant changes
|
||||
cursor.execute("""
|
||||
CREATE TABLE IF NOT EXISTS alerts (
|
||||
id SERIAL PRIMARY KEY,
|
||||
company_name VARCHAR(255) NOT NULL,
|
||||
alert_type VARCHAR(50) NOT NULL,
|
||||
message TEXT NOT NULL,
|
||||
old_value NUMERIC,
|
||||
new_value NUMERIC,
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""")
|
||||
|
||||
cursor.execute("""
|
||||
CREATE INDEX IF NOT EXISTS idx_alerts_company
|
||||
ON alerts(company_name)
|
||||
""")
|
||||
|
||||
self.conn.commit()
|
||||
|
||||
@staticmethod
|
||||
def hash_prompt(prompt: str) -> str:
|
||||
"""Generate a hash of the prompt for cache lookups.
|
||||
|
||||
Args:
|
||||
prompt: The prompt text to hash
|
||||
|
||||
Returns:
|
||||
SHA-256 hash of the prompt
|
||||
"""
|
||||
return hashlib.sha256(prompt.encode()).hexdigest()
|
||||
|
||||
def get_cached_response(
    self,
    prompt: str,
    company_name: Optional[str] = None,
    analysis_type: Optional[str] = None,
) -> Optional[Dict]:
    """Fetch the most recent real LLM response stored for a prompt.

    Rows whose response is NULL or starts with one of the placeholder
    markers ([DATABASE MODE], [TEST MODE], [NO API]) are never returned.

    Args:
        prompt: The prompt to look up (matched by its hash)
        company_name: Optional company name filter
        analysis_type: Optional analysis type filter

    Returns:
        The newest matching llm_messages row as a dict, or None
    """
    sql = """
        SELECT * FROM llm_messages
        WHERE prompt_hash = %s
        AND response IS NOT NULL
        AND response NOT LIKE '[DATABASE MODE]%%'
        AND response NOT LIKE '[TEST MODE]%%'
        AND response NOT LIKE '[NO API]%%'
    """
    args = [self.hash_prompt(prompt)]

    # Optional filters are only added when a truthy value was supplied.
    if company_name:
        sql += " AND company_name = %s"
        args.append(company_name)

    if analysis_type:
        sql += " AND analysis_type = %s"
        args.append(analysis_type)

    sql += " ORDER BY timestamp DESC LIMIT 1"

    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(sql, args)
            row = cur.fetchone()
            if not row:
                return None
            return dict(row)
|
||||
|
||||
def store_message(
    self,
    prompt: str,
    response: str,
    company_name: Optional[str] = None,
    analysis_type: Optional[str] = None,
    model: Optional[str] = None,
    metadata: Optional[Dict] = None,
    token_usage: Optional[Dict] = None,
    is_cached: bool = False,
) -> int:
    """Persist one prompt/response exchange in llm_messages.

    Args:
        prompt: The prompt sent to the LLM
        response: The response from the LLM
        company_name: Name of company being analyzed
        analysis_type: Type of analysis (e.g., 'single_patent', 'portfolio')
        model: Model identifier used
        metadata: Additional metadata as dict (stored as JSON text)
        token_usage: Token usage information (stored as JSON text)
        is_cached: Whether this response was served from cache

    Returns:
        The ID of the inserted record
    """
    # Falsy dicts (None or empty) are stored as SQL NULL.
    metadata_json = json.dumps(metadata) if metadata else None
    usage_json = json.dumps(token_usage) if token_usage else None

    row_values = (
        prompt,
        self.hash_prompt(prompt),
        response,
        company_name,
        analysis_type,
        model,
        metadata_json,
        usage_json,
        is_cached,
    )

    with self.get_conn() as connection:
        with connection.cursor() as cur:
            cur.execute(
                """
                INSERT INTO llm_messages
                (prompt, prompt_hash, response, company_name, analysis_type, model, metadata, token_usage, is_cached)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
                RETURNING id
                """,
                row_values,
            )
            inserted_id = cur.fetchone()[0]
            connection.commit()

    return inserted_id
|
||||
|
||||
def get_messages(
    self,
    company_name: Optional[str] = None,
    analysis_type: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Dict]:
    """Page through stored LLM messages, newest first.

    Args:
        company_name: Filter by company name
        analysis_type: Filter by analysis type
        limit: Maximum number of records to return
        offset: Number of records to skip

    Returns:
        List of message dictionaries
    """
    sql = "SELECT * FROM llm_messages WHERE 1=1"
    args = []

    # Each optional filter contributes one AND clause and one parameter.
    if company_name:
        sql += " AND company_name = %s"
        args.append(company_name)

    if analysis_type:
        sql += " AND analysis_type = %s"
        args.append(analysis_type)

    sql += " ORDER BY timestamp DESC LIMIT %s OFFSET %s"
    args += [limit, offset]

    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(sql, args)
            rows = cur.fetchall()
            return [dict(record) for record in rows]
|
||||
|
||||
def get_analytics(self, days: int = 30) -> Dict:
    """Summarise LLM message volume over a trailing window of days.

    Args:
        days: Number of days to look back from the database's NOW()

    Returns:
        Dictionary with ``total_messages`` (row count in the window),
        ``by_company`` (up to 10 companies ordered by message count),
        ``by_type`` (message count per analysis type) and ``period_days``
        (the ``days`` argument echoed back)
    """
    # The placeholder must stay outside any quoted SQL literal: the driver
    # renders each parameter as a complete SQL value, so the previous
    # "INTERVAL '%s days'" form only worked by accident for plain ints.
    # Multiplying a one-day interval by the bound number is equivalent and
    # keeps the value properly parameterized.
    window = "timestamp >= NOW() - %s * INTERVAL '1 day'"
    args = (days,)

    with self.get_conn() as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cursor:
            # Total messages
            cursor.execute(
                f"""
                SELECT COUNT(*) as total_messages
                FROM llm_messages
                WHERE {window}
                """,
                args,
            )
            total = cursor.fetchone()["total_messages"]

            # Messages by company (top 10)
            cursor.execute(
                f"""
                SELECT company_name, COUNT(*) as count
                FROM llm_messages
                WHERE {window}
                GROUP BY company_name
                ORDER BY count DESC
                LIMIT 10
                """,
                args,
            )
            by_company = cursor.fetchall()

            # Messages by analysis type
            cursor.execute(
                f"""
                SELECT analysis_type, COUNT(*) as count
                FROM llm_messages
                WHERE {window}
                GROUP BY analysis_type
                ORDER BY count DESC
                """,
                args,
            )
            by_type = cursor.fetchall()

    return {
        "total_messages": total,
        "by_company": [dict(row) for row in by_company],
        "by_type": [dict(row) for row in by_type],
        "period_days": days,
    }
|
||||
|
||||
# Patent Cache Methods
|
||||
|
||||
def get_cached_patent(self, patent_id: str) -> Optional[Dict]:
    """Fetch the cached patents row for a patent ID.

    Args:
        patent_id: Identifier of the patent to look up

    Returns:
        The full patents row as a dict, or None when no row matches.
    """
    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                "SELECT * FROM patents WHERE patent_id = %s",
                (patent_id,),
            )
            record = cur.fetchone()
            if record:
                return dict(record)
            return None
|
||||
|
||||
def store_patent(
    self,
    patent_id: str,
    company_name: str,
    pdf_link: str,
    raw_sections: Dict,
    minimized_content: str,
) -> None:
    """Insert a processed patent into the cache, or refresh an existing one.

    When a row with the same patent_id already exists, only raw_sections
    and minimized_content are overwritten.
    """
    row_values = (
        patent_id,
        company_name,
        pdf_link,
        json.dumps(raw_sections),
        minimized_content,
    )
    with self.get_conn() as connection:
        with connection.cursor() as cur:
            cur.execute(
                """
                INSERT INTO patents (patent_id, company_name, pdf_link, raw_sections, minimized_content)
                VALUES (%s, %s, %s, %s, %s)
                ON CONFLICT (patent_id) DO UPDATE SET
                raw_sections = EXCLUDED.raw_sections,
                minimized_content = EXCLUDED.minimized_content
                """,
                row_values,
            )
            connection.commit()
|
||||
|
||||
def get_cached_serp_query(self, query_hash: str) -> Optional[List[str]]:
    """Return the cached result of a SERP query if it has not expired.

    Args:
        query_hash: Hash identifying the query

    Returns:
        List of patent IDs if cache hit and not expired, None otherwise.
    """
    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                """
                SELECT result_patent_ids FROM serp_queries
                WHERE query_hash = %s AND expires_at > NOW()
                """,
                (query_hash,),
            )
            hit = cur.fetchone()
            if not hit:
                return None
            return hit["result_patent_ids"]
|
||||
|
||||
def store_serp_query(
    self,
    company_name: str,
    query_hash: str,
    patent_ids: List[str],
    ttl_hours: int = 24,
) -> None:
    """Cache the patent IDs returned by a SERP query.

    An existing row with the same query_hash has its result list and
    expiry replaced.

    Args:
        company_name: Company the query was issued for
        query_hash: Hash identifying the query
        patent_ids: Patent IDs returned by the query
        ttl_hours: Hours from now until the entry expires
    """
    # NOTE(review): expiry uses the application's naive local clock while
    # lookups compare against the database's NOW() -- confirm both agree.
    lifetime = timedelta(hours=ttl_hours)
    expires_at = datetime.now() + lifetime

    with self.get_conn() as connection:
        with connection.cursor() as cur:
            cur.execute(
                """
                INSERT INTO serp_queries (company_name, query_hash, result_patent_ids, expires_at)
                VALUES (%s, %s, %s, %s)
                ON CONFLICT (query_hash) DO UPDATE SET
                result_patent_ids = EXCLUDED.result_patent_ids,
                expires_at = EXCLUDED.expires_at
                """,
                (company_name, query_hash, patent_ids, expires_at),
            )
            connection.commit()
|
||||
|
||||
# Job Persistence Methods
|
||||
|
||||
def create_job(
    self,
    job_id: str,
    total_companies: int,
) -> Dict:
    """Insert a new batch job row in the 'pending' state.

    Args:
        job_id: Unique job identifier
        total_companies: Number of companies in the batch

    Returns:
        The inserted jobs row as a dict
    """
    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                """
                INSERT INTO jobs (job_id, status, progress, total_companies, completed_companies)
                VALUES (%s, 'pending', 0, %s, 0)
                RETURNING *
                """,
                (job_id, total_companies),
            )
            created = cur.fetchone()
            connection.commit()
            return dict(created)
|
||||
|
||||
def update_job(
    self,
    job_id: str,
    status: Optional[str] = None,
    progress: Optional[int] = None,
    completed_companies: Optional[int] = None,
    result_json: Optional[str] = None,
    error: Optional[str] = None,
) -> Optional[Dict]:
    """Apply a partial update to a job row.

    Only arguments that are not None are written. When nothing is
    supplied the current row is returned unchanged via get_job().

    Returns:
        The updated jobs row as a dict, or None if no row has this job_id
    """
    candidate_fields = (
        ("status", status),
        ("progress", progress),
        ("completed_companies", completed_companies),
        ("result_json", result_json),
        ("error", error),
    )
    supplied = [(column, value) for column, value in candidate_fields if value is not None]

    if not supplied:
        return self.get_job(job_id)

    # Column names come from the fixed tuple above, never from callers,
    # so interpolating them into the statement is safe.
    assignments = [f"{column} = %s" for column, _ in supplied]
    assignments.append("updated_at = CURRENT_TIMESTAMP")
    values = [value for _, value in supplied]
    values.append(job_id)

    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                f"UPDATE jobs SET {', '.join(assignments)} WHERE job_id = %s RETURNING *",
                values,
            )
            updated = cur.fetchone()
            connection.commit()
            if updated:
                return dict(updated)
            return None
|
||||
|
||||
def get_job(self, job_id: str) -> Optional[Dict]:
    """Look up a single job row by its ID; None when it does not exist."""
    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute("SELECT * FROM jobs WHERE job_id = %s", (job_id,))
            record = cur.fetchone()
            if record:
                return dict(record)
            return None
|
||||
|
||||
def list_jobs(
    self,
    status: Optional[str] = None,
    limit: int = 10,
    cursor: Optional[str] = None,
) -> List[Dict]:
    """Return jobs newest-first, with keyset pagination.

    Args:
        status: Optional status filter (pending, running, completed, failed).
        limit: Maximum number of jobs to return.
        cursor: Opaque cursor (``created_at|job_id``) from a previous
            response. When provided, only jobs older than the cursor are
            returned. A cursor without a ``|`` separator is ignored and
            listing starts from the newest job.

    Returns:
        List of job dicts ordered by created_at descending.
    """
    where_parts: list[str] = []
    args: list = []

    if status:
        where_parts.append("status = %s")
        args.append(status)

    if cursor:
        # Split on the last "|" so the timestamp part may itself contain one.
        ts_str, separator, cursor_job_id = cursor.rpartition("|")
        if separator:
            where_parts.append("(created_at, job_id) < (%s, %s)")
            args += [ts_str, cursor_job_id]

    sql = "SELECT * FROM jobs"
    if where_parts:
        sql += " WHERE " + " AND ".join(where_parts)
    sql += " ORDER BY created_at DESC, job_id DESC LIMIT %s"
    args.append(limit)

    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as db_cursor:
            db_cursor.execute(sql, args)
            rows = db_cursor.fetchall()
            return [dict(record) for record in rows]
|
||||
|
||||
def mark_stale_jobs_failed(self) -> int:
    """Fail every job still recorded as 'running' or 'pending'.

    Called at startup to clean up jobs that were interrupted by a restart.

    Returns:
        Number of jobs marked as failed.
    """
    with self.get_conn() as connection:
        with connection.cursor() as cur:
            cur.execute(
                """
                UPDATE jobs SET status = 'failed', error = 'Interrupted by server restart',
                updated_at = CURRENT_TIMESTAMP
                WHERE status IN ('running', 'pending')
                """
            )
            affected = cur.rowcount
            connection.commit()
            return affected
|
||||
|
||||
# User Authentication Methods
|
||||
|
||||
@staticmethod
def hash_password(password: str) -> str:
    """Produce a salted bcrypt hash for a plain-text password.

    Args:
        password: Plain text password

    Returns:
        The bcrypt hash decoded to a string
    """
    salt = bcrypt.gensalt()
    hashed = bcrypt.hashpw(password.encode(), salt)
    return hashed.decode()
|
||||
|
||||
@staticmethod
def verify_password(password: str, password_hash: str) -> bool:
    """Check a plain-text password against a stored bcrypt hash.

    Args:
        password: Plain text password
        password_hash: Stored hash

    Returns:
        True if password matches
    """
    candidate = password.encode()
    stored = password_hash.encode()
    return bcrypt.checkpw(candidate, stored)
|
||||
|
||||
def create_user(
    self,
    email: str,
    password: str,
    role: str = "user",
) -> Optional[Dict]:
    """Register a new user with a hashed password.

    Args:
        email: User email
        password: Plain text password (only its hash is stored)
        role: User role ('admin' or 'user')

    Returns:
        Dict with id, email, role and created_at of the new user, or
        None when the insert violates a unique constraint
    """
    hashed = self.hash_password(password)

    try:
        with self.get_conn() as connection:
            with connection.cursor(cursor_factory=RealDictCursor) as cur:
                cur.execute(
                    """
                    INSERT INTO users (email, password_hash, role)
                    VALUES (%s, %s, %s)
                    RETURNING id, email, role, created_at
                    """,
                    (email, hashed, role),
                )
                created = cur.fetchone()
                connection.commit()
                if created:
                    return dict(created)
                return None
    except psycopg2.errors.UniqueViolation:
        # A duplicate is reported to the caller as "not created".
        return None
|
||||
|
||||
def authenticate_user(self, email: str, password: str) -> Optional[Dict]:
    """Verify login credentials.

    Args:
        email: User email
        password: Plain text password

    Returns:
        Dict with id, email, role and created_at when the email exists
        and the password matches its stored hash; None otherwise
    """
    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                "SELECT * FROM users WHERE email = %s",
                (email,),
            )
            record = cur.fetchone()

    if not record:
        return None
    if not self.verify_password(password, record["password_hash"]):
        return None

    # Expose only the public columns; the hash never leaves this method.
    return {field: record[field] for field in ("id", "email", "role", "created_at")}
|
||||
|
||||
def get_user_by_id(self, user_id: int) -> Optional[Dict]:
    """Fetch a user's public columns by primary key.

    Args:
        user_id: User ID

    Returns:
        Dict with id, email, role and created_at, or None
    """
    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                "SELECT id, email, role, created_at FROM users WHERE id = %s",
                (user_id,),
            )
            record = cur.fetchone()
            if record:
                return dict(record)
            return None
|
||||
|
||||
def get_user_by_email(self, email: str) -> Optional[Dict]:
    """Fetch a user's public columns by email address.

    Args:
        email: User email

    Returns:
        Dict with id, email, role and created_at, or None
    """
    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                "SELECT id, email, role, created_at FROM users WHERE email = %s",
                (email,),
            )
            record = cur.fetchone()
            if record:
                return dict(record)
            return None
|
||||
|
||||
def get_all_users(self, limit: int = 100, offset: int = 0) -> List[Dict]:
    """Page through users, most recently created first (admin only).

    Args:
        limit: Maximum number of users
        offset: Offset for pagination

    Returns:
        List of dicts with id, email, role and created_at
    """
    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                """
                SELECT id, email, role, created_at
                FROM users
                ORDER BY created_at DESC
                LIMIT %s OFFSET %s
                """,
                (limit, offset),
            )
            rows = cur.fetchall()
            return [dict(record) for record in rows]
|
||||
|
||||
def update_user_role(self, user_id: int, role: str) -> Optional[Dict]:
    """Change a user's role and bump updated_at (admin only).

    Args:
        user_id: User ID
        role: New role ('admin' or 'user')

    Returns:
        Dict with id, email, role and created_at of the updated user,
        or None when no user has this ID
    """
    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                """
                UPDATE users
                SET role = %s, updated_at = CURRENT_TIMESTAMP
                WHERE id = %s
                RETURNING id, email, role, created_at
                """,
                (role, user_id),
            )
            updated = cur.fetchone()
            connection.commit()
            if updated:
                return dict(updated)
            return None
|
||||
|
||||
def delete_user(self, user_id: int) -> bool:
    """Remove a user row (admin only).

    Args:
        user_id: User ID

    Returns:
        True if a row was deleted, False if no user had this ID
    """
    with self.get_conn() as connection:
        with connection.cursor() as cur:
            cur.execute("DELETE FROM users WHERE id = %s", (user_id,))
            removed_rows = cur.rowcount
            connection.commit()
            return removed_rows > 0
|
||||
|
||||
def get_user_count(self) -> int:
    """Count the rows in the users table.

    Returns:
        Number of users
    """
    with self.get_conn() as connection:
        with connection.cursor() as cur:
            cur.execute("SELECT COUNT(*) FROM users")
            (total,) = cur.fetchone()[:1]
            return total
|
||||
|
||||
# Tracked Companies Methods
|
||||
|
||||
def add_tracked_company(self, company_name: str) -> Optional[Dict]:
    """Add a company to the tracking list.

    Args:
        company_name: Name of the company to start tracking

    Returns:
        The inserted tracked_companies row as a dict, or None when the
        company could not be added (it is already tracked, or the insert
        failed for another reason, in which case the error is logged).
    """
    import logging  # local import: this block cannot see the file's import section

    with self.get_conn() as conn:
        with conn.cursor(cursor_factory=RealDictCursor) as cursor:
            try:
                cursor.execute(
                    "INSERT INTO tracked_companies (company_name) VALUES (%s) RETURNING *",
                    (company_name,),
                )
                row = cursor.fetchone()
                conn.commit()
            except psycopg2.errors.UniqueViolation:
                # Expected outcome for a company that is already tracked.
                conn.rollback()
                return None
            except Exception:
                # Keep the historical "None on failure" contract, but leave
                # a trace instead of silently discarding unexpected errors.
                conn.rollback()
                logging.getLogger(__name__).exception(
                    "Failed to add tracked company %r", company_name
                )
                return None
            return dict(row) if row else None
|
||||
|
||||
def remove_tracked_company(self, company_name: str) -> bool:
    """Stop tracking a company; the name is matched case-insensitively.

    Returns:
        True if at least one row was deleted
    """
    with self.get_conn() as connection:
        with connection.cursor() as cur:
            cur.execute(
                "DELETE FROM tracked_companies WHERE LOWER(company_name) = LOWER(%s)",
                (company_name,),
            )
            removed_rows = cur.rowcount
            connection.commit()
            return removed_rows > 0
|
||||
|
||||
def list_tracked_companies(self) -> List[Dict]:
    """Return every tracked company row, ordered by company name."""
    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute("SELECT * FROM tracked_companies ORDER BY company_name")
            rows = cur.fetchall()
            return [dict(record) for record in rows]
|
||||
|
||||
def update_tracked_company(
    self, company_name: str, patent_count: int
) -> None:
    """Record the latest patent count and analysis time for a company.

    The company name is matched case-insensitively.
    """
    statement = """UPDATE tracked_companies
        SET last_patent_count = %s, last_analysis_at = CURRENT_TIMESTAMP
        WHERE LOWER(company_name) = LOWER(%s)"""
    with self.get_conn() as connection:
        with connection.cursor() as cur:
            cur.execute(statement, (patent_count, company_name))
            connection.commit()
|
||||
|
||||
def store_alert(
    self,
    company_name: str,
    alert_type: str,
    message: str,
    old_value: float | None = None,
    new_value: float | None = None,
) -> None:
    """Insert one row into the alerts table.

    Args:
        company_name: Company the alert concerns
        alert_type: Short category label for the alert
        message: Human-readable description
        old_value: Value before the change, if any
        new_value: Value after the change, if any
    """
    statement = """INSERT INTO alerts (company_name, alert_type, message, old_value, new_value)
        VALUES (%s, %s, %s, %s, %s)"""
    row_values = (company_name, alert_type, message, old_value, new_value)
    with self.get_conn() as connection:
        with connection.cursor() as cur:
            cur.execute(statement, row_values)
            connection.commit()
|
||||
|
||||
def list_alerts(self, limit: int = 50) -> List[Dict]:
    """Return up to ``limit`` alerts, most recently created first."""
    with self.get_conn() as connection:
        with connection.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(
                "SELECT * FROM alerts ORDER BY created_at DESC LIMIT %s",
                (limit,),
            )
            rows = cur.fetchall()
            return [dict(record) for record in rows]
|
||||
+176
-21
@@ -1,28 +1,52 @@
|
||||
"""LLM integration for patent analysis using Anthropic's Claude."""
|
||||
"""LLM integration for patent analysis using OpenRouter."""
|
||||
|
||||
from anthropic import Anthropic
|
||||
from SPARC import config
|
||||
import logging
|
||||
from typing import Dict
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.database import DatabaseClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LLMAnalyzer:
|
||||
"""Handles LLM-based analysis of patent content."""
|
||||
|
||||
def __init__(self, api_key: str | None = None):
|
||||
def __init__(self, api_key: str | None = None, test_mode: bool = False, use_cache: bool | None = None):
|
||||
"""Initialize the LLM analyzer.
|
||||
|
||||
Args:
|
||||
api_key: Anthropic API key. If None, will attempt to load from config.
|
||||
api_key: OpenRouter API key. If None, will attempt to load from config.
|
||||
test_mode: If True, print prompts instead of making API calls
|
||||
use_cache: If True, check database cache before making API calls.
|
||||
If None, uses config.use_cache (default: True)
|
||||
"""
|
||||
self.client = Anthropic(api_key=api_key or config.anthropic_api_key)
|
||||
self.model = "claude-3-5-sonnet-20241022"
|
||||
self.test_mode = test_mode
|
||||
self.use_cache = use_cache if use_cache is not None else config.use_cache
|
||||
self.model = config.model
|
||||
|
||||
def analyze_patent_content(self, patent_content: str, company_name: str) -> str:
|
||||
# Always initialize database client for storage and caching
|
||||
self.db_client = DatabaseClient(config.database_url)
|
||||
self.db_client.initialize_schema()
|
||||
|
||||
# Initialize OpenRouter client if API key is available
|
||||
if (api_key or config.openrouter_api_key) and not test_mode:
|
||||
self.client = OpenAI(
|
||||
api_key=api_key or config.openrouter_api_key,
|
||||
base_url="https://openrouter.ai/api/v1"
|
||||
)
|
||||
else:
|
||||
self.client = None
|
||||
|
||||
def analyze_patent_content(self, patent_content: str, company_name: str, model: str | None = None) -> str:
|
||||
"""Analyze patent content to estimate company innovation and performance.
|
||||
|
||||
Args:
|
||||
patent_content: Minimized patent text (abstract, claims, summary)
|
||||
company_name: Name of the company for context
|
||||
model: Optional model override (e.g. "openai/gpt-4o"). Defaults to config.
|
||||
|
||||
Returns:
|
||||
Analysis text describing innovation quality and potential impact
|
||||
@@ -40,16 +64,76 @@ Patent Content:
|
||||
|
||||
Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""
|
||||
|
||||
message = self.client.messages.create(
|
||||
model=self.model,
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
effective_model = model or self.model
|
||||
|
||||
return message.content[0].text
|
||||
if self.test_mode:
|
||||
logger.debug("TEST MODE - Prompt that would be sent to LLM:\n%s", prompt)
|
||||
return "[TEST MODE - No API call made]"
|
||||
|
||||
# Check cache first
|
||||
if self.use_cache:
|
||||
cached = self.db_client.get_cached_response(
|
||||
prompt=prompt,
|
||||
company_name=company_name,
|
||||
analysis_type="single_patent"
|
||||
)
|
||||
if cached:
|
||||
# Log the cache hit
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=cached["response"],
|
||||
company_name=company_name,
|
||||
analysis_type="single_patent",
|
||||
model=effective_model,
|
||||
metadata={
|
||||
"patent_content_length": len(patent_content),
|
||||
"cache_hit": True,
|
||||
"original_message_id": cached["id"]
|
||||
},
|
||||
is_cached=True
|
||||
)
|
||||
return cached["response"]
|
||||
|
||||
# Call API if no cache hit and client is available
|
||||
if self.client:
|
||||
response = self.client.chat.completions.create(
|
||||
model=effective_model,
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
response_text = response.choices[0].message.content
|
||||
|
||||
# Store in database for future cache lookups
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=response_text,
|
||||
company_name=company_name,
|
||||
analysis_type="single_patent",
|
||||
model=effective_model,
|
||||
metadata={"patent_content_length": len(patent_content)},
|
||||
token_usage={
|
||||
"prompt_tokens": response.usage.prompt_tokens,
|
||||
"completion_tokens": response.usage.completion_tokens,
|
||||
"total_tokens": response.usage.total_tokens
|
||||
} if hasattr(response, 'usage') else None
|
||||
)
|
||||
|
||||
return response_text
|
||||
|
||||
# No API client available - store prompt for later processing
|
||||
placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=placeholder,
|
||||
company_name=company_name,
|
||||
analysis_type="single_patent",
|
||||
model=effective_model,
|
||||
metadata={"patent_content_length": len(patent_content), "pending": True}
|
||||
)
|
||||
return placeholder
|
||||
|
||||
def analyze_patent_portfolio(
|
||||
self, patents_data: list[Dict[str, str]], company_name: str
|
||||
self, patents_data: list[Dict[str, str]], company_name: str, model: str | None = None
|
||||
) -> str:
|
||||
"""Analyze multiple patents to estimate overall company performance.
|
||||
|
||||
@@ -84,10 +168,81 @@ Patent Portfolio:
|
||||
|
||||
Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""
|
||||
|
||||
message = self.client.messages.create(
|
||||
model=self.model,
|
||||
max_tokens=2048,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
effective_model = model or self.model
|
||||
|
||||
return message.content[0].text
|
||||
if self.test_mode:
|
||||
logger.debug("TEST MODE - Portfolio prompt:\n%s", prompt)
|
||||
return "[TEST MODE]"
|
||||
|
||||
metadata = {
|
||||
"patent_count": len(patents_data),
|
||||
"patent_ids": [p['patent_id'] for p in patents_data],
|
||||
"model": effective_model,
|
||||
}
|
||||
|
||||
# Check cache first
|
||||
if self.use_cache:
|
||||
cached = self.db_client.get_cached_response(
|
||||
prompt=prompt,
|
||||
company_name=company_name,
|
||||
analysis_type="portfolio"
|
||||
)
|
||||
if cached:
|
||||
# Log the cache hit
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=cached["response"],
|
||||
company_name=company_name,
|
||||
analysis_type="portfolio",
|
||||
model=effective_model,
|
||||
metadata={
|
||||
**metadata,
|
||||
"cache_hit": True,
|
||||
"original_message_id": cached["id"]
|
||||
},
|
||||
is_cached=True
|
||||
)
|
||||
return cached["response"]
|
||||
|
||||
# Call API if no cache hit and client is available
|
||||
if self.client:
|
||||
try:
|
||||
response = self.client.chat.completions.create(
|
||||
model=effective_model,
|
||||
max_tokens=2048,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
|
||||
response_text = response.choices[0].message.content
|
||||
|
||||
# Store in database for future cache lookups
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=response_text,
|
||||
company_name=company_name,
|
||||
analysis_type="portfolio",
|
||||
model=effective_model,
|
||||
metadata=metadata,
|
||||
token_usage={
|
||||
"prompt_tokens": response.usage.prompt_tokens,
|
||||
"completion_tokens": response.usage.completion_tokens,
|
||||
"total_tokens": response.usage.total_tokens
|
||||
} if hasattr(response, 'usage') else None
|
||||
)
|
||||
|
||||
return response_text
|
||||
except AttributeError:
|
||||
return prompt
|
||||
|
||||
# No API client available - store prompt for later processing
|
||||
placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=placeholder,
|
||||
company_name=company_name,
|
||||
analysis_type="portfolio",
|
||||
model=effective_model,
|
||||
metadata={**metadata, "pending": True}
|
||||
)
|
||||
return placeholder
|
||||
|
||||
|
||||
@@ -0,0 +1,109 @@
|
||||
"""Scheduled patent analysis for tracked companies.
|
||||
|
||||
Uses APScheduler to periodically re-analyze tracked companies and
|
||||
detect significant changes in patent counts.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
from SPARC.database import DatabaseClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Hours between scheduled runs; read from SCHEDULE_INTERVAL_HOURS (default 24)
SCHEDULE_INTERVAL_HOURS = int(os.environ.get("SCHEDULE_INTERVAL_HOURS", "24"))

# Percentage change in patent count that triggers an alert; read from
# CHANGE_THRESHOLD_PERCENT (default 20)
CHANGE_THRESHOLD_PERCENT = int(os.environ.get("CHANGE_THRESHOLD_PERCENT", "20"))
|
||||
|
||||
|
||||
def _record_patent_count_change(db, name: str, old_count: int, new_count: int) -> None:
    """Store an alert when the patent count moved by at least the threshold."""
    if old_count <= 0:
        if new_count > 0:
            # First analysis -- nothing to compare against, just note the baseline
            logger.info("Baseline for %s: %d patents", name, new_count)
        return

    delta_pct = abs(new_count - old_count) / old_count * 100
    if delta_pct < CHANGE_THRESHOLD_PERCENT:
        return

    direction = "increased" if new_count > old_count else "decreased"
    message = (
        f"Patent count for {name} {direction} by {delta_pct:.0f}% "
        f"({old_count} -> {new_count})"
    )
    logger.warning("ALERT: %s", message)
    db.store_alert(
        company_name=name,
        alert_type="patent_count_change",
        message=message,
        old_value=old_count,
        new_value=new_count,
    )


def run_scheduled_analysis() -> None:
    """Re-analyze all tracked companies and check for significant changes.

    For each tracked company the analysis is re-run, the stored patent
    count is refreshed, and an alert is stored when the count changed by
    at least CHANGE_THRESHOLD_PERCENT relative to the previous value. A
    failure for one company is logged and does not stop the others.
    """
    db = DatabaseClient(config.database_url)
    db.connect()

    # The connection is released on every exit path, including the early
    # return below and any unexpected error.
    try:
        db.initialize_schema()

        tracked = db.list_tracked_companies()
        if not tracked:
            logger.info("No tracked companies configured; skipping scheduled analysis")
            return

        logger.info("Running scheduled analysis for %d tracked companies", len(tracked))

        analyzer = CompanyAnalyzer(db_client=db)

        for company_row in tracked:
            name = company_row["company_name"]
            # A NULL or missing previous count is treated as "no baseline yet".
            old_count = company_row.get("last_patent_count", 0) or 0

            try:
                result = analyzer._analyze_company_safe(name)

                if not result.success:
                    logger.warning("Scheduled analysis failed for %s: %s", name, result.error)
                    continue

                new_count = result.patent_count

                # Update tracking record before evaluating the change
                db.update_tracked_company(name, new_count)
                _record_patent_count_change(db, name, old_count, new_count)

            except Exception as e:
                # Per-company boundary: log with traceback and carry on.
                logger.exception("Error analyzing tracked company %s: %s", name, e)
    finally:
        db.close()

    logger.info("Scheduled analysis complete")
|
||||
|
||||
|
||||
def start_scheduler() -> None:
    """Start the APScheduler background scheduler.

    Safe to call at application startup.  APScheduler is imported lazily;
    if it is not installed a warning is logged and nothing is started.
    Otherwise ``run_scheduled_analysis`` is registered as an interval job
    running every ``SCHEDULE_INTERVAL_HOURS`` hours.
    """
    try:
        from apscheduler.schedulers.background import BackgroundScheduler
    except ImportError:
        logger.warning(
            "apscheduler not installed; scheduled analysis disabled. "
            "Install with: pip install apscheduler"
        )
        return

    job_options = {
        "hours": SCHEDULE_INTERVAL_HOURS,
        "id": "scheduled_patent_analysis",
        # Re-registering under the same id replaces the existing job.
        "replace_existing": True,
    }

    background = BackgroundScheduler()
    background.add_job(run_scheduled_analysis, "interval", **job_options)
    background.start()

    logger.info(
        "Scheduled patent analysis started (every %d hours, threshold %d%%)",
        SCHEDULE_INTERVAL_HOURS,
        CHANGE_THRESHOLD_PERCENT,
    )
|
||||
+87
-20
@@ -1,67 +1,134 @@
|
||||
import serpapi
|
||||
from SPARC import config
|
||||
import io
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict
|
||||
|
||||
import pdfplumber # pip install pdfplumber
|
||||
import requests
|
||||
from typing import Dict
|
||||
from SPARC.types import Patents, Patent
|
||||
import serpapi
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.storage import StorageBackend, get_storage_backend
|
||||
from SPARC.types import Patent, Patents
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Module-level storage instance (lazy-initialized)
|
||||
_storage: StorageBackend | None = None
|
||||
|
||||
|
||||
def _get_storage() -> StorageBackend:
    """Return the module-level storage backend, creating it on first use."""
    global _storage
    if _storage is not None:
        return _storage
    _storage = get_storage_backend()
    return _storage
|
||||
|
||||
|
||||
class SERP:
|
||||
def query(company: str, days_back: int | None = None) -> Patents:
    """Query Google Patents for a company's recent patents.

    Args:
        company: Name of the company to search for
        days_back: Number of days to look back for patents (default from config)

    Returns:
        Patents object containing list of patents with PDF links

    Note:
        Patents without PDF download links are skipped. This occurs when
        Google Patents doesn't have a PDF available for a particular patent
        (e.g., recently filed patents, certain international patents, or
        patents with restricted access). The returned count may be lower
        than the requested number of results.  A response with no
        ``organic_results`` yields an empty Patents object.
    """
    if days_back is None:
        days_back = config.patent_search_days
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days_back)
    # Unpadded M/D/YYYY is built from the date attributes because the
    # "%-m"/"%-d" strftime flags are platform-specific extensions.
    date_filter = (
        f"cdr:1,cd_min:{start_date.month}/{start_date.day}/{start_date.year},"
        f"cd_max:{end_date.month}/{end_date.day}/{end_date.year}"
    )

    # Make API call
    params = {
        "engine": "google_patents",
        "q": company,
        "num": 10,
        "filter": 1,
        "tbs": date_filter,
        "api_key": config.api_key,
    }
    logger.info("Querying Google Patents for '%s' (last %d days)", company, days_back)
    search = serpapi.search(params)

    # Convert results to Patent objects, skipping any without PDF links
    patent_ids = []
    # NOTE(review): assumes the search result is dict-like and supports .get()
    for patent in search.get("organic_results") or []:
        publication_number = patent.get("publication_number")
        if not publication_number:
            logger.debug("Skipping result without a publication number")
            continue
        pdf_link = patent.get("pdf")
        if pdf_link:
            patent_ids.append(Patent(patent_id=publication_number, pdf_link=pdf_link, summary=None))
        else:
            logger.debug("Skipping patent %s (no PDF link)", publication_number)

    logger.info("Found %d patents with PDF links for '%s'", len(patent_ids), company)
    return Patents(patents=patent_ids)
|
||||
|
||||
def save_patents(patent: Patent) -> Patent:
    """Save the patent PDF to storage, skipping download if already cached.

    Uses the configured storage backend (local filesystem or S3).

    Args:
        patent: Patent object

    Returns:
        Patent object with updated PDF path

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.  Nothing is written to
            storage in that case, so an error page is never cached as a PDF.
    """
    storage = _get_storage()
    key = f"{patent.patent_id}.pdf"

    if storage.exists(key):
        logger.debug("Using cached PDF for %s", patent.patent_id)
    else:
        logger.info("Downloading PDF for %s", patent.patent_id)
        # A timeout keeps a stalled server from hanging the caller forever.
        response = requests.get(patent.pdf_link, timeout=60)
        # Fail before writing: exists() treats any non-empty object as a
        # valid cached PDF, so an error body must never reach storage.
        response.raise_for_status()
        storage.write(key, response.content)
        logger.debug("Saved %d bytes for %s", len(response.content), patent.patent_id)

    patent.pdf_path = storage.path_for(key)
    return patent
|
||||
|
||||
def parse_patent_pdf(pdf_path: str) -> Dict:
|
||||
"""Extract structured sections from patent PDF.
|
||||
|
||||
Extracts all major sections from a patent PDF including abstract,
|
||||
claims, summary, and detailed description.
|
||||
claims, summary, and detailed description. Supports both local file
|
||||
paths and S3 URIs (s3://bucket/key).
|
||||
|
||||
Args:
|
||||
pdf_path: Path to the patent PDF file
|
||||
pdf_path: Local path or S3 URI to the patent PDF file
|
||||
|
||||
Returns:
|
||||
Dictionary containing all extracted sections
|
||||
"""
|
||||
logger.debug("Parsing patent PDF: %s", pdf_path)
|
||||
|
||||
with pdfplumber.open(pdf_path) as pdf:
|
||||
if pdf_path.startswith("s3://"):
|
||||
# Read from S3 via storage backend
|
||||
storage = _get_storage()
|
||||
# Extract key from "s3://bucket/key"
|
||||
key = pdf_path.split("/", 3)[-1]
|
||||
data = storage.read(key)
|
||||
pdf_file: io.BytesIO | str = io.BytesIO(data)
|
||||
else:
|
||||
pdf_file = pdf_path
|
||||
|
||||
with pdfplumber.open(pdf_file) as pdf:
|
||||
# Extract all text
|
||||
full_text = ""
|
||||
for page in pdf.pages:
|
||||
full_text += page.extract_text() + "\n"
|
||||
logger.debug("Extracted text from %d pages (%d chars)", len(pdf.pages), len(full_text))
|
||||
|
||||
# Define section patterns (common in patents)
|
||||
sections = {
|
||||
|
||||
@@ -0,0 +1,171 @@
|
||||
"""Patent PDF storage abstraction.
|
||||
|
||||
Provides a unified interface for reading and writing patent PDF files,
|
||||
with pluggable backends for local filesystem and S3-compatible object
|
||||
storage (e.g., MinIO, AWS S3).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from SPARC import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class StorageBackend(ABC):
    """Abstract base class for patent PDF storage."""

    @abstractmethod
    def read(self, key: str) -> bytes:
        """Read a file by key.

        Args:
            key: Storage key (e.g., "US-12345678-B2.pdf")

        Returns:
            File contents as bytes.

        Raises:
            FileNotFoundError: If the file does not exist.
        """

    @abstractmethod
    def write(self, key: str, data: bytes) -> None:
        """Write data to storage.

        Args:
            key: Storage key (e.g., "US-12345678-B2.pdf")
            data: File contents as bytes.
        """

    @abstractmethod
    def exists(self, key: str) -> bool:
        """Check if a file exists in storage.

        Args:
            key: Storage key.

        Returns:
            True if the file exists and has non-zero size.
        """

    @abstractmethod
    def path_for(self, key: str) -> str:
        """Return a path or URI suitable for downstream consumers.

        For local storage this is a filesystem path; for S3 it is an
        ``s3://bucket/key`` URI.  Callers that need the bytes of an S3
        object should obtain them through read().

        Args:
            key: Storage key.
        """
|
||||
|
||||
|
||||
class LocalStorageBackend(StorageBackend):
    """Store patent PDFs on the local filesystem under a directory."""

    def __init__(self, base_dir: str = "patents"):
        """Create the backend and make sure ``base_dir`` exists."""
        self.base_dir = base_dir
        os.makedirs(self.base_dir, exist_ok=True)

    def _full_path(self, key: str) -> str:
        """Return the filesystem path for ``key`` under ``base_dir``."""
        return os.path.join(self.base_dir, key)

    def read(self, key: str) -> bytes:
        """Return the bytes stored under ``key``.

        Raises:
            FileNotFoundError: If no file exists for ``key``.
        """
        path = self._full_path(key)
        # Open directly instead of checking first: avoids a window where
        # the file disappears between the check and the open.
        try:
            with open(path, "rb") as f:
                return f.read()
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found: {path}") from e

    def write(self, key: str, data: bytes) -> None:
        """Write ``data`` under ``key``, replacing any existing file.

        The data goes to a temporary sibling file that is then renamed over
        the destination, so an interrupted write cannot leave a partial file
        that exists() would report as a valid cached PDF.
        """
        path = self._full_path(key)
        os.makedirs(os.path.dirname(path) or self.base_dir, exist_ok=True)
        # The PID suffix keeps concurrent processes from sharing a temp file.
        tmp_path = f"{path}.{os.getpid()}.tmp"
        try:
            with open(tmp_path, "wb") as f:
                f.write(data)
            os.replace(tmp_path, path)
        except Exception:
            # Best-effort cleanup of the temp file, then surface the error.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            raise
        logger.debug("Wrote %d bytes to %s", len(data), path)

    def exists(self, key: str) -> bool:
        """Return True if a non-empty file exists for ``key``."""
        path = self._full_path(key)
        return os.path.exists(path) and os.path.getsize(path) > 0

    def path_for(self, key: str) -> str:
        """Return the filesystem path for ``key``."""
        return self._full_path(key)
|
||||
|
||||
|
||||
class S3StorageBackend(StorageBackend):
    """Store patent PDFs in an S3-compatible bucket."""

    def __init__(
        self,
        bucket: str,
        endpoint_url: str = "",
        access_key: str = "",
        secret_key: str = "",
    ):
        """Create the S3 client and make sure the bucket is reachable.

        Args:
            bucket: Name of the bucket that holds the PDFs.
            endpoint_url: Custom endpoint; only passed to boto3 when non-empty.
            access_key: Access key; only used when secret_key is also given.
            secret_key: Secret key; only used when access_key is also given.
        """
        import boto3

        kwargs: dict = {}
        if endpoint_url:
            kwargs["endpoint_url"] = endpoint_url
        if access_key and secret_key:
            kwargs["aws_access_key_id"] = access_key
            kwargs["aws_secret_access_key"] = secret_key

        self.s3 = boto3.client("s3", **kwargs)
        self.bucket = bucket

        # Ensure bucket exists (useful for MinIO local dev)
        try:
            self.s3.head_bucket(Bucket=self.bucket)
        except Exception as head_error:
            # Best effort: any head_bucket failure triggers a create attempt.
            logger.debug("head_bucket failed for %s: %s", self.bucket, head_error)
            try:
                self.s3.create_bucket(Bucket=self.bucket)
                logger.info("Created S3 bucket: %s", self.bucket)
            except Exception as e:
                logger.warning("Could not create bucket %s: %s", self.bucket, e)

    def read(self, key: str) -> bytes:
        """Return the bytes of the object stored under ``key``.

        Raises:
            FileNotFoundError: If the object does not exist.  The underlying
                client error is attached as the exception's cause.
        """
        try:
            response = self.s3.get_object(Bucket=self.bucket, Key=key)
            return response["Body"].read()
        except self.s3.exceptions.NoSuchKey as e:
            raise FileNotFoundError(f"S3 object not found: s3://{self.bucket}/{key}") from e
        except Exception as e:
            # Fallback for "not found" errors that do not surface as NoSuchKey.
            if "NoSuchKey" in str(e) or "404" in str(e):
                raise FileNotFoundError(f"S3 object not found: s3://{self.bucket}/{key}") from e
            raise

    def write(self, key: str, data: bytes) -> None:
        """Upload ``data`` under ``key`` with a PDF content type."""
        self.s3.put_object(
            Bucket=self.bucket,
            Key=key,
            Body=data,
            ContentType="application/pdf",
        )
        logger.debug("Wrote %d bytes to s3://%s/%s", len(data), self.bucket, key)

    def exists(self, key: str) -> bool:
        """Return True if the object exists and has non-zero size.

        Any error from the head request is treated as "does not exist".
        """
        try:
            response = self.s3.head_object(Bucket=self.bucket, Key=key)
            return response["ContentLength"] > 0
        except Exception as e:
            logger.debug("head_object failed for s3://%s/%s: %s", self.bucket, key, e)
            return False

    def path_for(self, key: str) -> str:
        """Return the ``s3://bucket/key`` URI for ``key``."""
        return f"s3://{self.bucket}/{key}"
|
||||
|
||||
|
||||
def get_storage_backend() -> StorageBackend:
    """Factory: build the storage backend selected by ``config.storage_backend``.

    The value is compared case-insensitively; "s3" selects the S3 backend
    and every other value selects the local filesystem backend.
    """
    selected = config.storage_backend.lower()

    if selected != "s3":
        logger.info("Using local storage backend")
        return LocalStorageBackend()

    logger.info("Using S3 storage backend (bucket=%s)", config.s3_bucket)
    s3_settings = {
        "bucket": config.s3_bucket,
        "endpoint_url": config.s3_endpoint_url,
        "access_key": config.s3_access_key,
        "secret_key": config.s3_secret_key,
    }
    return S3StorageBackend(**s3_settings)
|
||||
+27
-2
@@ -1,9 +1,10 @@
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
@dataclass
|
||||
class Patent:
|
||||
patent_id: int
|
||||
patent_id: str
|
||||
pdf_link: str
|
||||
pdf_path: str | None = None
|
||||
summary: dict | None = None
|
||||
@@ -12,3 +13,27 @@ class Patent:
|
||||
@dataclass
class Patents:
    """Container for a list of Patent records."""

    patents: list[Patent]
|
||||
|
||||
|
||||
@dataclass
|
||||
class CompanyAnalysisResult:
|
||||
"""Result of analyzing a single company's patent portfolio."""
|
||||
|
||||
company_name: str
|
||||
analysis: str
|
||||
patent_count: int
|
||||
success: bool
|
||||
error: str | None = None
|
||||
model: str | None = None
|
||||
timestamp: datetime = field(default_factory=datetime.now)
|
||||
|
||||
|
||||
@dataclass
class BatchAnalysisResult:
    """Outcome of a batch analysis covering several companies."""

    # One entry per analyzed company
    results: list[CompanyAnalysisResult]

    # Counters supplied by the caller
    total_companies: int
    successful: int
    failed: int

    # Set to datetime.now() when the instance is created
    timestamp: datetime = field(default_factory=datetime.now)
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
"""Webhook notifications for job completion and alert events.
|
||||
|
||||
Sends JSON payloads to configured webhook URLs with retry logic.
|
||||
Supports generic HTTP POST and Slack-compatible text payloads.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Webhook targets come from the WEBHOOK_URLS environment variable as a
# comma-separated list; surrounding whitespace and blank entries are dropped.
_WEBHOOK_URLS_RAW = os.getenv("WEBHOOK_URLS", "")
WEBHOOK_URLS: list[str] = [
    candidate
    for candidate in map(str.strip, _WEBHOOK_URLS_RAW.split(","))
    if candidate
]

# Maximum delivery attempts per webhook.
MAX_RETRIES = 3
# Base of the exponential backoff between attempts, in seconds.
BACKOFF_BASE = 2
|
||||
|
||||
|
||||
def _is_slack_url(url: str) -> bool:
|
||||
"""Check if a URL looks like a Slack incoming webhook."""
|
||||
return "hooks.slack.com" in url or "discord.com/api/webhooks" in url
|
||||
|
||||
|
||||
def _build_payload(event_type: str, data: dict[str, Any], slack: bool = False) -> dict:
|
||||
"""Build the webhook payload.
|
||||
|
||||
Args:
|
||||
event_type: Type of event (e.g., "job_completed", "alert")
|
||||
data: Event-specific data
|
||||
slack: If True, wrap in Slack-compatible ``text`` format
|
||||
|
||||
Returns:
|
||||
JSON-serializable payload dict
|
||||
"""
|
||||
payload = {
|
||||
"event": event_type,
|
||||
"timestamp": datetime.utcnow().isoformat() + "Z",
|
||||
**data,
|
||||
}
|
||||
|
||||
if slack:
|
||||
# Build a human-readable summary for Slack/Discord
|
||||
lines = [f"*[SPARC] {event_type}*"]
|
||||
for key, value in data.items():
|
||||
lines.append(f" {key}: {value}")
|
||||
return {"text": "\n".join(lines)}
|
||||
|
||||
return payload
|
||||
|
||||
|
||||
def _send_with_retry(url: str, payload: dict) -> bool:
    """Send a POST request with exponential backoff retry.

    Network errors and retryable HTTP statuses are retried up to
    ``MAX_RETRIES`` times, sleeping ``BACKOFF_BASE ** attempt`` seconds
    between attempts.  A 4xx response other than 408, 425 or 429 means the
    request itself was rejected, so it is not retried.

    Args:
        url: Webhook URL
        payload: JSON payload to send

    Returns:
        True if delivered successfully, False if the request was rejected
        or all retries were exhausted
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            response = requests.post(url, json=payload, timeout=10)
        except requests.RequestException as e:
            logger.warning(
                "Webhook delivery failed for %s (attempt %d/%d): %s",
                url, attempt, MAX_RETRIES, e,
            )
        else:
            status = response.status_code
            if status < 300:
                logger.debug("Webhook delivered to %s (attempt %d)", url, attempt)
                return True
            logger.warning(
                "Webhook %s returned %d (attempt %d/%d)",
                url, status, attempt, MAX_RETRIES,
            )
            # Resending an identical request cannot fix a client error;
            # only timeout / too-early / rate-limit statuses are transient.
            if 400 <= status < 500 and status not in (408, 425, 429):
                logger.error("Webhook %s rejected the request with %d; not retrying", url, status)
                return False

        if attempt < MAX_RETRIES:
            wait = BACKOFF_BASE ** attempt
            time.sleep(wait)

    logger.error("Webhook permanently failed for %s after %d attempts", url, MAX_RETRIES)
    return False
|
||||
|
||||
|
||||
def notify(event_type: str, data: dict[str, Any]) -> None:
    """Fire all configured webhooks for an event.

    Safe to call even when no webhooks are configured (returns immediately).
    Chat-style URLs (Slack/Discord) get a human-readable text payload and
    all other URLs get the structured JSON payload.  Native Discord webhook
    URLs receive the text under ``content``; Discord URLs ending in
    ``/slack`` keep the Slack ``text`` format.

    Args:
        event_type: Event identifier (e.g., "job_completed", "patent_alert")
        data: Event data to include in the payload
    """
    from urllib.parse import urlsplit

    if not WEBHOOK_URLS:
        return

    for url in WEBHOOK_URLS:
        slack = _is_slack_url(url)
        payload = _build_payload(event_type, data, slack=slack)

        if slack and "discord.com/api/webhooks" in url:
            # Discord's native endpoint needs "content"; only its
            # Slack-compatible ".../slack" endpoint accepts "text".
            if not urlsplit(url).path.rstrip("/").endswith("/slack"):
                payload = {"content": payload["text"]}

        _send_with_retry(url, payload)
|
||||
|
||||
|
||||
def notify_job_completed(
    job_id: str,
    status: str,
    total_companies: int,
    successful: int,
    failed: int,
) -> None:
    """Emit a "job_completed" webhook event describing a finished batch job."""
    event_data = {
        "job_id": job_id,
        "status": status,
        "total_companies": total_companies,
        "successful": successful,
        "failed": failed,
        "summary": f"Batch job {job_id}: {successful}/{total_companies} succeeded",
    }
    notify("job_completed", event_data)
|
||||
|
||||
|
||||
def notify_alert(
    company_name: str,
    alert_type: str,
    message: str,
) -> None:
    """Emit a "patent_alert" webhook event for a tracked company."""
    event_data = {
        "company_name": company_name,
        "alert_type": alert_type,
        "message": message,
    }
    notify("patent_alert", event_data)
|
||||
@@ -0,0 +1,93 @@
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
container_name: sparc-postgres
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
POSTGRES_DB: ${POSTGRES_DB}
|
||||
ports:
|
||||
- "5432:5432"
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER}"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
restart: unless-stopped
|
||||
|
||||
init-db:
|
||||
image: gitea.leeworks.dev/0xwheatyz/sparc:latest
|
||||
build: .
|
||||
container_name: sparc-init-db
|
||||
command: python scripts/init_database.py
|
||||
environment:
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB}
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
restart: "no"
|
||||
|
||||
api:
|
||||
image: gitea.leeworks.dev/0xwheatyz/sparc:latest
|
||||
build: .
|
||||
container_name: sparc-api
|
||||
command: uvicorn SPARC.api:app --host 0.0.0.0 --port 8000
|
||||
environment:
|
||||
API_KEY: ${API_KEY}
|
||||
OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB}
|
||||
USE_CACHE: "true"
|
||||
JWT_SECRET: ${JWT_SECRET:-sparc-secret-key-change-in-production}
|
||||
CORS_ORIGINS: ${CORS_ORIGINS:-}
|
||||
APP_ENV: ${APP_ENV:-development}
|
||||
ROOT_PATH: ""
|
||||
ports:
|
||||
- "8000:8000"
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
init-db:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- patent_data:/app/patents
|
||||
restart: unless-stopped
|
||||
|
||||
# Optional: MinIO for S3-compatible local object storage
|
||||
# Enable by setting STORAGE_BACKEND=s3 in .env and starting the stack with the "s3" profile (docker compose --profile s3 up)
|
||||
minio:
|
||||
image: minio/minio:latest
|
||||
container_name: sparc-minio
|
||||
command: server /data --console-address ":9001"
|
||||
environment:
|
||||
MINIO_ROOT_USER: ${AWS_ACCESS_KEY_ID:-minioadmin}
|
||||
MINIO_ROOT_PASSWORD: ${AWS_SECRET_ACCESS_KEY:-minioadmin}
|
||||
ports:
|
||||
- "9000:9000"
|
||||
- "9001:9001"
|
||||
volumes:
|
||||
- minio_data:/data
|
||||
healthcheck:
|
||||
test: ["CMD", "mc", "ready", "local"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
restart: unless-stopped
|
||||
profiles:
|
||||
- s3
|
||||
|
||||
dashboard:
|
||||
image: gitea.leeworks.dev/0xwheatyz/sparc:frontend-latest
|
||||
build: ./frontend
|
||||
container_name: sparc-dashboard
|
||||
ports:
|
||||
- "8080:80"
|
||||
depends_on:
|
||||
- api
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
patent_data:
|
||||
minio_data:
|
||||
@@ -0,0 +1,188 @@
|
||||
# Container Registry and CI/CD Setup
|
||||
|
||||
This document explains how to build and push Docker images using Gitea Actions and the Gitea Container Registry.
|
||||
|
||||
## Overview
|
||||
|
||||
The SPARC project uses Gitea Actions (GitHub Actions-compatible) to automatically build and push Docker images to the Gitea Container Registry whenever code is pushed to the repository.
|
||||
|
||||
## Workflow Configuration
|
||||
|
||||
The workflow is defined in `.gitea/workflows/build.yaml` and automatically:
|
||||
- Builds the Docker image from the `Dockerfile`
|
||||
- Tags the image appropriately based on the git ref (branch/tag)
|
||||
- Pushes to the Gitea Container Registry at `10.0.1.10`
|
||||
|
||||
### Triggers
|
||||
|
||||
The workflow runs on:
|
||||
- **Push to main branch**: Builds and tags with commit SHA + `latest`
|
||||
- **Push of tags**: Builds and tags with the tag name + `latest`
|
||||
- **Manual dispatch**: Can be triggered manually from Gitea UI
|
||||
|
||||
### Image Naming
|
||||
|
||||
Images are pushed to: `10.0.1.10/0xwheatyz/sparc:<tag>`
|
||||
|
||||
- Main branch commits: `10.0.1.10/0xwheatyz/sparc:<sha>` and `10.0.1.10/0xwheatyz/sparc:latest`
|
||||
- Tags: `10.0.1.10/0xwheatyz/sparc:<tag-name>` and `10.0.1.10/0xwheatyz/sparc:latest`
|
||||
- Other branches: `10.0.1.10/0xwheatyz/sparc:<branch-name>`
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### 1. Enable Container Registry in Gitea
|
||||
|
||||
The Gitea instance must have the Container Registry (Packages) feature enabled:
|
||||
|
||||
1. Access Gitea as administrator
|
||||
2. Go to Site Administration > Configuration
|
||||
3. Find "Packages" section
|
||||
4. Ensure packages/container registry is enabled
|
||||
|
||||
### 2. Create Personal Access Token
|
||||
|
||||
The workflow needs a personal access token with package write permissions:
|
||||
|
||||
1. In Gitea UI, click your profile → Settings
|
||||
2. Go to Applications → Manage Access Tokens
|
||||
3. Click "Generate New Token"
|
||||
4. Give it a descriptive name (e.g., "Actions Container Registry")
|
||||
5. Select scopes:
|
||||
- `write:package` (required)
|
||||
- `read:package` (required)
|
||||
6. Click "Generate Token"
|
||||
7. **Copy the token immediately** (you won't see it again)
|
||||
|
||||
### 3. Add Token as Repository Secret
|
||||
|
||||
1. Go to your repository in Gitea
|
||||
2. Click Settings → Secrets
|
||||
3. Click "Add Secret"
|
||||
4. Name: `GITEA_TOKEN`
|
||||
5. Value: Paste the personal access token
|
||||
6. Click "Add Secret"
|
||||
|
||||
## Usage
|
||||
|
||||
### Automatic Builds
|
||||
|
||||
Once configured, the workflow runs automatically:
|
||||
|
||||
```bash
|
||||
# Push to main branch - triggers build
|
||||
git add .
|
||||
git commit -m "feat: add new feature"
|
||||
git push origin main
|
||||
|
||||
# Create and push a tag - triggers build with tag
|
||||
git tag v1.0.0
|
||||
git push origin v1.0.0
|
||||
```
|
||||
|
||||
### Manual Builds
|
||||
|
||||
You can also trigger builds manually:
|
||||
|
||||
1. Go to repository → Actions
|
||||
2. Click on "Build and Push Docker Image" workflow
|
||||
3. Click "Run workflow"
|
||||
4. Select the branch
|
||||
5. Click "Run workflow"
|
||||
|
||||
### Monitor Build Progress
|
||||
|
||||
1. Go to repository → Actions
|
||||
2. Click on the running workflow
|
||||
3. View logs for each step
|
||||
|
||||
## Pulling Images
|
||||
|
||||
Once built, images can be pulled from the registry:
|
||||
|
||||
```bash
|
||||
# Log in to registry
|
||||
docker login 10.0.1.10 -u your-username
|
||||
|
||||
# Pull the latest image
|
||||
docker pull 10.0.1.10/0xwheatyz/sparc:latest
|
||||
|
||||
# Pull a specific tag
|
||||
docker pull 10.0.1.10/0xwheatyz/sparc:v1.0.0
|
||||
|
||||
# Pull a specific commit
|
||||
docker pull 10.0.1.10/0xwheatyz/sparc:abc1234
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Workflow Fails at Login Step
|
||||
|
||||
**Error**: `Error response from daemon: login attempt to http://10.0.1.10/v2/ failed with status: 404 Not Found`
|
||||
|
||||
**Solution**: Container registry is not enabled in Gitea. Contact administrator to enable packages feature.
|
||||
|
||||
### Workflow Fails with 401 Unauthorized
|
||||
|
||||
**Error**: `unauthorized: authentication required`
|
||||
|
||||
**Solutions**:
|
||||
1. Verify `GITEA_TOKEN` secret exists and is correct
|
||||
2. Verify token has `write:package` and `read:package` scopes
|
||||
3. Regenerate token if it has expired
|
||||
|
||||
### Workflow Fails at Push Step
|
||||
|
||||
**Error**: `denied: permission denied`
|
||||
|
||||
**Solutions**:
|
||||
1. Ensure your user account has write access to the repository
|
||||
2. Verify the token has the correct permissions
|
||||
3. Check if the repository owner matches the registry path
|
||||
|
||||
### Image Not Appearing in Packages
|
||||
|
||||
**Check**:
|
||||
1. Go to repository → Packages tab
|
||||
2. If no packages appear, check workflow logs for errors
|
||||
3. Verify the image was successfully pushed (check workflow output)
|
||||
|
||||
## Advanced Configuration
|
||||
|
||||
### Using a Different Registry
|
||||
|
||||
To push to a different container registry (e.g., Docker Hub, GHCR):
|
||||
|
||||
1. Update the `REGISTRY` variable in `.gitea/workflows/build.yaml`
|
||||
2. Update the login step with appropriate credentials
|
||||
3. Add registry credentials as secrets
|
||||
|
||||
### Building Multi-platform Images
|
||||
|
||||
To build for multiple architectures:
|
||||
|
||||
```yaml
|
||||
- name: Build Docker image
|
||||
run: |
|
||||
docker buildx build \
|
||||
--platform linux/amd64,linux/arm64 \
|
||||
-t ${{ steps.tags.outputs.IMAGE_TAG }} \
|
||||
--push .
|
||||
```
|
||||
|
||||
### Adding Build Arguments
|
||||
|
||||
To pass build arguments:
|
||||
|
||||
```yaml
|
||||
- name: Build Docker image
|
||||
run: |
|
||||
docker build \
|
||||
--build-arg VERSION=${{ gitea.sha_short }} \
|
||||
-t ${{ steps.tags.outputs.IMAGE_TAG }} .
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
- [Gitea Actions Documentation](https://docs.gitea.com/usage/actions/overview)
|
||||
- [Gitea Packages Documentation](https://docs.gitea.com/usage/packages/overview)
|
||||
- [GitHub Actions Syntax](https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions) (Gitea Actions compatible)
|
||||
@@ -0,0 +1,331 @@
|
||||
# Database Storage and Caching
|
||||
|
||||
This document explains how SPARC uses PostgreSQL for storing LLM messages, enabling response caching and analytics.
|
||||
|
||||
## Overview
|
||||
|
||||
SPARC stores all LLM interactions in PostgreSQL, providing:
|
||||
|
||||
- **Response Caching**: Avoid redundant API calls for previously analyzed patents
|
||||
- **Analytics**: Track usage patterns, token consumption, and analysis history
|
||||
- **Persistence**: Maintain analysis history across sessions
|
||||
|
||||
SPARC supports two cache modes:
|
||||
|
||||
1. **Cache Mode** (default, `USE_CACHE=true`): Check database for cached responses before making API calls
|
||||
2. **Fresh Mode** (`USE_CACHE=false`): Always make fresh API calls (still stores results in database)
|
||||
|
||||
## Setup
|
||||
|
||||
### 1. Start the Database
|
||||
|
||||
Use docker-compose to start the PostgreSQL database:
|
||||
|
||||
```bash
|
||||
docker-compose up -d postgres
|
||||
```
|
||||
|
||||
This will start a PostgreSQL instance accessible at `localhost:5432`.
|
||||
|
||||
### 2. Initialize the Database Schema
|
||||
|
||||
Run the initialization script to create the necessary tables:
|
||||
|
||||
```bash
|
||||
python scripts/init_database.py
|
||||
```
|
||||
|
||||
This creates the `llm_messages` table and indexes for efficient querying.
|
||||
|
||||
### 3. Configure Environment Variables
|
||||
|
||||
Create a `.env` file (or copy from `.env.example`):
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
Edit `.env` and set:
|
||||
|
||||
```env
|
||||
# Database connection (required)
|
||||
DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc
|
||||
|
||||
# Cache mode: use cached responses when available
|
||||
USE_CACHE=true
|
||||
|
||||
# API key for fresh LLM calls
|
||||
OPENROUTER_API_KEY=your_openrouter_key_here
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Running with Cache Mode (Default)
|
||||
|
||||
Set `USE_CACHE=true` in your `.env` file, then run the application normally:
|
||||
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
The application will:
|
||||
- Check the database for cached responses matching the request
|
||||
- If found, return the cached response (no API call)
|
||||
- If not found, make an API call and store the response for future use
|
||||
|
||||
### Running with Fresh Mode
|
||||
|
||||
Set `USE_CACHE=false` in your `.env` file to always get fresh responses:
|
||||
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
The application will:
|
||||
- Always send messages to OpenRouter for real LLM responses
|
||||
- Store all responses in the database
|
||||
- Useful when you need the latest analysis or want to refresh cached data
|
||||
|
||||
## Viewing Analytics
|
||||
|
||||
### View Message Statistics
|
||||
|
||||
```bash
|
||||
python scripts/view_analytics.py
|
||||
```
|
||||
|
||||
Options:
|
||||
- `--days N`: Analyze messages from the last N days (default: 30)
|
||||
|
||||
Example output:
|
||||
```
|
||||
SPARC Analytics - Last 30 days
|
||||
======================================================================
|
||||
|
||||
Total Messages: 45
|
||||
|
||||
Messages by Company:
|
||||
nvidia: 25
|
||||
intel: 12
|
||||
amd: 8
|
||||
|
||||
Messages by Analysis Type:
|
||||
portfolio: 30
|
||||
single_patent: 15
|
||||
|
||||
======================================================================
|
||||
```
|
||||
|
||||
### View Stored Messages
|
||||
|
||||
```bash
|
||||
python scripts/view_messages.py
|
||||
```
|
||||
|
||||
Options:
|
||||
- `--company COMPANY`: Filter by company name
|
||||
- `--type TYPE`: Filter by analysis type (single_patent or portfolio)
|
||||
- `--limit N`: Maximum number of messages to display (default: 10)
|
||||
|
||||
Examples:
|
||||
```bash
|
||||
# View last 10 messages
|
||||
python scripts/view_messages.py
|
||||
|
||||
# View all messages for nvidia
|
||||
python scripts/view_messages.py --company nvidia --limit 100
|
||||
|
||||
# View portfolio analyses only
|
||||
python scripts/view_messages.py --type portfolio
|
||||
```
|
||||
|
||||
## Database Schema
|
||||
|
||||
### llm_messages Table
|
||||
|
||||
| Column | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| id | SERIAL | Primary key |
|
||||
| timestamp | TIMESTAMP | When the message was created |
|
||||
| company_name | VARCHAR(255) | Company being analyzed |
|
||||
| analysis_type | VARCHAR(50) | Type of analysis (single_patent, portfolio) |
|
||||
| model | VARCHAR(100) | LLM model identifier |
|
||||
| prompt | TEXT | The full prompt sent to the LLM |
|
||||
| response | TEXT | The response from the LLM |
|
||||
| metadata | JSONB | Additional metadata (patent IDs, content length, etc.) |
|
||||
| token_usage | JSONB | Token usage statistics (when available) |
|
||||
| created_at | TIMESTAMP | Record creation timestamp |
|
||||
|
||||
### Indexes
|
||||
|
||||
- `idx_messages_timestamp`: Speeds up time-based queries
|
||||
- `idx_messages_company`: Speeds up company-specific queries
|
||||
|
||||
## Docker Compose
|
||||
|
||||
The included `docker-compose.yml` provides:
|
||||
|
||||
1. **PostgreSQL Database**:
|
||||
- Image: `postgres:16-alpine`
|
||||
- Port: `5432`
|
||||
- Credentials: postgres/postgres
|
||||
- Database: sparc
|
||||
- Persistent storage via volume
|
||||
|
||||
2. **Application Container** (optional):
|
||||
- Builds from Dockerfile
|
||||
- Connects to PostgreSQL
|
||||
- Mounts current directory
|
||||
|
||||
### Start Services
|
||||
|
||||
```bash
|
||||
# Start just the database
|
||||
docker-compose up -d postgres
|
||||
|
||||
# Start everything
|
||||
docker-compose up -d
|
||||
|
||||
# View logs
|
||||
docker-compose logs -f
|
||||
|
||||
# Stop services
|
||||
docker-compose down
|
||||
|
||||
# Stop and remove volumes (WARNING: deletes data)
|
||||
docker-compose down -v
|
||||
```
|
||||
|
||||
## Toggling Between Modes
|
||||
|
||||
You can easily switch between modes by changing the `USE_CACHE` environment variable:
|
||||
|
||||
### Quick Toggle (temporary)
|
||||
|
||||
```bash
|
||||
# Run with caching enabled
|
||||
USE_CACHE=true python main.py
|
||||
|
||||
# Run with fresh API calls
|
||||
USE_CACHE=false python main.py
|
||||
```
|
||||
|
||||
### Persistent Toggle
|
||||
|
||||
Edit your `.env` file:
|
||||
|
||||
```env
|
||||
# Use cached responses when available (recommended for most use)
|
||||
USE_CACHE=true
|
||||
|
||||
# Always make fresh API calls
|
||||
USE_CACHE=false
|
||||
```
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Cost Optimization with Caching
|
||||
|
||||
Cache mode reduces API costs by reusing previous analysis results:
|
||||
|
||||
```bash
|
||||
USE_CACHE=true python main.py
|
||||
```
|
||||
|
||||
If the same company/patent combination was analyzed before, the cached response is returned instantly.
|
||||
|
||||
### Fresh Analysis
|
||||
|
||||
When you need the latest LLM analysis (e.g., after model updates):
|
||||
|
||||
```bash
|
||||
USE_CACHE=false python main.py
|
||||
```
|
||||
|
||||
### Collecting Usage Analytics
|
||||
|
||||
The database stores all interactions, enabling analytics on:
|
||||
- Which companies are analyzed most frequently
|
||||
- Types of analyses performed
|
||||
- Token usage and costs over time
|
||||
- Response caching hit rates
|
||||
|
||||
### Development and Debugging
|
||||
|
||||
Database storage is useful for:
|
||||
- Reviewing actual prompts sent to the LLM
|
||||
- Analyzing response patterns
|
||||
- Debugging the full pipeline end-to-end
|
||||
- Understanding token usage patterns
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Connection Refused
|
||||
|
||||
If you get "connection refused" errors:
|
||||
|
||||
1. Ensure PostgreSQL is running: `docker-compose ps`
|
||||
2. Check the DATABASE_URL in your `.env` file
|
||||
3. Wait for the database to be healthy: `docker-compose logs postgres`
|
||||
|
||||
### Schema Not Found
|
||||
|
||||
If you get "relation does not exist" errors:
|
||||
|
||||
1. Run the initialization script: `python scripts/init_database.py`
|
||||
2. Verify tables were created: `docker-compose exec postgres psql -U postgres -d sparc -c "\dt"`
|
||||
|
||||
### Permission Denied
|
||||
|
||||
If you get permission errors:
|
||||
|
||||
1. Check your DATABASE_URL credentials match docker-compose.yml
|
||||
2. Ensure the database container is running: `docker-compose up -d postgres`
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
### Direct Database Access
|
||||
|
||||
You can access the database directly using psql:
|
||||
|
||||
```bash
|
||||
docker-compose exec postgres psql -U postgres -d sparc
|
||||
```
|
||||
|
||||
Example queries:
|
||||
|
||||
```sql
|
||||
-- View all messages
|
||||
SELECT id, company_name, analysis_type, timestamp FROM llm_messages ORDER BY timestamp DESC LIMIT 10;
|
||||
|
||||
-- Count messages by company
|
||||
SELECT company_name, COUNT(*) FROM llm_messages GROUP BY company_name;
|
||||
|
||||
-- View recent prompts
|
||||
SELECT prompt FROM llm_messages ORDER BY timestamp DESC LIMIT 5;
|
||||
```
|
||||
|
||||
### Programmatic Access
|
||||
|
||||
You can use the `DatabaseClient` directly in your code:
|
||||
|
||||
```python
|
||||
from SPARC.database import DatabaseClient
|
||||
from SPARC import config
|
||||
|
||||
db = DatabaseClient(config.database_url)
|
||||
|
||||
# Get messages
|
||||
messages = db.get_messages(company_name="nvidia", limit=10)
|
||||
|
||||
# Get analytics
|
||||
analytics = db.get_analytics(days=7)
|
||||
|
||||
# Store a custom message
|
||||
db.store_message(
|
||||
prompt="test prompt",
|
||||
response="test response",
|
||||
company_name="test",
|
||||
analysis_type="custom"
|
||||
)
|
||||
```
|
||||
@@ -0,0 +1,471 @@
|
||||
# SPARC Complete Deployment Guide
|
||||
|
||||
This guide provides step-by-step instructions for deploying the SPARC (Semiconductor Patent & Analytics Report Core) application with all features enabled, including SERP API patent retrieval, LLM analysis, database storage, and the web UI.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Step 1: Clone and Configure](#step-1-clone-and-configure)
|
||||
- [Step 2: Start Services with Docker Compose](#step-2-start-services-with-docker-compose)
|
||||
- [Step 3: Database Schema](#step-3-database-schema)
|
||||
- [Step 4: Run the Services](#step-4-run-the-services)
|
||||
- [Step 5: Verify Deployment](#step-5-verify-deployment)
|
||||
- [Step 6: Using the Application](#step-6-using-the-application)
|
||||
- [Step 7: View Stored Data](#step-7-view-stored-data)
|
||||
- [Architecture Overview](#architecture-overview)
|
||||
- [Environment Variables Reference](#environment-variables-reference)
|
||||
- [Docker Compose Services](#docker-compose-services)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. **Docker & Docker Compose** installed
|
||||
2. **API Keys** (you'll need to obtain these):
|
||||
- **SerpAPI Key**: Sign up at https://serpapi.com/ (free tier: 100 searches/month)
|
||||
- **OpenRouter API Key**: Sign up at https://openrouter.ai/ (pay-as-you-go)
|
||||
|
||||
---
|
||||
|
||||
## Step 1: Clone and Configure
|
||||
|
||||
```bash
|
||||
git clone <repository-url>
|
||||
cd SPARC
|
||||
|
||||
# Create environment file
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
Edit `.env` with your API keys:
|
||||
|
||||
```env
|
||||
# Required API Keys
|
||||
API_KEY=your_serpapi_key_here
|
||||
OPENROUTER_API_KEY=your_openrouter_key_here
|
||||
|
||||
# Database Configuration (matches docker-compose.yml)
|
||||
DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc
|
||||
USE_CACHE=true
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 2: Start Services with Docker Compose
|
||||
|
||||
```bash
|
||||
# Start all services (PostgreSQL, API, and Dashboard)
|
||||
docker-compose up -d
|
||||
|
||||
# Check status
|
||||
docker-compose ps
|
||||
|
||||
# You should see:
|
||||
# - sparc-postgres (healthy)
|
||||
# - sparc-api (running on port 8000)
|
||||
# - sparc-dashboard (running on port 8080)
|
||||
```
|
||||
|
||||
The database is automatically initialized by the `init-db` service.
|
||||
|
||||
---
|
||||
|
||||
## Step 3: Database Schema
|
||||
|
||||
The `init-db` service automatically creates the `llm_messages` table with the following schema:
|
||||
|
||||
| Column | Type | Purpose |
|
||||
|--------|------|---------|
|
||||
| `id` | SERIAL | Primary key |
|
||||
| `timestamp` | TIMESTAMP | Message creation time |
|
||||
| `company_name` | VARCHAR(255) | Company being analyzed |
|
||||
| `analysis_type` | VARCHAR(50) | 'single_patent' or 'portfolio' |
|
||||
| `model` | VARCHAR(100) | LLM model identifier |
|
||||
| `prompt` | TEXT | Full prompt sent to LLM |
|
||||
| `response` | TEXT | LLM response |
|
||||
| `metadata` | JSONB | Patent IDs, content lengths |
|
||||
| `token_usage` | JSONB | prompt/completion/total tokens |
|
||||
| `created_at` | TIMESTAMP | Record timestamp |
|
||||
|
||||
---
|
||||
|
||||
## Step 4: Run the Services
|
||||
|
||||
### Option A: Run with Docker Compose (Recommended)
|
||||
|
||||
All services are started automatically with `docker-compose up -d` from Step 2.
|
||||
|
||||
```bash
|
||||
# View logs
|
||||
docker-compose logs -f
|
||||
|
||||
# View specific service logs
|
||||
docker-compose logs -f api
|
||||
docker-compose logs -f dashboard
|
||||
```
|
||||
|
||||
### Option B: Run Locally (Development)
|
||||
|
||||
If you prefer running services locally without Docker:
|
||||
|
||||
```bash
|
||||
# Start PostgreSQL with Docker
|
||||
docker-compose up -d postgres
|
||||
|
||||
# Wait for database to be healthy, then initialize
|
||||
python scripts/init_database.py
|
||||
|
||||
# Start FastAPI backend
|
||||
uvicorn SPARC.api:app --host 0.0.0.0 --port 8000 --reload
|
||||
|
||||
# For the React frontend (separate terminal)
|
||||
cd frontend
|
||||
npm install
|
||||
npm run dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 5: Verify Deployment
|
||||
|
||||
```bash
|
||||
# Check API health
|
||||
curl http://localhost:8000/health
|
||||
|
||||
# Expected response:
|
||||
# {"status":"healthy","version":"0.1.0","timestamp":"..."}
|
||||
```
|
||||
|
||||
Access the services:
|
||||
|
||||
| Service | URL |
|
||||
|---------|-----|
|
||||
| REST API | http://localhost:8000 |
|
||||
| API Documentation (Swagger) | http://localhost:8000/docs |
|
||||
| Dashboard (Web UI) | http://localhost:8080 |
|
||||
|
||||
---
|
||||
|
||||
## Step 6: Using the Application
|
||||
|
||||
### Via Dashboard (Web UI)
|
||||
|
||||
1. Open http://localhost:8080
|
||||
2. Register a new account or log in (default admin: `admin` / `admin`)
|
||||
3. Navigate to **"Analysis"** from the sidebar
|
||||
4. Enter a company name (e.g., "Intel")
|
||||
5. Click **"Analyze"**
|
||||
|
||||
This will:
|
||||
- Query SerpAPI for recent patents
|
||||
- Download and parse patent PDFs
|
||||
- Send patent content to Claude for analysis
|
||||
- Store prompt/response in PostgreSQL (with caching)
|
||||
- Display results in the dashboard
|
||||
|
||||
### Via REST API
|
||||
|
||||
```bash
|
||||
# Analyze single company
|
||||
curl http://localhost:8000/analyze/Intel
|
||||
|
||||
# Batch analyze multiple companies (synchronous)
|
||||
curl -X POST http://localhost:8000/analyze/batch \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"companies": ["Intel", "AMD", "NVIDIA"], "max_workers": 3}'
|
||||
|
||||
# Async batch (for large jobs)
|
||||
curl -X POST http://localhost:8000/analyze/batch/async \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"companies": ["Intel", "AMD"]}'
|
||||
|
||||
# Check job status
|
||||
curl http://localhost:8000/jobs/{job_id}
|
||||
|
||||
# List all jobs
|
||||
curl http://localhost:8000/jobs
|
||||
```
|
||||
|
||||
### Via Python
|
||||
|
||||
```python
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
|
||||
analyzer = CompanyAnalyzer()
|
||||
result = analyzer.analyze("Intel")
|
||||
print(result.analysis)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 7: View Stored Data
|
||||
|
||||
```bash
|
||||
# View analytics (aggregated usage)
|
||||
python scripts/view_analytics.py
|
||||
|
||||
# View stored messages
|
||||
python scripts/view_messages.py
|
||||
|
||||
# Query database directly
|
||||
docker exec -it sparc-postgres psql -U postgres -d sparc -c \
|
||||
"SELECT company_name, analysis_type, token_usage FROM llm_messages ORDER BY timestamp DESC LIMIT 10;"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
||||
│ Dashboard │───▶│ FastAPI │───▶│ Analyzer │
|
||||
│ (8080) │ │ (8000) │ │ │
|
||||
└──────────────┘ └──────────────┘ └──────┬───────┘
|
||||
│
|
||||
┌──────────────────────────┼──────────────────────────┐
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
||||
│ SerpAPI │ │ OpenRouter │ │ PostgreSQL │
|
||||
│ (Patents) │ │ (Claude) │ │ (Storage) │
|
||||
└──────────────┘ └──────────────┘ └──────────────┘
|
||||
```
|
||||
|
||||
### Component Responsibilities
|
||||
|
||||
| Component | Purpose |
|
||||
|-----------|---------|
|
||||
| **Dashboard** | React TypeScript web UI with authentication |
|
||||
| **FastAPI** | REST API with JWT authentication |
|
||||
| **Analyzer** | Orchestrates patent retrieval and LLM analysis |
|
||||
| **SerpAPI** | Retrieves patent data from Google Patents |
|
||||
| **OpenRouter** | Routes requests to Claude for AI analysis |
|
||||
| **PostgreSQL** | Stores prompts, responses, users, and cached results |
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables Reference
|
||||
|
||||
| Variable | Required | Default | Description |
|
||||
|----------|----------|---------|-------------|
|
||||
| `API_KEY` | Yes | - | SerpAPI key for patent search |
|
||||
| `OPENROUTER_API_KEY` | Yes | - | OpenRouter API key for Claude access |
|
||||
| `DATABASE_URL` | Yes | - | PostgreSQL connection string |
|
||||
| `USE_CACHE` | No | `true` | Check database for cached responses before API calls |
|
||||
| `JWT_SECRET` | Yes | - | Secret key for JWT authentication (change in production!) |
|
||||
|
||||
### Database URL Format
|
||||
|
||||
```
|
||||
postgresql://[user]:[password]@[host]:[port]/[database]
|
||||
```
|
||||
|
||||
Example:
|
||||
```
|
||||
postgresql://postgres:postgres@localhost:5432/sparc
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Docker Compose Services
|
||||
|
||||
The `docker-compose.yml` includes all services needed for production:
|
||||
|
||||
| Service | Container | Port | Description |
|
||||
|---------|-----------|------|-------------|
|
||||
| `postgres` | sparc-postgres | 5432 | PostgreSQL database |
|
||||
| `init-db` | sparc-init-db | - | One-time database initialization (seeds admin user) |
|
||||
| `api` | sparc-api | 8000 | FastAPI REST API with JWT auth (patent PDFs stored in `patent_data` volume) |
|
||||
| `dashboard` | sparc-dashboard | 8080 | React TypeScript web UI |
|
||||
|
||||
### Common Docker Compose Commands
|
||||
|
||||
```bash
|
||||
# Start all services
|
||||
docker-compose up -d
|
||||
|
||||
# Start with rebuild (after code changes)
|
||||
docker-compose up -d --build
|
||||
|
||||
# View logs
|
||||
docker-compose logs -f
|
||||
|
||||
# View specific service logs
|
||||
docker-compose logs -f api
|
||||
docker-compose logs -f dashboard
|
||||
|
||||
# Stop all services
|
||||
docker-compose down
|
||||
|
||||
# Stop and remove volumes (WARNING: deletes data)
|
||||
docker-compose down -v
|
||||
|
||||
# Restart a specific service
|
||||
docker-compose restart api
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Patent PDF Storage
|
||||
|
||||
The SPARC API downloads patent PDFs during analysis and stores them at `/app/patents` inside the container. These files are used for subsequent single-patent analysis requests and as a local cache to avoid re-downloading. If this directory is not persisted, all downloaded PDFs are lost when the container is recreated.
|
||||
|
||||
### Docker Compose (default)
|
||||
|
||||
The default `docker-compose.yml` declares a named volume called `patent_data` that is mounted at `/app/patents`:
|
||||
|
||||
```yaml
|
||||
# In the api service:
|
||||
volumes:
|
||||
- patent_data:/app/patents
|
||||
|
||||
# At the top-level volumes section:
|
||||
volumes:
|
||||
patent_data:
|
||||
```
|
||||
|
||||
This means PDFs survive `docker compose down` and `docker compose up` cycles. To remove patent data intentionally, run:
|
||||
|
||||
```bash
|
||||
docker compose down -v # WARNING: also removes postgres_data
|
||||
# or selectively:
|
||||
docker volume rm sparc_patent_data
|
||||
```
|
||||
|
||||
If you prefer a bind mount (e.g., for easy host-side access during development), replace the volume with:
|
||||
|
||||
```yaml
|
||||
volumes:
|
||||
- ./patents:/app/patents
|
||||
```
|
||||
|
||||
### Kubernetes
|
||||
|
||||
For Kubernetes deployments, create a PersistentVolumeClaim and mount it into the API pod:
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: sparc-patent-data
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: sparc-api
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: api
|
||||
volumeMounts:
|
||||
- name: patent-data
|
||||
mountPath: /app/patents
|
||||
volumes:
|
||||
- name: patent-data
|
||||
persistentVolumeClaim:
|
||||
claimName: sparc-patent-data
|
||||
```
|
||||
|
||||
Adjust the storage size based on expected patent volume. Each patent PDF is typically 1-5 MB.
|
||||
|
||||
### S3 Object Storage (alternative)
|
||||
|
||||
For production deployments that need shared or highly durable storage, set `STORAGE_BACKEND=s3` in your `.env` file. This stores patent PDFs in an S3-compatible bucket (AWS S3 or MinIO) instead of the local filesystem, eliminating the need for a persistent volume. See the S3/MinIO section in `.env.example` for configuration details.
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Database Connection Issues
|
||||
|
||||
```bash
|
||||
# Check if postgres is running
|
||||
docker-compose ps
|
||||
|
||||
# Check postgres logs
|
||||
docker-compose logs postgres
|
||||
|
||||
# Test database connection
|
||||
docker exec -it sparc-postgres psql -U postgres -d sparc -c "SELECT 1;"
|
||||
```
|
||||
|
||||
### API Key Issues
|
||||
|
||||
```bash
|
||||
# Verify environment variables are set
|
||||
echo $API_KEY
|
||||
echo $OPENROUTER_API_KEY
|
||||
|
||||
# Test SerpAPI directly
|
||||
curl "https://serpapi.com/search?engine=google_patents&q=Intel&api_key=$API_KEY"
|
||||
```
|
||||
|
||||
### Port Conflicts
|
||||
|
||||
If ports 8000, 8080, or 5432 are in use:
|
||||
|
||||
```bash
|
||||
# Find what's using the port
|
||||
lsof -i :8000
|
||||
|
||||
# Or change ports in docker-compose.yml
|
||||
ports:
|
||||
- "8001:8000" # Use host port 8001 instead of 8000 (8080 is already taken by the dashboard)
|
||||
```
|
||||
|
||||
### Container Issues
|
||||
|
||||
```bash
|
||||
# Rebuild containers after code changes
|
||||
docker-compose build --no-cache
|
||||
|
||||
# Remove all containers and start fresh
|
||||
docker-compose down
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
### Viewing Application Logs
|
||||
|
||||
```bash
|
||||
# All services
|
||||
docker-compose logs -f
|
||||
|
||||
# Specific service
|
||||
docker-compose logs -f api
|
||||
docker-compose logs -f dashboard
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference
|
||||
|
||||
```bash
|
||||
# Docker setup (recommended)
|
||||
cp .env.example .env
|
||||
# Edit .env with API keys
|
||||
docker-compose up -d
|
||||
|
||||
# Local development setup
|
||||
cp .env.example .env
|
||||
# Edit .env with API keys
|
||||
docker-compose up -d postgres
|
||||
python scripts/init_database.py
|
||||
uvicorn SPARC.api:app --reload &
|
||||
cd frontend && npm install && npm run dev &
|
||||
|
||||
# Check status
|
||||
curl http://localhost:8000/health
|
||||
open http://localhost:8080
|
||||
|
||||
# View data
|
||||
python scripts/view_analytics.py
|
||||
python scripts/view_messages.py
|
||||
```
|
||||
@@ -20,6 +20,14 @@
|
||||
packages = [
|
||||
python
|
||||
pkgs.python311Packages.virtualenv # gives `virtualenv` tool
|
||||
pkgs.zlib
|
||||
pkgs.stdenv.cc.cc.lib
|
||||
];
|
||||
|
||||
# Required for numpy and other C extension packages
|
||||
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [
|
||||
pkgs.zlib
|
||||
pkgs.stdenv.cc.cc.lib
|
||||
];
|
||||
|
||||
shellHook = ''
|
||||
@@ -48,8 +56,8 @@
|
||||
fi
|
||||
|
||||
# Prompt tweak so you can see when venv is active
|
||||
export PS1="(SPARC-venv) $PS1"
|
||||
export NIX_PROJECT_SHELL="SPARC"
|
||||
'';
|
||||
};
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
# Dependencies
|
||||
node_modules/
|
||||
|
||||
# Build output
|
||||
dist/
|
||||
|
||||
# Local env files
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# Editor directories
|
||||
.vscode/
|
||||
.idea/
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Debug logs
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
@@ -0,0 +1,32 @@
|
||||
# Build stage
|
||||
FROM node:20-alpine AS build
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy package files
|
||||
COPY package.json package-lock.json* ./
|
||||
|
||||
# Install dependencies
|
||||
RUN npm install
|
||||
|
||||
# Copy source files
|
||||
COPY . .
|
||||
|
||||
# Build the application
|
||||
RUN npm run build
|
||||
|
||||
# Production stage
|
||||
FROM nginx:alpine
|
||||
|
||||
# Copy built files
|
||||
COPY --from=build /app/dist /usr/share/nginx/html
|
||||
|
||||
# Copy nginx template (processed at startup with envsubst)
|
||||
COPY nginx.conf.template /etc/nginx/templates/default.conf.template
|
||||
|
||||
# Default API URL (override with -e API_URL=...)
|
||||
ENV API_URL=http://api:8000/
|
||||
|
||||
EXPOSE 80
|
||||
|
||||
CMD ["nginx", "-g", "daemon off;"]
|
||||
@@ -0,0 +1,22 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>SPARC Dashboard</title>
|
||||
</head>
|
||||
<body>
|
||||
<script>
|
||||
// Prevent FOUC: apply saved theme before first render
|
||||
(function() {
|
||||
var theme = localStorage.getItem('theme');
|
||||
if (theme === 'dark' || (!theme && window.matchMedia('(prefers-color-scheme: dark)').matches)) {
|
||||
document.documentElement.classList.add('dark');
|
||||
}
|
||||
})();
|
||||
</script>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,34 @@
|
||||
server {
|
||||
listen 80;
|
||||
server_name localhost;
|
||||
root /usr/share/nginx/html;
|
||||
index index.html;
|
||||
|
||||
# Gzip compression
|
||||
gzip on;
|
||||
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
|
||||
|
||||
# Handle React Router (SPA)
|
||||
location / {
|
||||
try_files $uri $uri/ /index.html;
|
||||
}
|
||||
|
||||
# Proxy API requests to backend
|
||||
location /api/ {
|
||||
proxy_pass ${API_URL};
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection 'upgrade';
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_cache_bypass $http_upgrade;
|
||||
}
|
||||
|
||||
# Cache static assets
|
||||
location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2)$ {
|
||||
expires 1y;
|
||||
add_header Cache-Control "public, immutable";
|
||||
}
|
||||
}
|
||||
Generated
+4985
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,41 @@
|
||||
{
|
||||
"name": "sparc-dashboard",
|
||||
"private": true,
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "tsc -b && vite build",
|
||||
"lint": "eslint .",
|
||||
"generate": "openapi-typescript http://localhost:8000/api/openapi.json -o src/api/schema.d.ts",
|
||||
"generate:local": "openapi-typescript src/api/openapi.json -o src/api/schema.d.ts",
|
||||
"typecheck": "tsc --noEmit",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@tanstack/react-query": "^5.51.0",
|
||||
"axios": "^1.7.2",
|
||||
"lucide-react": "^1.7.0",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"react-router-dom": "^6.24.0",
|
||||
"recharts": "^2.12.7"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/js": "^9.6.0",
|
||||
"@types/react": "^18.3.3",
|
||||
"@types/react-dom": "^18.3.0",
|
||||
"@vitejs/plugin-react": "^4.3.1",
|
||||
"autoprefixer": "^10.4.19",
|
||||
"eslint": "^9.6.0",
|
||||
"eslint-plugin-react-hooks": "^5.1.0",
|
||||
"eslint-plugin-react-refresh": "^0.4.7",
|
||||
"globals": "^15.8.0",
|
||||
"postcss": "^8.4.39",
|
||||
"tailwindcss": "^3.4.4",
|
||||
"openapi-typescript": "^7.0.0",
|
||||
"typescript": "~5.5.3",
|
||||
"typescript-eslint": "^8.0.0",
|
||||
"vite": "^5.3.3"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
export default {
|
||||
plugins: {
|
||||
tailwindcss: {},
|
||||
autoprefixer: {},
|
||||
},
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom';
|
||||
import { QueryClient, QueryClientProvider } from '@tanstack/react-query';
|
||||
import { AuthProvider } from './context/AuthContext';
|
||||
import { ThemeProvider } from './context/ThemeContext';
|
||||
import { Layout } from './components/Layout';
|
||||
import { ProtectedRoute } from './components/ProtectedRoute';
|
||||
import { Login } from './pages/Login';
|
||||
import { Register } from './pages/Register';
|
||||
import { Analysis } from './pages/Analysis';
|
||||
import { Batch } from './pages/Batch';
|
||||
import { AnalyticsPage } from './pages/Analytics';
|
||||
import { About } from './pages/About';
|
||||
import { AdminUsers } from './pages/AdminUsers';
|
||||
import { Compare } from './pages/Compare';
|
||||
|
||||
const queryClient = new QueryClient({
|
||||
defaultOptions: {
|
||||
queries: {
|
||||
staleTime: 1000 * 60 * 5, // 5 minutes
|
||||
retry: 1,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
function App() {
|
||||
return (
|
||||
<ThemeProvider>
|
||||
<QueryClientProvider client={queryClient}>
|
||||
<AuthProvider>
|
||||
<BrowserRouter>
|
||||
<Routes>
|
||||
{/* Public routes */}
|
||||
<Route path="/login" element={<Login />} />
|
||||
<Route path="/register" element={<Register />} />
|
||||
|
||||
{/* Protected routes */}
|
||||
<Route
|
||||
element={
|
||||
<ProtectedRoute>
|
||||
<Layout />
|
||||
</ProtectedRoute>
|
||||
}
|
||||
>
|
||||
<Route path="/analysis" element={<Analysis />} />
|
||||
<Route path="/batch" element={<Batch />} />
|
||||
<Route path="/analytics" element={<AnalyticsPage />} />
|
||||
<Route path="/compare" element={<Compare />} />
|
||||
<Route path="/about" element={<About />} />
|
||||
|
||||
{/* Admin routes */}
|
||||
<Route
|
||||
path="/admin/users"
|
||||
element={
|
||||
<ProtectedRoute requireAdmin>
|
||||
<AdminUsers />
|
||||
</ProtectedRoute>
|
||||
}
|
||||
/>
|
||||
</Route>
|
||||
|
||||
{/* Default redirect */}
|
||||
<Route path="/" element={<Navigate to="/analysis" replace />} />
|
||||
<Route path="*" element={<Navigate to="/analysis" replace />} />
|
||||
</Routes>
|
||||
</BrowserRouter>
|
||||
</AuthProvider>
|
||||
</QueryClientProvider>
|
||||
</ThemeProvider>
|
||||
);
|
||||
}
|
||||
|
||||
export default App;
|
||||
@@ -0,0 +1,221 @@
|
||||
import axios, { AxiosError, InternalAxiosRequestConfig } from 'axios';
|
||||
import type { TokenResponse, User, CompanyAnalysis, BatchAnalysisResult, JobStatus, Analytics } from '../types';
|
||||
|
||||
const API_BASE_URL = import.meta.env.VITE_API_URL || '/api';
|
||||
|
||||
const api = axios.create({
|
||||
baseURL: API_BASE_URL,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
});
|
||||
|
||||
// Token management
|
||||
let accessToken: string | null = localStorage.getItem('access_token');
|
||||
let refreshToken: string | null = localStorage.getItem('refresh_token');
|
||||
|
||||
export const setTokens = (tokens: TokenResponse) => {
|
||||
accessToken = tokens.access_token;
|
||||
refreshToken = tokens.refresh_token;
|
||||
localStorage.setItem('access_token', tokens.access_token);
|
||||
localStorage.setItem('refresh_token', tokens.refresh_token);
|
||||
};
|
||||
|
||||
export const clearTokens = () => {
|
||||
accessToken = null;
|
||||
refreshToken = null;
|
||||
localStorage.removeItem('access_token');
|
||||
localStorage.removeItem('refresh_token');
|
||||
};
|
||||
|
||||
export const getAccessToken = () => accessToken;
|
||||
|
||||
// Request interceptor to add auth header
|
||||
api.interceptors.request.use((config: InternalAxiosRequestConfig) => {
|
||||
if (accessToken) {
|
||||
config.headers.Authorization = `Bearer ${accessToken}`;
|
||||
}
|
||||
return config;
|
||||
});
|
||||
|
||||
// Response interceptor to handle token refresh
|
||||
api.interceptors.response.use(
|
||||
(response) => response,
|
||||
async (error: AxiosError) => {
|
||||
const originalRequest = error.config as InternalAxiosRequestConfig & { _retry?: boolean };
|
||||
|
||||
if (error.response?.status === 401 && !originalRequest._retry && refreshToken) {
|
||||
originalRequest._retry = true;
|
||||
|
||||
try {
|
||||
const response = await axios.post<TokenResponse>(`${API_BASE_URL}/auth/refresh`, {
|
||||
refresh_token: refreshToken,
|
||||
});
|
||||
|
||||
setTokens(response.data);
|
||||
originalRequest.headers.Authorization = `Bearer ${response.data.access_token}`;
|
||||
|
||||
return api(originalRequest);
|
||||
} catch {
|
||||
clearTokens();
|
||||
window.location.href = '/login';
|
||||
}
|
||||
}
|
||||
|
||||
return Promise.reject(error);
|
||||
}
|
||||
);
|
||||
|
||||
// Auth API
|
||||
export const authApi = {
|
||||
register: async (email: string, password: string): Promise<User> => {
|
||||
const response = await api.post<User>('/auth/register', { email, password });
|
||||
return response.data;
|
||||
},
|
||||
|
||||
login: async (email: string, password: string): Promise<TokenResponse> => {
|
||||
const response = await api.post<TokenResponse>('/auth/login', { email, password });
|
||||
setTokens(response.data);
|
||||
return response.data;
|
||||
},
|
||||
|
||||
getMe: async (): Promise<User> => {
|
||||
const response = await api.get<User>('/auth/me');
|
||||
return response.data;
|
||||
},
|
||||
|
||||
logout: () => {
|
||||
clearTokens();
|
||||
},
|
||||
};
|
||||
|
||||
// Model types
|
||||
export interface ModelInfo {
|
||||
id: string;
|
||||
name: string;
|
||||
provider: string;
|
||||
}
|
||||
|
||||
export interface ModelsResponse {
|
||||
models: ModelInfo[];
|
||||
default: string;
|
||||
}
|
||||
|
||||
// Analysis API
|
||||
/**
 * Client for the analysis and background-job endpoints.
 *
 * Every method resolves with the response body (`response.data`) and lets
 * request errors from `api` propagate to the caller.
 */
export const analysisApi = {
  /**
   * Analyze one company via GET `/analyze/{companyName}`.
   *
   * @param companyName Company to analyze; URL-encoded before being placed in the path.
   * @param model Optional model id; sent as the `model` query parameter only when truthy.
   */
  analyzeCompany: async (companyName: string, model?: string): Promise<CompanyAnalysis> => {
    const params = new URLSearchParams();
    if (model) params.append('model', model);
    const qs = params.toString();
    // Only append "?" when there is at least one query parameter.
    const response = await api.get<CompanyAnalysis>(
      `/analyze/${encodeURIComponent(companyName)}${qs ? `?${qs}` : ''}`
    );
    return response.data;
  },

  /**
   * Analyze several companies in one blocking request (POST `/analyze/batch`).
   *
   * @param companies Company names to analyze.
   * @param maxWorkers Sent as `max_workers`; defaults to 3.
   * @param model Optional model id; the `model` key is omitted from the body when falsy.
   */
  analyzeBatch: async (companies: string[], maxWorkers = 3, model?: string): Promise<BatchAnalysisResult> => {
    const response = await api.post<BatchAnalysisResult>('/analyze/batch', {
      companies,
      max_workers: maxWorkers,
      ...(model ? { model } : {}),
    });
    return response.data;
  },

  /**
   * Start a background batch analysis (POST `/analyze/batch/async`).
   *
   * Takes the same arguments as `analyzeBatch` but resolves with a `JobStatus`
   * that can be polled through `getJobStatus`.
   */
  analyzeBatchAsync: async (companies: string[], maxWorkers = 3, model?: string): Promise<JobStatus> => {
    const response = await api.post<JobStatus>('/analyze/batch/async', {
      companies,
      max_workers: maxWorkers,
      ...(model ? { model } : {}),
    });
    return response.data;
  },

  /** Fetch the available models (GET `/models`). */
  listModels: async (): Promise<ModelsResponse> => {
    const response = await api.get<ModelsResponse>('/models');
    return response.data;
  },

  /**
   * Fetch the status of one background job (GET `/jobs/{jobId}`).
   *
   * @param jobId Job identifier; URL-encoded so that reserved characters
   *   (`/`, `?`, `#`, ...) cannot change which route is requested.
   */
  getJobStatus: async (jobId: string): Promise<JobStatus> => {
    const response = await api.get<JobStatus>(`/jobs/${encodeURIComponent(jobId)}`);
    return response.data;
  },

  /**
   * List background jobs (GET `/jobs`).
   *
   * @param status Optional status filter; omitted from the query when falsy.
   * @param limit Maximum number of jobs requested; defaults to 10.
   */
  listJobs: async (status?: string, limit = 10): Promise<JobStatus[]> => {
    const params = new URLSearchParams();
    if (status) params.append('status', status);
    params.append('limit', limit.toString());
    const response = await api.get<JobStatus[]>(`/jobs?${params}`);
    return response.data;
  },
};
|
||||
|
||||
// Export API
|
||||
/**
 * Offer `data` to the user as a file download named `filename`.
 *
 * Wraps the data in a Blob (tagged with `mimeType` when given), points a
 * temporary anchor at an object URL for it and clicks the anchor. The anchor
 * and the object URL are released in `finally`, so neither leaks when a DOM
 * call throws part-way through.
 */
function saveBlobAs(data: BlobPart, filename: string, mimeType?: string): void {
  const blob = mimeType ? new Blob([data], { type: mimeType }) : new Blob([data]);
  const url = window.URL.createObjectURL(blob);
  const link = document.createElement('a');
  try {
    link.href = url;
    link.setAttribute('download', filename);
    document.body.appendChild(link);
    link.click();
  } finally {
    // remove() is a no-op if the anchor was never attached.
    link.remove();
    window.URL.revokeObjectURL(url);
  }
}

/** Client for the export endpoints; each method downloads the result in the browser. */
export const exportApi = {
  /**
   * Download the CSV export for a company (GET `/export/{companyName}`).
   *
   * The file is saved as `sparc_<name>_export.csv`, where `<name>` is the
   * lower-cased company name with whitespace runs replaced by `_`.
   */
  exportCsv: async (companyName: string): Promise<void> => {
    const response = await api.get(`/export/${encodeURIComponent(companyName)}`, {
      responseType: 'blob',
    });
    saveBlobAs(response.data, `sparc_${companyName.toLowerCase().replace(/\s+/g, '_')}_export.csv`);
  },

  /**
   * Download the PDF export for a company (GET `/export/{companyName}/pdf`).
   *
   * The file is saved as `<name>-analysis-<YYYY-MM-DD>.pdf`; the date is the
   * date part of the current time in ISO-8601 (UTC) form.
   */
  exportPdf: async (companyName: string): Promise<void> => {
    const response = await api.get(`/export/${encodeURIComponent(companyName)}/pdf`, {
      responseType: 'blob',
    });
    const safeName = companyName.toLowerCase().replace(/\s+/g, '_');
    const date = new Date().toISOString().split('T')[0];
    saveBlobAs(response.data, `${safeName}-analysis-${date}.pdf`, 'application/pdf');
  },
};
|
||||
|
||||
// Analytics API
|
||||
/** Response body of `analyticsApi.getTrends` (GET `/analytics/trends`). */
export interface TrendData {
  /** One row per month/company pair with its count. */
  by_month: { month: string; company_name: string; count: number }[];
  /** One row per month/analysis-type pair with its count. */
  by_type_over_time: { month: string; analysis_type: string; count: number }[];
  /** Presumably the length in days of the reported window; confirm against the backend. */
  period_days: number;
}
|
||||
|
||||
/** Client for the analytics endpoints; each method resolves with the response body. */
export const analyticsApi = {
  /**
   * Fetch analytics (GET `/analytics`).
   *
   * @param days Value of the `days` query parameter; defaults to 30.
   */
  getAnalytics: async (days = 30): Promise<Analytics> => {
    const endpoint = `/analytics?days=${days}`;
    const { data } = await api.get<Analytics>(endpoint);
    return data;
  },

  /**
   * Fetch trend data (GET `/analytics/trends`).
   *
   * @param days Value of the `days` query parameter; defaults to 90.
   */
  getTrends: async (days = 90): Promise<TrendData> => {
    const endpoint = `/analytics/trends?days=${days}`;
    const { data } = await api.get<TrendData>(endpoint);
    return data;
  },
};
|
||||
|
||||
// Admin API
|
||||
/** Client for the `/admin/users` endpoints. */
export const adminApi = {
  /**
   * Fetch one page of users (GET `/admin/users`).
   *
   * @param limit Value of the `limit` query parameter; defaults to 100.
   * @param offset Value of the `offset` query parameter; defaults to 0.
   */
  listUsers: async (limit = 100, offset = 0): Promise<User[]> => {
    const endpoint = `/admin/users?limit=${limit}&offset=${offset}`;
    const { data } = await api.get<User[]>(endpoint);
    return data;
  },

  /**
   * Change a user's role (PATCH `/admin/users/{userId}/role`).
   *
   * @returns The response body, typed as `User`.
   */
  updateUserRole: async (userId: number, role: 'admin' | 'user'): Promise<User> => {
    const body = { role };
    const { data } = await api.patch<User>(`/admin/users/${userId}/role`, body);
    return data;
  },

  /** Delete a user (DELETE `/admin/users/{userId}`); the response body is ignored. */
  deleteUser: async (userId: number): Promise<void> => {
    const endpoint = `/admin/users/${userId}`;
    await api.delete(endpoint);
  },
};
|
||||
|
||||
export default api;
|
||||
File diff suppressed because it is too large
Load Diff
Vendored
+975
@@ -0,0 +1,975 @@
|
||||
/**
|
||||
* This file was auto-generated by openapi-typescript.
|
||||
* Do not make direct changes to the file.
|
||||
*/
|
||||
|
||||
export interface paths {
|
||||
"/auth/register": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
get?: never;
|
||||
put?: never;
|
||||
/**
|
||||
* Register
|
||||
* @description Register a new user.
|
||||
*
|
||||
* The first registered user automatically becomes an admin.
|
||||
*/
|
||||
post: operations["register_auth_register_post"];
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/auth/login": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
get?: never;
|
||||
put?: never;
|
||||
/**
|
||||
* Login
|
||||
* @description Authenticate user and return JWT tokens.
|
||||
*/
|
||||
post: operations["login_auth_login_post"];
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/auth/refresh": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
get?: never;
|
||||
put?: never;
|
||||
/**
|
||||
* Refresh Token
|
||||
* @description Refresh access token using refresh token.
|
||||
*/
|
||||
post: operations["refresh_token_auth_refresh_post"];
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/auth/me": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
/**
|
||||
* Get Me
|
||||
* @description Get current authenticated user.
|
||||
*/
|
||||
get: operations["get_me_auth_me_get"];
|
||||
put?: never;
|
||||
post?: never;
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/admin/users": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
/**
|
||||
* List Users
|
||||
* @description List all users (admin only).
|
||||
*/
|
||||
get: operations["list_users_admin_users_get"];
|
||||
put?: never;
|
||||
post?: never;
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/admin/users/{user_id}/role": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
get?: never;
|
||||
put?: never;
|
||||
post?: never;
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
/**
|
||||
* Update User Role
|
||||
* @description Update a user's role (admin only).
|
||||
*/
|
||||
patch: operations["update_user_role_admin_users__user_id__role_patch"];
|
||||
trace?: never;
|
||||
};
|
||||
"/admin/users/{user_id}": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
get?: never;
|
||||
put?: never;
|
||||
post?: never;
|
||||
/**
|
||||
* Delete User
|
||||
* @description Delete a user (admin only).
|
||||
*/
|
||||
delete: operations["delete_user_admin_users__user_id__delete"];
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/analytics": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
/**
|
||||
* Get Analytics
|
||||
* @description Get analytics data (authenticated users only).
|
||||
*/
|
||||
get: operations["get_analytics_analytics_get"];
|
||||
put?: never;
|
||||
post?: never;
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/health": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
/**
|
||||
* Health Check
|
||||
* @description Check API health status.
|
||||
*/
|
||||
get: operations["health_check_health_get"];
|
||||
put?: never;
|
||||
post?: never;
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/analyze/{company_name}": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
/**
|
||||
* Analyze Company
|
||||
* @description Analyze a single company's patent portfolio.
|
||||
*
|
||||
* This endpoint retrieves recent patents for the specified company,
|
||||
* parses them, and uses AI to generate a comprehensive analysis.
|
||||
*
|
||||
* Args:
|
||||
* company_name: Name of the company to analyze (e.g., "nvidia", "intel")
|
||||
*
|
||||
* Returns:
|
||||
* Analysis results including patent count, AI insights, and success status
|
||||
*/
|
||||
get: operations["analyze_company_analyze__company_name__get"];
|
||||
put?: never;
|
||||
post?: never;
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/analyze/batch": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
get?: never;
|
||||
put?: never;
|
||||
/**
|
||||
* Analyze Companies Batch
|
||||
* @description Analyze multiple companies' patent portfolios.
|
||||
*
|
||||
* Processes companies concurrently for improved performance.
|
||||
* Limited to 20 companies per request.
|
||||
*
|
||||
* Args:
|
||||
* request: List of company names and optional worker count
|
||||
*
|
||||
* Returns:
|
||||
* Batch results with individual company analyses and summary statistics
|
||||
*/
|
||||
post: operations["analyze_companies_batch_analyze_batch_post"];
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/analyze/batch/async": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
get?: never;
|
||||
put?: never;
|
||||
/**
|
||||
* Analyze Companies Async
|
||||
* @description Start an asynchronous batch analysis job.
|
||||
*
|
||||
* Returns immediately with a job ID that can be used to poll for status.
|
||||
* Useful for large batch analyses that may take a long time.
|
||||
*
|
||||
* Args:
|
||||
* request: List of company names and optional worker count
|
||||
*
|
||||
* Returns:
|
||||
* Job status with job_id for polling
|
||||
*/
|
||||
post: operations["analyze_companies_async_analyze_batch_async_post"];
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/jobs/{job_id}": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
/**
|
||||
* Get Job Status
|
||||
* @description Get the status of a background analysis job.
|
||||
*
|
||||
* Args:
|
||||
* job_id: The job ID returned from the async batch endpoint
|
||||
*
|
||||
* Returns:
|
||||
* Current job status including progress and results when complete
|
||||
*/
|
||||
get: operations["get_job_status_jobs__job_id__get"];
|
||||
put?: never;
|
||||
post?: never;
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
"/jobs": {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
/**
|
||||
* List Jobs
|
||||
* @description List all analysis jobs.
|
||||
*
|
||||
* Args:
|
||||
* status: Optional filter by job status
|
||||
* limit: Maximum number of jobs to return (default 10, max 100)
|
||||
*
|
||||
* Returns:
|
||||
* List of job statuses
|
||||
*/
|
||||
get: operations["list_jobs_jobs_get"];
|
||||
put?: never;
|
||||
post?: never;
|
||||
delete?: never;
|
||||
options?: never;
|
||||
head?: never;
|
||||
patch?: never;
|
||||
trace?: never;
|
||||
};
|
||||
}
|
||||
export type webhooks = Record<string, never>;
|
||||
export interface components {
|
||||
schemas: {
|
||||
/**
|
||||
* AnalyticsResponse
|
||||
* @description Analytics response model.
|
||||
*/
|
||||
AnalyticsResponse: {
|
||||
/** Total Messages */
|
||||
total_messages: number;
|
||||
/** By Company */
|
||||
by_company: {
|
||||
[key: string]: unknown;
|
||||
}[];
|
||||
/** By Type */
|
||||
by_type: {
|
||||
[key: string]: unknown;
|
||||
}[];
|
||||
/** Period Days */
|
||||
period_days: number;
|
||||
};
|
||||
/**
|
||||
* BatchAnalysisRequest
|
||||
* @description Request model for batch company analysis.
|
||||
*/
|
||||
BatchAnalysisRequest: {
|
||||
/**
|
||||
* Companies
|
||||
* @description List of company names to analyze
|
||||
*/
|
||||
companies: string[];
|
||||
/**
|
||||
* Max Workers
|
||||
* @description Max concurrent analyses
|
||||
* @default 3
|
||||
*/
|
||||
max_workers: number;
|
||||
};
|
||||
/**
|
||||
* BatchAnalysisResponse
|
||||
* @description Response model for batch company analysis.
|
||||
*/
|
||||
BatchAnalysisResponse: {
|
||||
/** Results */
|
||||
results: components["schemas"]["CompanyAnalysisResponse"][];
|
||||
/** Total Companies */
|
||||
total_companies: number;
|
||||
/** Successful */
|
||||
successful: number;
|
||||
/** Failed */
|
||||
failed: number;
|
||||
/**
|
||||
* Timestamp
|
||||
* Format: date-time
|
||||
*/
|
||||
timestamp: string;
|
||||
};
|
||||
/**
|
||||
* CompanyAnalysisResponse
|
||||
* @description Response model for single company analysis.
|
||||
*/
|
||||
CompanyAnalysisResponse: {
|
||||
/** Company Name */
|
||||
company_name: string;
|
||||
/** Analysis */
|
||||
analysis: string;
|
||||
/** Patent Count */
|
||||
patent_count: number;
|
||||
/** Success */
|
||||
success: boolean;
|
||||
/** Error */
|
||||
error?: string | null;
|
||||
/**
|
||||
* Timestamp
|
||||
* Format: date-time
|
||||
*/
|
||||
timestamp: string;
|
||||
};
|
||||
/** HTTPValidationError */
|
||||
HTTPValidationError: {
|
||||
/** Detail */
|
||||
detail?: components["schemas"]["ValidationError"][];
|
||||
};
|
||||
/**
|
||||
* HealthResponse
|
||||
* @description Health check response.
|
||||
*/
|
||||
HealthResponse: {
|
||||
/** Status */
|
||||
status: string;
|
||||
/** Version */
|
||||
version: string;
|
||||
/**
|
||||
* Timestamp
|
||||
* Format: date-time
|
||||
*/
|
||||
timestamp: string;
|
||||
};
|
||||
/**
|
||||
* JobStatus
|
||||
* @description Status of a background analysis job.
|
||||
*/
|
||||
JobStatus: {
|
||||
/** Job Id */
|
||||
job_id: string;
|
||||
/** Status */
|
||||
status: string;
|
||||
/** Progress */
|
||||
progress: number;
|
||||
/** Total Companies */
|
||||
total_companies: number;
|
||||
/** Completed Companies */
|
||||
completed_companies: number;
|
||||
result?: components["schemas"]["BatchAnalysisResponse"] | null;
|
||||
/** Error */
|
||||
error?: string | null;
|
||||
};
|
||||
/**
|
||||
* LoginRequest
|
||||
* @description User login request.
|
||||
*/
|
||||
LoginRequest: {
|
||||
/**
|
||||
* Email
|
||||
* Format: email
|
||||
*/
|
||||
email: string;
|
||||
/** Password */
|
||||
password: string;
|
||||
};
|
||||
/**
|
||||
* RefreshRequest
|
||||
* @description Token refresh request.
|
||||
*/
|
||||
RefreshRequest: {
|
||||
/** Refresh Token */
|
||||
refresh_token: string;
|
||||
};
|
||||
/**
|
||||
* RegisterRequest
|
||||
* @description User registration request.
|
||||
*/
|
||||
RegisterRequest: {
|
||||
/**
|
||||
* Email
|
||||
* Format: email
|
||||
*/
|
||||
email: string;
|
||||
/**
|
||||
* Password
|
||||
* @description Password (min 8 characters)
|
||||
*/
|
||||
password: string;
|
||||
};
|
||||
/**
|
||||
* TokenResponse
|
||||
* @description Token response model.
|
||||
*/
|
||||
TokenResponse: {
|
||||
/** Access Token */
|
||||
access_token: string;
|
||||
/** Refresh Token */
|
||||
refresh_token: string;
|
||||
/**
|
||||
* Token Type
|
||||
* @default bearer
|
||||
*/
|
||||
token_type: string;
|
||||
};
|
||||
/**
|
||||
* UpdateRoleRequest
|
||||
* @description Update user role request.
|
||||
*/
|
||||
UpdateRoleRequest: {
|
||||
/** Role */
|
||||
role: string;
|
||||
};
|
||||
/**
|
||||
* UserResponse
|
||||
* @description User response model.
|
||||
*/
|
||||
UserResponse: {
|
||||
/** Id */
|
||||
id: number;
|
||||
/** Email */
|
||||
email: string;
|
||||
/** Role */
|
||||
role: string;
|
||||
/**
|
||||
* Created At
|
||||
* Format: date-time
|
||||
*/
|
||||
created_at: string;
|
||||
};
|
||||
/** ValidationError */
|
||||
ValidationError: {
|
||||
/** Location */
|
||||
loc: (string | number)[];
|
||||
/** Message */
|
||||
msg: string;
|
||||
/** Error Type */
|
||||
type: string;
|
||||
/** Input */
|
||||
input?: unknown;
|
||||
/** Context */
|
||||
ctx?: Record<string, never>;
|
||||
};
|
||||
};
|
||||
responses: never;
|
||||
parameters: never;
|
||||
requestBodies: never;
|
||||
headers: never;
|
||||
pathItems: never;
|
||||
}
|
||||
export type $defs = Record<string, never>;
|
||||
export interface operations {
|
||||
register_auth_register_post: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody: {
|
||||
content: {
|
||||
"application/json": components["schemas"]["RegisterRequest"];
|
||||
};
|
||||
};
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["UserResponse"];
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
login_auth_login_post: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody: {
|
||||
content: {
|
||||
"application/json": components["schemas"]["LoginRequest"];
|
||||
};
|
||||
};
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["TokenResponse"];
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
refresh_token_auth_refresh_post: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody: {
|
||||
content: {
|
||||
"application/json": components["schemas"]["RefreshRequest"];
|
||||
};
|
||||
};
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["TokenResponse"];
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
get_me_auth_me_get: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody?: never;
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["UserResponse"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
list_users_admin_users_get: {
|
||||
parameters: {
|
||||
query?: {
|
||||
limit?: number;
|
||||
offset?: number;
|
||||
};
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody?: never;
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["UserResponse"][];
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
update_user_role_admin_users__user_id__role_patch: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path: {
|
||||
user_id: number;
|
||||
};
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody: {
|
||||
content: {
|
||||
"application/json": components["schemas"]["UpdateRoleRequest"];
|
||||
};
|
||||
};
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["UserResponse"];
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
delete_user_admin_users__user_id__delete: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path: {
|
||||
user_id: number;
|
||||
};
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody?: never;
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": unknown;
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
get_analytics_analytics_get: {
|
||||
parameters: {
|
||||
query?: {
|
||||
days?: number;
|
||||
};
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody?: never;
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["AnalyticsResponse"];
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
health_check_health_get: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody?: never;
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HealthResponse"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
analyze_company_analyze__company_name__get: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path: {
|
||||
company_name: string;
|
||||
};
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody?: never;
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["CompanyAnalysisResponse"];
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
analyze_companies_batch_analyze_batch_post: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody: {
|
||||
content: {
|
||||
"application/json": components["schemas"]["BatchAnalysisRequest"];
|
||||
};
|
||||
};
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["BatchAnalysisResponse"];
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
analyze_companies_async_analyze_batch_async_post: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody: {
|
||||
content: {
|
||||
"application/json": components["schemas"]["BatchAnalysisRequest"];
|
||||
};
|
||||
};
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["JobStatus"];
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
get_job_status_jobs__job_id__get: {
|
||||
parameters: {
|
||||
query?: never;
|
||||
header?: never;
|
||||
path: {
|
||||
job_id: string;
|
||||
};
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody?: never;
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["JobStatus"];
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
list_jobs_jobs_get: {
|
||||
parameters: {
|
||||
query?: {
|
||||
/** @description Filter by status: pending, running, completed, failed */
|
||||
status?: string | null;
|
||||
limit?: number;
|
||||
};
|
||||
header?: never;
|
||||
path?: never;
|
||||
cookie?: never;
|
||||
};
|
||||
requestBody?: never;
|
||||
responses: {
|
||||
/** @description Successful Response */
|
||||
200: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["JobStatus"][];
|
||||
};
|
||||
};
|
||||
/** @description Validation Error */
|
||||
422: {
|
||||
headers: {
|
||||
[name: string]: unknown;
|
||||
};
|
||||
content: {
|
||||
"application/json": components["schemas"]["HTTPValidationError"];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,118 @@
|
||||
import { Outlet, NavLink, useNavigate } from 'react-router-dom';
|
||||
import { useAuth } from '../context/AuthContext';
|
||||
import { useTheme } from '../context/ThemeContext';
|
||||
import { Search, Layers, BarChart3, Info, Users, LogOut, GitCompareArrows, Sun, Moon } from 'lucide-react';
|
||||
|
||||
/**
 * Application shell: header (brand, desktop navigation, theme toggle, user
 * info, logout), a fixed bottom navigation bar for small screens, and the
 * routed page content rendered through `<Outlet />`.
 *
 * The "Users" navigation entry is only present when `isAdmin` is true.
 */
export function Layout() {
  const { user, isAdmin, logout } = useAuth();
  const { theme, toggleTheme } = useTheme();
  const navigate = useNavigate();

  // Clear the session first, then send the user to the login page.
  const handleLogout = () => {
    logout();
    navigate('/login');
  };

  // Built in one expression so the list is never mutated after creation.
  const navItems = [
    { to: '/analysis', icon: Search, label: 'Analysis' },
    { to: '/batch', icon: Layers, label: 'Batch' },
    { to: '/analytics', icon: BarChart3, label: 'Analytics' },
    { to: '/compare', icon: GitCompareArrows, label: 'Compare' },
    { to: '/about', icon: Info, label: 'About' },
    ...(isAdmin ? [{ to: '/admin/users', icon: Users, label: 'Users' }] : []),
  ];

  return (
    <div className="min-h-screen bg-gradient-to-br from-bg-dark to-slate-100 dark:to-indigo-950">
      {/* Header */}
      <header className="bg-bg-card/80 backdrop-blur-lg border-b border-primary/20">
        <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
          <div className="flex items-center justify-between h-16">
            {/* Brand */}
            <div className="flex items-center gap-3">
              {/* Decorative glyph: hidden from assistive technology. */}
              <span className="text-2xl" aria-hidden="true">⚡</span>
              <div>
                <h1 className="text-xl font-bold bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent">
                  SPARC
                </h1>
                <span className="text-xs text-text-secondary uppercase tracking-wider">
                  Semiconductor Patent Analytics
                </span>
              </div>
            </div>

            {/* Navigation (labelled so the two nav landmarks can be told apart) */}
            <nav
              aria-label="Main navigation"
              className="hidden md:flex items-center gap-1 bg-bg-card/60 rounded-xl p-1 border border-primary/15"
            >
              {navItems.map(({ to, icon: Icon, label }) => (
                <NavLink
                  key={to}
                  to={to}
                  className={({ isActive }) =>
                    `flex items-center gap-2 px-4 py-2 rounded-lg text-sm font-medium transition-all ${
                      isActive
                        ? 'bg-gradient-to-r from-primary to-primary-dark text-white'
                        : 'text-text-secondary hover:text-text-primary hover:bg-bg-card-hover'
                    }`
                  }
                >
                  <Icon size={16} />
                  {label}
                </NavLink>
              ))}
            </nav>

            {/* User menu */}
            <div className="flex items-center gap-4">
              <button
                type="button"
                onClick={toggleTheme}
                className="p-2 rounded-lg text-text-secondary hover:text-text-primary hover:bg-bg-card-hover transition-all"
                aria-label={theme === 'dark' ? 'Switch to light mode' : 'Switch to dark mode'}
              >
                {theme === 'dark' ? <Sun size={18} /> : <Moon size={18} />}
              </button>
              <div className="text-right hidden sm:block">
                <div className="text-sm font-medium text-text-primary">{user?.email}</div>
                <div className="text-xs text-text-secondary capitalize">{user?.role}</div>
              </div>
              {/* The text label is hidden below `sm`, so the button needs an explicit name. */}
              <button
                type="button"
                onClick={handleLogout}
                aria-label="Logout"
                className="flex items-center gap-2 px-3 py-2 rounded-lg text-text-secondary hover:text-error hover:bg-error/10 transition-all"
              >
                <LogOut size={18} />
                <span className="hidden sm:inline">Logout</span>
              </button>
            </div>
          </div>
        </div>
      </header>

      {/* Mobile Navigation */}
      <nav
        aria-label="Mobile navigation"
        className="md:hidden fixed bottom-0 left-0 right-0 bg-bg-card/95 backdrop-blur-lg border-t border-primary/20 z-50"
      >
        <div className="flex justify-around py-2">
          {navItems.map(({ to, icon: Icon, label }) => (
            <NavLink
              key={to}
              to={to}
              className={({ isActive }) =>
                `flex flex-col items-center gap-1 px-3 py-2 rounded-lg text-xs font-medium transition-all ${
                  isActive ? 'text-primary' : 'text-text-secondary'
                }`
              }
            >
              <Icon size={20} />
              {label}
            </NavLink>
          ))}
        </div>
      </nav>

      {/* Main content (extra bottom padding below `md` leaves room for the fixed mobile nav) */}
      <main className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8 pb-24 md:pb-8">
        <Outlet />
      </main>
    </div>
  );
}
|
||||
@@ -0,0 +1,30 @@
|
||||
import { Navigate, useLocation } from 'react-router-dom';
|
||||
import { useAuth } from '../context/AuthContext';
|
||||
|
||||
/** Props accepted by `ProtectedRoute`. */
interface ProtectedRouteProps {
  /** Content rendered once every access check passes. */
  children: React.ReactNode;
  /** When true, the current user must also be an admin. Defaults to false. */
  requireAdmin?: boolean;
}

/**
 * Route guard driven by the auth context.
 *
 * - While auth state is loading, renders a full-screen spinner.
 * - Unauthenticated visitors are redirected to `/login`, with the current
 *   location passed along in router state as `from`.
 * - Authenticated non-admins on an admin-only route are redirected to `/analysis`.
 * - Otherwise the children are rendered unchanged.
 */
export function ProtectedRoute({ children, requireAdmin = false }: ProtectedRouteProps) {
  const auth = useAuth();
  const currentLocation = useLocation();

  if (auth.isLoading) {
    return (
      <div className="min-h-screen bg-gradient-to-br from-bg-dark to-slate-100 dark:to-indigo-950 flex items-center justify-center">
        <div className="animate-spin rounded-full h-12 w-12 border-t-2 border-b-2 border-primary"></div>
      </div>
    );
  }

  if (!auth.isAuthenticated) {
    return <Navigate to="/login" state={{ from: currentLocation }} replace />;
  }

  const adminAccessDenied = requireAdmin && !auth.isAdmin;
  return adminAccessDenied ? <Navigate to="/analysis" replace /> : <>{children}</>;
}
|
||||
@@ -0,0 +1,81 @@
|
||||
import { createContext, useContext, useState, useEffect, ReactNode } from 'react';
|
||||
import { authApi, getAccessToken } from '../api/client';
|
||||
import type { User } from '../types';
|
||||
|
||||
/** Shape of the value published through the auth context. */
interface AuthContextType {
  /** The loaded user, or null when nobody is signed in. */
  user: User | null;
  /** True until the provider's initial session check has finished. */
  isLoading: boolean;
  /** Derived flag: true whenever `user` is set. */
  isAuthenticated: boolean;
  /** Derived flag: true when the user's role is 'admin'. */
  isAdmin: boolean;
  /** Sign in, then reload the current user. */
  login: (email: string, password: string) => Promise<void>;
  /** Create an account, sign in with it, then reload the current user. */
  register: (email: string, password: string) => Promise<void>;
  /** Sign out and clear the current user. */
  logout: () => void;
  /** Re-fetch the current user; a failed fetch clears the user. */
  refreshUser: () => Promise<void>;
}

// `undefined` is the default value so that useAuth can detect a missing provider.
const AuthContext = createContext<AuthContextType | undefined>(undefined);
|
||||
|
||||
/**
 * Owns the signed-in user state and exposes it, together with the auth
 * actions, to every descendant through AuthContext.
 */
export function AuthProvider({ children }: { children: ReactNode }) {
  const [user, setUser] = useState<User | null>(null);
  const [isLoading, setIsLoading] = useState(true);

  // Any failure while fetching the user is treated as "not signed in".
  async function refreshUser() {
    try {
      const fetchedUser = await authApi.getMe();
      setUser(fetchedUser);
    } catch {
      setUser(null);
    }
  }

  // Runs once on mount: only query the API when getAccessToken() yields a
  // truthy value, and always leave the loading state afterwards.
  useEffect(() => {
    const bootstrapSession = async () => {
      const hasToken = getAccessToken();
      if (hasToken) {
        await refreshUser();
      }
      setIsLoading(false);
    };

    bootstrapSession();
  }, []);

  async function login(email: string, password: string) {
    await authApi.login(email, password);
    await refreshUser();
  }

  async function register(email: string, password: string) {
    await authApi.register(email, password);
    // Registration is followed by an explicit login with the same credentials.
    await authApi.login(email, password);
    await refreshUser();
  }

  function logout() {
    authApi.logout();
    setUser(null);
  }

  const contextValue = {
    user,
    isLoading,
    isAuthenticated: !!user,
    isAdmin: user?.role === 'admin',
    login,
    register,
    logout,
    refreshUser,
  };

  return <AuthContext.Provider value={contextValue}>{children}</AuthContext.Provider>;
}
|
||||
|
||||
/**
 * Returns the auth context value.
 *
 * @throws Error when called outside of an AuthProvider.
 */
export function useAuth() {
  const auth = useContext(AuthContext);

  if (auth !== undefined) {
    return auth;
  }

  throw new Error('useAuth must be used within an AuthProvider');
}
|
||||
@@ -0,0 +1,48 @@
|
||||
import { createContext, useContext, useEffect, useState } from 'react';
|
||||
|
||||
/** The two supported color schemes. */
type Theme = 'light' | 'dark';

/** Shape of the value published through the theme context. */
interface ThemeContextType {
  /** The active color scheme. */
  theme: Theme;
  /** Flip between 'light' and 'dark'. */
  toggleTheme: () => void;
}

// `undefined` is the default value so that useTheme can detect a missing provider.
const ThemeContext = createContext<ThemeContextType | undefined>(undefined);
|
||||
|
||||
/**
 * Picks the theme to start with.
 *
 * A valid value stored under the 'theme' key in localStorage wins; otherwise
 * the OS-level `prefers-color-scheme` media query decides.
 *
 * Reading localStorage can throw (for example a SecurityError when the
 * browser blocks site storage). Because this function is the `useState`
 * initializer of ThemeProvider, an uncaught throw would crash the first
 * render, so a failed read is treated like "nothing stored".
 */
function getInitialTheme(): Theme {
  let stored: string | null = null;
  try {
    stored = localStorage.getItem('theme');
  } catch {
    // Storage unavailable: fall through to the system preference.
  }

  if (stored === 'light' || stored === 'dark') return stored;

  return window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light';
}
|
||||
|
||||
/**
 * Holds the active theme and exposes it, together with a toggle, through
 * ThemeContext.
 *
 * Whenever the theme changes, the `dark` class on <html> is synced and the
 * choice is written to localStorage under the 'theme' key.
 */
export function ThemeProvider({ children }: { children: React.ReactNode }) {
  const [theme, setTheme] = useState<Theme>(getInitialTheme);

  useEffect(() => {
    // Add the class for 'dark', remove it for 'light'.
    document.documentElement.classList.toggle('dark', theme === 'dark');

    // Persistence is best-effort: setItem can throw (quota exceeded, storage
    // blocked), and an uncaught error inside an effect would take down the
    // whole tree. The theme still applies for the current session.
    try {
      localStorage.setItem('theme', theme);
    } catch {
      // Ignore: the preference simply is not remembered.
    }
  }, [theme]);

  const toggleTheme = () => {
    setTheme((prev) => (prev === 'dark' ? 'light' : 'dark'));
  };

  return (
    <ThemeContext.Provider value={{ theme, toggleTheme }}>
      {children}
    </ThemeContext.Provider>
  );
}
|
||||
|
||||
/**
 * Returns the theme context value.
 *
 * @throws Error when called outside of a ThemeProvider.
 */
export function useTheme() {
  const themeContext = useContext(ThemeContext);

  if (themeContext) {
    return themeContext;
  }

  throw new Error('useTheme must be used within a ThemeProvider');
}
|
||||
@@ -0,0 +1,41 @@
|
||||
import { useTheme } from './ThemeContext';
|
||||
|
||||
/**
 * Hook producing concrete color strings for recharts, keyed off the active
 * theme.
 *
 * Recharts accepts only raw color strings (not CSS variables), so the
 * light/dark palette is spelled out here instead of being read from the
 * Tailwind/CSS-variable theme.
 */
export function useChartTheme() {
  const { theme } = useTheme();
  const dark = theme === 'dark';

  // Each value is computed once and reused by the convenience style objects.
  const background = dark ? '#1e293b' : '#ffffff';
  const border = dark
    ? '1px solid rgba(99, 102, 241, 0.3)'
    : '1px solid rgba(99, 102, 241, 0.2)';
  const labelColor = dark ? '#f8fafc' : '#0f172a';

  return {
    /** Stroke color for axis ticks and grid lines */
    axisStroke: dark ? '#94a3b8' : '#64748b',
    /** Background of the tooltip container */
    tooltipBg: background,
    /** Border of the tooltip container */
    tooltipBorder: border,
    /** Text color of the tooltip label */
    tooltipLabelColor: labelColor,
    /** Text color of tooltip items */
    tooltipItemColor: dark ? '#e2e8f0' : '#334155',
    /** Ready-made contentStyle object for the recharts Tooltip */
    tooltipContentStyle: {
      backgroundColor: background,
      border: border,
      borderRadius: '8px',
      color: labelColor,
    },
    /** Ready-made labelStyle object for the recharts Tooltip */
    tooltipLabelStyle: {
      color: labelColor,
    },
  };
}
|
||||
@@ -0,0 +1,54 @@
|
||||
@tailwind base;
@tailwind components;
@tailwind utilities;

/* Theme tokens: light palette, used unless the .dark class is present */
:root {
  --color-bg-dark: #f1f5f9;
  --color-bg-card: #ffffff;
  --color-bg-card-hover: #e2e8f0;
  --color-text-primary: #0f172a;
  --color-text-secondary: #475569;
  --color-border: #cbd5e1;
}

/* Theme tokens: dark palette, overrides the values above under .dark */
.dark {
  --color-bg-dark: #0f172a;
  --color-bg-card: #1e293b;
  --color-bg-card-hover: #334155;
  --color-text-primary: #f8fafc;
  --color-text-secondary: #94a3b8;
  --color-border: #334155;
}

/* System font stack with font smoothing */
body {
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

/* Scrollbar styling (WebKit pseudo-elements only) */
::-webkit-scrollbar {
  width: 8px;
  height: 8px;
}

/* The track follows the themed card background */
::-webkit-scrollbar-track {
  background: var(--color-bg-card);
}

::-webkit-scrollbar-thumb {
  background: #6366f1;
  border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
  background: #4f46e5;
}

/* Text selection highlight */
::selection {
  background: rgba(99, 102, 241, 0.3);
  color: var(--color-text-primary);
}
|
||||
@@ -0,0 +1,10 @@
|
||||
import { StrictMode } from 'react';
|
||||
import { createRoot } from 'react-dom/client';
|
||||
import App from './App';
|
||||
import './index.css';
|
||||
|
||||
// Mount the application into the #root element, wrapped in StrictMode.
// The non-null assertion assumes the host page contains an element with id "root".
const rootContainer = document.getElementById('root')!;

createRoot(rootContainer).render(
  <StrictMode>
    <App />
  </StrictMode>
);
|
||||
@@ -0,0 +1,171 @@
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import axios from 'axios';
|
||||
import { Search, FileText, Bot, Zap, Globe, BarChart3, CheckCircle, AlertTriangle, XCircle } from 'lucide-react';
|
||||
|
||||
// Base URL for backend requests; falls back to '/api' when VITE_API_URL is unset or empty.
const API_BASE_URL = import.meta.env.VITE_API_URL || '/api';

// Static copy for the "Key Features" list.
const FEATURES = [
  { icon: Search, title: 'Patent Retrieval', description: 'Automated collection via SerpAPI\'s Google Patents' },
  { icon: FileText, title: 'Intelligent Parsing', description: 'Extracts key sections from patent documents' },
  { icon: Bot, title: 'AI Analysis', description: 'Deep analysis powered by Claude 3.5 Sonnet' },
  { icon: Zap, title: 'Batch Processing', description: 'Analyze multiple companies concurrently' },
  { icon: Globe, title: 'REST API', description: 'FastAPI web service for seamless integration' },
  { icon: BarChart3, title: 'Analytics', description: 'Track and visualize historical analysis data' },
];

// Static copy for the "Technology Stack" sidebar card.
const TECH_STACK = [
  { label: 'Backend', value: 'Python, FastAPI' },
  { label: 'AI Model', value: 'Claude 3.5 Sonnet' },
  { label: 'Database', value: 'PostgreSQL' },
  { label: 'Frontend', value: 'React, TailwindCSS' },
  { label: 'Data Source', value: 'SerpAPI Patents' },
];

// Fetches the backend health payload from `${API_BASE_URL}/health`.
async function fetchHealth() {
  const response = await axios.get(`${API_BASE_URL}/health`);
  return response.data;
}

/**
 * "About" page: product description, feature list, tech stack, API endpoint
 * hints and a system status row.
 *
 * The health endpoint is polled every 30 seconds; the API card shows
 * "online" while the query holds truthy data and "offline" otherwise.
 * The Database and Dashboard cards are static.
 */
export function About() {
  const healthQuery = useQuery({
    queryKey: ['health'],
    queryFn: fetchHealth,
    refetchInterval: 30000,
  });
  const apiStatus = healthQuery.data ? 'online' : 'offline';

  return (
    <div className="space-y-8">
      {/* Page heading */}
      <div>
        <h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
          About SPARC
        </h2>
      </div>

      <div className="grid grid-cols-1 lg:grid-cols-3 gap-8">
        {/* Left column: description and features */}
        <div className="lg:col-span-2 space-y-6">
          <p className="text-text-secondary leading-relaxed">
            <strong className="text-text-primary">SPARC</strong> (Semiconductor Patent & Analytics Report Core)
            is an AI-powered patent analysis platform that evaluates company performance by analyzing their
            patent portfolios with cutting-edge language models.
          </p>

          <div>
            <h3 className="text-lg font-semibold text-text-primary mb-4">Key Features</h3>
            <div className="space-y-3">
              {FEATURES.map(({ icon: Icon, title, description }) => (
                <div
                  key={title}
                  className="flex items-start gap-4 py-3 border-b border-primary/10 last:border-0"
                >
                  <div className="flex-shrink-0">
                    <Icon className="text-primary" size={20} />
                  </div>
                  <div>
                    <div className="font-medium text-text-primary">{title}</div>
                    <div className="text-sm text-text-secondary">{description}</div>
                  </div>
                </div>
              ))}
            </div>
          </div>
        </div>

        {/* Right column: tech stack and endpoints */}
        <div className="space-y-6">
          <div className="bg-gradient-to-br from-primary/10 to-secondary/5 border border-primary/20 rounded-xl p-5">
            <h3 className="font-semibold text-text-primary mb-4">Technology Stack</h3>
            <div className="space-y-3">
              {TECH_STACK.map(({ label, value }) => (
                <div key={label}>
                  <div className="text-primary text-sm">{label}</div>
                  <div className="text-text-secondary text-sm">{value}</div>
                </div>
              ))}
            </div>
          </div>

          <div className="bg-bg-card/60 border border-primary/15 rounded-xl p-5">
            <h3 className="font-semibold text-text-primary mb-4">API Endpoints</h3>
            <div className="space-y-2">
              <code className="block bg-bg-dark px-3 py-2 rounded text-sm text-text-secondary">
                http://localhost:8000/docs
              </code>
              <code className="block bg-bg-dark px-3 py-2 rounded text-sm text-text-secondary">
                http://localhost:8000/health
              </code>
            </div>
          </div>
        </div>
      </div>

      {/* Status row */}
      <div>
        <h3 className="text-lg font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-4">
          System Status
        </h3>
        <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
          <StatusCard label="API" status={apiStatus} />
          <StatusCard label="Database" status="configured" />
          <StatusCard label="Dashboard" status="online" />
        </div>
      </div>
    </div>
  );
}
|
||||
|
||||
/**
 * Small card showing a labelled status with a matching icon and color.
 *
 * Every Tailwind class is written out as a complete string. Tailwind finds
 * classes by scanning source text, so a name assembled at runtime (the
 * previous `${bg}/20`) is never detected and its CSS is never generated,
 * which left the icon badge without a background.
 *
 * @param label  Caption shown under the icon.
 * @param status Which of the three visual states to render.
 */
function StatusCard({ label, status }: { label: string; status: 'online' | 'offline' | 'configured' }) {
  const statusConfig = {
    online: { icon: CheckCircle, color: 'text-success', badgeBg: 'bg-success/20' },
    offline: { icon: XCircle, color: 'text-error', badgeBg: 'bg-error/20' },
    configured: { icon: AlertTriangle, color: 'text-warning', badgeBg: 'bg-warning/20' },
  };

  const { icon: Icon, color, badgeBg } = statusConfig[status];

  return (
    <div className="bg-gradient-to-br from-primary/10 to-secondary/10 border border-primary/20 rounded-xl p-5 text-center">
      <div className={`inline-flex items-center justify-center w-8 h-8 rounded-full ${badgeBg} mb-2`}>
        <Icon className={color} size={20} />
      </div>
      <div className="text-sm text-text-secondary uppercase tracking-wide">{label}</div>
      <div className={`font-semibold ${color} capitalize`}>{status}</div>
    </div>
  );
}
|
||||
@@ -0,0 +1,183 @@
|
||||
import { useState } from 'react';
|
||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import { adminApi } from '../api/client';
|
||||
import { useAuth } from '../context/AuthContext';
|
||||
import { Users, Shield, User, Trash2, AlertCircle } from 'lucide-react';
|
||||
import type { User as UserType } from '../types';
|
||||
|
||||
/**
 * Admin page listing all users, with promote/demote and delete actions.
 *
 * - The list is loaded through the 'admin-users' query and re-fetched after
 *   every successful mutation.
 * - The signed-in admin's own row is marked "(You)" and has no actions.
 * - Deleting is a two-step action: the trash button arms a per-row
 *   confirmation, and "Confirm" performs the delete.
 * - A failed role change or delete shows an error banner instead of being
 *   silently ignored.
 */
export function AdminUsers() {
  const { user: currentUser } = useAuth();
  const queryClient = useQueryClient();
  // Id of the user whose delete confirmation is currently shown, if any.
  const [deleteConfirm, setDeleteConfirm] = useState<number | null>(null);

  const { data: users, isLoading, isError } = useQuery({
    queryKey: ['admin-users'],
    queryFn: () => adminApi.listUsers(),
  });

  const updateRoleMutation = useMutation({
    mutationFn: ({ userId, role }: { userId: number; role: 'admin' | 'user' }) =>
      adminApi.updateUserRole(userId, role),
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: ['admin-users'] });
    },
  });

  const deleteMutation = useMutation({
    mutationFn: (userId: number) => adminApi.deleteUser(userId),
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: ['admin-users'] });
      setDeleteConfirm(null);
    },
  });

  // Flip the role: admins become users and vice versa.
  const handleRoleChange = (user: UserType) => {
    const newRole = user.role === 'admin' ? 'user' : 'admin';
    updateRoleMutation.mutate({ userId: user.id, role: newRole });
  };

  const handleDelete = (userId: number) => {
    deleteMutation.mutate(userId);
  };

  if (isLoading) {
    return (
      <div className="flex items-center justify-center min-h-[400px]">
        <div className="animate-spin rounded-full h-12 w-12 border-t-2 border-b-2 border-primary"></div>
      </div>
    );
  }

  if (isError) {
    return (
      <div className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-xl px-4 py-3">
        <AlertCircle size={18} />
        <span>Failed to load users.</span>
      </div>
    );
  }

  const actionFailed = updateRoleMutation.isError || deleteMutation.isError;

  return (
    <div className="space-y-6">
      {/* Header */}
      <div className="flex items-center justify-between">
        <div>
          <h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
            User Management
          </h2>
          <p className="text-text-secondary">Manage user accounts and permissions.</p>
        </div>
        <div className="flex items-center gap-2 bg-primary/10 border border-primary/20 rounded-xl px-4 py-2">
          <Users size={18} className="text-primary" />
          <span className="text-text-primary font-semibold">{users?.length || 0} Users</span>
        </div>
      </div>

      {/* Mutation error: surfaces failed role changes and deletes */}
      {actionFailed && (
        <div
          role="alert"
          className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-xl px-4 py-3"
        >
          <AlertCircle size={18} />
          <span>Action failed. Please try again.</span>
        </div>
      )}

      {/* Users Table */}
      <div className="bg-bg-card/60 border border-primary/15 rounded-2xl overflow-hidden">
        <div className="overflow-x-auto">
          <table className="w-full">
            <thead>
              <tr className="border-b border-primary/10">
                <th className="text-left px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
                  User
                </th>
                <th className="text-left px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
                  Role
                </th>
                <th className="text-left px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
                  Created
                </th>
                <th className="text-right px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
                  Actions
                </th>
              </tr>
            </thead>
            <tbody className="divide-y divide-primary/10">
              {users?.map((user) => (
                <tr key={user.id} className="hover:bg-bg-card-hover/50 transition-colors">
                  <td className="px-6 py-4">
                    <div className="flex items-center gap-3">
                      <div className="w-10 h-10 rounded-full bg-gradient-to-br from-primary/20 to-secondary/20 flex items-center justify-center">
                        {user.role === 'admin' ? (
                          <Shield className="text-primary" size={18} />
                        ) : (
                          <User className="text-secondary" size={18} />
                        )}
                      </div>
                      <div>
                        <div className="font-medium text-text-primary">{user.email}</div>
                        {user.id === currentUser?.id && (
                          <span className="text-xs text-primary">(You)</span>
                        )}
                      </div>
                    </div>
                  </td>
                  <td className="px-6 py-4">
                    <span
                      className={`inline-flex items-center gap-1 px-3 py-1 rounded-full text-xs font-semibold uppercase ${
                        user.role === 'admin'
                          ? 'bg-primary/20 text-primary border border-primary/30'
                          : 'bg-secondary/20 text-secondary border border-secondary/30'
                      }`}
                    >
                      {user.role === 'admin' ? <Shield size={12} /> : <User size={12} />}
                      {user.role}
                    </span>
                  </td>
                  <td className="px-6 py-4 text-text-secondary">
                    {new Date(user.created_at).toLocaleDateString()}
                  </td>
                  <td className="px-6 py-4">
                    <div className="flex items-center justify-end gap-2">
                      {/* No actions on the signed-in admin's own row */}
                      {user.id !== currentUser?.id && (
                        <>
                          <button
                            onClick={() => handleRoleChange(user)}
                            disabled={updateRoleMutation.isPending}
                            className={`px-3 py-1.5 rounded-lg text-sm font-medium transition-all ${
                              user.role === 'admin'
                                ? 'bg-secondary/10 text-secondary hover:bg-secondary/20 border border-secondary/30'
                                : 'bg-primary/10 text-primary hover:bg-primary/20 border border-primary/30'
                            } disabled:opacity-50`}
                          >
                            {user.role === 'admin' ? 'Demote' : 'Promote'}
                          </button>

                          {deleteConfirm === user.id ? (
                            <div className="flex items-center gap-1">
                              <button
                                onClick={() => handleDelete(user.id)}
                                disabled={deleteMutation.isPending}
                                className="px-3 py-1.5 rounded-lg text-sm font-medium bg-error text-white hover:bg-error/80 transition-all disabled:opacity-50"
                              >
                                Confirm
                              </button>
                              <button
                                onClick={() => setDeleteConfirm(null)}
                                className="px-3 py-1.5 rounded-lg text-sm font-medium bg-bg-card-hover text-text-secondary hover:text-text-primary transition-all"
                              >
                                Cancel
                              </button>
                            </div>
                          ) : (
                            <button
                              onClick={() => setDeleteConfirm(user.id)}
                              // Icon-only button: needs an explicit accessible name.
                              aria-label={`Delete user ${user.email}`}
                              title="Delete user"
                              className="p-1.5 rounded-lg text-error/70 hover:text-error hover:bg-error/10 transition-all"
                            >
                              <Trash2 size={18} />
                            </button>
                          )}
                        </>
                      )}
                    </div>
                  </td>
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      </div>
    </div>
  );
}
|
||||
@@ -0,0 +1,185 @@
|
||||
import { useState } from 'react';
|
||||
import { useMutation, useQuery } from '@tanstack/react-query';
|
||||
import { analysisApi, exportApi } from '../api/client';
|
||||
import { Search, CheckCircle, AlertCircle, Clock, FileText, Download, ChevronDown } from 'lucide-react';
|
||||
import type { CompanyAnalysis } from '../types';
|
||||
|
||||
/**
 * Single-company analysis page.
 *
 * The user enters a company name, optionally picks an LLM model, and submits.
 * The latest successful response is kept in local state and rendered as a
 * status banner, three metric cards and, for a successful analysis, the
 * analysis text with CSV/PDF export buttons.
 *
 * Accessibility: the company input carries an explicit accessible name, and
 * the "LLM Model" label is tied to its select via `htmlFor`/`id`.
 */
export function Analysis() {
  const [companyName, setCompanyName] = useState('');
  // Empty string means "use the backend default model".
  const [selectedModel, setSelectedModel] = useState('');
  const [result, setResult] = useState<CompanyAnalysis | null>(null);

  const modelsQuery = useQuery({
    queryKey: ['models'],
    queryFn: () => analysisApi.listModels(),
  });

  const mutation = useMutation({
    mutationFn: (name: string) => analysisApi.analyzeCompany(name, selectedModel || undefined),
    onSuccess: (data) => setResult(data),
  });

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    // Ignore submissions that are empty or whitespace only.
    if (companyName.trim()) {
      mutation.mutate(companyName.trim());
    }
  };

  return (
    <div className="space-y-6">
      {/* Header */}
      <div>
        <h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
          Single Company Analysis
        </h2>
        <p className="text-text-secondary">
          Analyze a company's patent portfolio using AI-powered insights.
        </p>
      </div>

      {/* Search Form */}
      <form onSubmit={handleSubmit} className="space-y-4">
        <div className="flex gap-4">
          <div className="flex-1 relative">
            <Search className="absolute left-4 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
            <input
              type="text"
              aria-label="Company name"
              value={companyName}
              onChange={(e) => setCompanyName(e.target.value)}
              placeholder="Enter company name (e.g., nvidia, intel, amd)"
              className="w-full bg-bg-card/80 border border-primary/30 rounded-xl pl-12 pr-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all"
            />
          </div>
          <button
            type="submit"
            disabled={mutation.isPending || !companyName.trim()}
            className="bg-gradient-to-r from-primary to-primary-dark text-white font-semibold py-3 px-6 rounded-xl hover:shadow-lg hover:shadow-primary/30 transition-all disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
          >
            {mutation.isPending ? (
              <div className="animate-spin rounded-full h-5 w-5 border-t-2 border-b-2 border-white"></div>
            ) : (
              <>
                <Search size={18} />
                Analyze
              </>
            )}
          </button>
        </div>

        {/* Model Selector */}
        <div className="flex items-center gap-3">
          <label
            htmlFor="analysis-model-select"
            className="text-sm font-medium text-text-secondary whitespace-nowrap"
          >
            LLM Model
          </label>
          <div className="relative flex-1 max-w-xs">
            <select
              id="analysis-model-select"
              value={selectedModel}
              onChange={(e) => setSelectedModel(e.target.value)}
              className="w-full appearance-none bg-bg-card/80 border border-primary/30 rounded-lg pl-3 pr-8 py-2 text-sm text-text-primary focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all cursor-pointer"
            >
              <option value="">
                {modelsQuery.data ? `Default (${modelsQuery.data.default})` : 'Default'}
              </option>
              {modelsQuery.data?.models.map((m) => (
                <option key={m.id} value={m.id}>
                  {m.name} ({m.provider})
                </option>
              ))}
            </select>
            <ChevronDown className="absolute right-2 top-1/2 -translate-y-1/2 text-text-secondary pointer-events-none" size={16} />
          </div>
        </div>
      </form>

      {/* Error */}
      {mutation.isError && (
        <div className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-xl px-4 py-3">
          <AlertCircle size={18} />
          <span>Analysis failed. Please try again.</span>
        </div>
      )}

      {/* Results */}
      {result && (
        <div className="space-y-6">
          {/* Success/Failure Status */}
          {result.success ? (
            <div className="flex items-center gap-2 bg-success/10 border border-success/20 text-success rounded-xl px-4 py-3">
              <CheckCircle size={18} />
              <span>Analysis complete for {result.company_name.toUpperCase()}</span>
            </div>
          ) : (
            <div className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-xl px-4 py-3">
              <AlertCircle size={18} />
              <span>Analysis failed: {result.error}</span>
            </div>
          )}

          {/* Metrics */}
          <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
            <MetricCard
              icon={FileText}
              label="Patents Found"
              value={result.patent_count.toString()}
            />
            <MetricCard
              icon={CheckCircle}
              label="Analysis Status"
              value={result.success ? 'Complete' : 'Failed'}
            />
            <MetricCard
              icon={Clock}
              label="Timestamp"
              value={new Date(result.timestamp).toLocaleTimeString()}
            />
          </div>

          {/* Analysis Content */}
          {result.success && result.analysis && (
            <div className="bg-bg-card/60 backdrop-blur-lg border border-primary/15 rounded-2xl p-6">
              <div className="flex items-center justify-between border-b-2 border-primary/30 pb-2 mb-4">
                <h3 className="text-lg font-semibold text-text-primary">
                  AI Analysis Results
                </h3>
                <div className="flex items-center gap-2">
                  <button
                    onClick={() => exportApi.exportCsv(result.company_name)}
                    className="flex items-center gap-2 text-sm bg-primary/20 hover:bg-primary/30 text-primary font-medium px-3 py-1.5 rounded-lg transition-colors"
                  >
                    <Download size={14} />
                    Export CSV
                  </button>
                  <button
                    onClick={() => exportApi.exportPdf(result.company_name)}
                    className="flex items-center gap-2 text-sm bg-primary/20 hover:bg-primary/30 text-primary font-medium px-3 py-1.5 rounded-lg transition-colors"
                  >
                    <FileText size={14} />
                    Export PDF
                  </button>
                </div>
              </div>
              <div className="prose prose-invert max-w-none">
                <div className="text-text-primary whitespace-pre-wrap leading-relaxed">
                  {result.analysis}
                </div>
              </div>
            </div>
          )}
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
/**
 * Centered card showing an icon, a gradient-colored value and an
 * uppercase caption.
 */
function MetricCard(props: { icon: typeof FileText; label: string; value: string }) {
  const { icon: Icon, label, value } = props;

  return (
    <div className="bg-gradient-to-br from-primary/10 to-secondary/10 border border-primary/20 rounded-xl p-5 text-center">
      <Icon className="mx-auto mb-2 text-primary" size={24} />
      <div className="text-2xl font-bold bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent">
        {value}
      </div>
      <div className="text-sm text-text-secondary uppercase tracking-wide mt-1">{label}</div>
    </div>
  );
}
|
||||
@@ -0,0 +1,304 @@
|
||||
import { useState } from 'react';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { analyticsApi } from '../api/client';
|
||||
import { AlertCircle, Database } from 'lucide-react';
|
||||
import { PieChart, Pie, Cell, BarChart, Bar, LineChart, Line, XAxis, YAxis, Tooltip, ResponsiveContainer, Legend } from 'recharts';
|
||||
import { useChartTheme } from '../context/useChartTheme';
|
||||
|
||||
// Hex color palette for the analytics charts.
// NOTE(review): presumably cycled per chart segment; the usage is outside this view.
const COLORS = [
  '#6366f1',
  '#0ea5e9',
  '#10b981',
  '#f59e0b',
  '#ef4444',
  '#8b5cf6',
  '#ec4899',
  '#14b8a6',
];
|
||||
|
||||
export function AnalyticsPage() {
|
||||
const [days, setDays] = useState(30);
|
||||
const chartTheme = useChartTheme();
|
||||
|
||||
const { data, isLoading, isError, refetch } = useQuery({
|
||||
queryKey: ['analytics', days],
|
||||
queryFn: () => analyticsApi.getAnalytics(days),
|
||||
});
|
||||
|
||||
const trendsQuery = useQuery({
|
||||
queryKey: ['analytics-trends', days],
|
||||
queryFn: () => analyticsApi.getTrends(days),
|
||||
});
|
||||
|
||||
if (isLoading) {
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div>
|
||||
<h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
|
||||
Analytics Dashboard
|
||||
</h2>
|
||||
<p className="text-text-secondary">Loading analytics data...</p>
|
||||
</div>
|
||||
{/* Skeleton cards */}
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
|
||||
{[1, 2, 3].map((i) => (
|
||||
<div key={i} className="bg-gradient-to-br from-primary/10 to-secondary/10 border border-primary/20 rounded-xl p-5 text-center animate-pulse">
|
||||
<div className="h-9 w-16 bg-primary/20 rounded mx-auto mb-2" />
|
||||
<div className="h-4 w-24 bg-primary/10 rounded mx-auto" />
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
{/* Skeleton charts */}
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
||||
{[1, 2].map((i) => (
|
||||
<div key={i} className="bg-bg-card/60 border border-primary/15 rounded-2xl p-6 animate-pulse">
|
||||
<div className="h-5 w-40 bg-primary/20 rounded mb-4" />
|
||||
<div className="h-[300px] bg-primary/5 rounded" />
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (isError) {
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div>
|
||||
<h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
|
||||
Analytics Dashboard
|
||||
</h2>
|
||||
</div>
|
||||
<div className="bg-gradient-to-br from-primary/10 to-secondary/5 border border-primary/20 rounded-xl p-6">
|
||||
<div className="flex items-center gap-3 text-warning mb-2">
|
||||
<Database size={24} />
|
||||
<span className="font-semibold">Unable to Load Analytics</span>
|
||||
</div>
|
||||
<p className="text-text-secondary">
|
||||
Could not connect to the analytics database. Ensure PostgreSQL is running and
|
||||
<code className="bg-bg-card px-2 py-1 rounded mx-1">DATABASE_URL</code> is configured correctly.
|
||||
</p>
|
||||
<button
|
||||
onClick={() => refetch()}
|
||||
className="mt-3 text-sm bg-primary/20 hover:bg-primary/30 text-primary font-medium px-4 py-2 rounded-lg transition-colors"
|
||||
>
|
||||
Retry
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (!data || (data.total_messages === 0 && data.by_company.length === 0)) {
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div>
|
||||
<h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
|
||||
Analytics Dashboard
|
||||
</h2>
|
||||
<p className="text-text-secondary">Track historical analysis data and view insights.</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2 bg-secondary/10 border border-secondary/20 text-secondary rounded-xl px-4 py-3">
|
||||
<AlertCircle size={18} />
|
||||
<span>No analytics data available yet. Run some analyses first!</span>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
const companyData = data.by_company.map((c) => ({
|
||||
name: (c.company_name || 'Unknown').toUpperCase(),
|
||||
value: c.count,
|
||||
}));
|
||||
|
||||
const typeData = data.by_type.map((t) => ({
|
||||
name: t.analysis_type || 'Unknown',
|
||||
count: t.count,
|
||||
}));
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div className="flex flex-wrap items-center justify-between gap-4">
|
||||
<div>
|
||||
<h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
|
||||
Analytics Dashboard
|
||||
</h2>
|
||||
<p className="text-text-secondary">Track historical analysis data and view insights.</p>
|
||||
</div>
|
||||
|
||||
{/* Time Range Selector */}
|
||||
<select
|
||||
value={days}
|
||||
onChange={(e) => setDays(Number(e.target.value))}
|
||||
className="bg-bg-card/80 border border-primary/30 rounded-xl px-4 py-2 text-text-primary focus:outline-none focus:border-primary"
|
||||
>
|
||||
<option value={7}>Last 7 days</option>
|
||||
<option value={14}>Last 14 days</option>
|
||||
<option value={30}>Last 30 days</option>
|
||||
<option value={90}>Last 90 days</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
{/* Summary Metrics */}
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
|
||||
<MetricCard label="Total Analyses" value={data.total_messages} />
|
||||
<MetricCard label="Companies Analyzed" value={data.by_company.length} />
|
||||
<MetricCard label="Analysis Types" value={data.by_type.length} />
|
||||
</div>
|
||||
|
||||
{/* Charts */}
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
||||
{/* Pie Chart - Distribution by Company */}
|
||||
{companyData.length > 0 && (
|
||||
<div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-6">
|
||||
<h3 className="text-lg font-semibold text-text-primary mb-4">Distribution by Company</h3>
|
||||
<ResponsiveContainer width="100%" height={300}>
|
||||
<PieChart>
|
||||
<Pie
|
||||
data={companyData}
|
||||
cx="50%"
|
||||
cy="50%"
|
||||
innerRadius={60}
|
||||
outerRadius={100}
|
||||
paddingAngle={2}
|
||||
dataKey="value"
|
||||
label={({ name, percent }) => `${name} ${(percent * 100).toFixed(0)}%`}
|
||||
labelLine={false}
|
||||
>
|
||||
{companyData.map((_, index) => (
|
||||
<Cell key={`cell-${index}`} fill={COLORS[index % COLORS.length]} />
|
||||
))}
|
||||
</Pie>
|
||||
<Tooltip
|
||||
contentStyle={chartTheme.tooltipContentStyle}
|
||||
/>
|
||||
<Legend />
|
||||
</PieChart>
|
||||
</ResponsiveContainer>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Bar Chart - Analysis Types */}
|
||||
{typeData.length > 0 && (
|
||||
<div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-6">
|
||||
<h3 className="text-lg font-semibold text-text-primary mb-4">Analysis Types</h3>
|
||||
<ResponsiveContainer width="100%" height={300}>
|
||||
<BarChart data={typeData}>
|
||||
<XAxis dataKey="name" stroke={chartTheme.axisStroke} fontSize={12} />
|
||||
<YAxis stroke={chartTheme.axisStroke} fontSize={12} />
|
||||
<Tooltip
|
||||
contentStyle={chartTheme.tooltipContentStyle}
|
||||
labelStyle={chartTheme.tooltipLabelStyle}
|
||||
/>
|
||||
<Bar dataKey="count" fill="#6366f1" radius={[4, 4, 0, 0]} />
|
||||
</BarChart>
|
||||
</ResponsiveContainer>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Trend Charts */}
|
||||
{trendsQuery.data && (
|
||||
<div className="space-y-6">
|
||||
<h3 className="text-lg font-semibold text-text-primary border-b-2 border-primary/30 pb-2">
|
||||
Trends Over Time
|
||||
</h3>
|
||||
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
||||
{/* Patent count over time per company (line chart) */}
|
||||
{trendsQuery.data.by_month.length > 0 && (() => {
|
||||
// Pivot data: each month as a row, companies as columns
|
||||
const companies = [...new Set(trendsQuery.data!.by_month.map(d => d.company_name))];
|
||||
const months = [...new Set(trendsQuery.data!.by_month.map(d => d.month))].sort();
|
||||
const pivoted = months.map(month => {
|
||||
const row: Record<string, string | number> = { month };
|
||||
for (const c of companies) {
|
||||
const entry = trendsQuery.data!.by_month.find(d => d.month === month && d.company_name === c);
|
||||
row[c] = entry?.count || 0;
|
||||
}
|
||||
return row;
|
||||
});
|
||||
|
||||
return (
|
||||
<div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-6">
|
||||
<h4 className="text-md font-semibold text-text-primary mb-4">Analyses per Company Over Time</h4>
|
||||
<ResponsiveContainer width="100%" height={300}>
|
||||
<LineChart data={pivoted}>
|
||||
<XAxis dataKey="month" stroke={chartTheme.axisStroke} fontSize={12} />
|
||||
<YAxis stroke={chartTheme.axisStroke} fontSize={12} />
|
||||
<Tooltip
|
||||
contentStyle={chartTheme.tooltipContentStyle}
|
||||
labelStyle={chartTheme.tooltipLabelStyle}
|
||||
/>
|
||||
<Legend />
|
||||
{companies.map((company, idx) => (
|
||||
<Line
|
||||
key={company}
|
||||
type="monotone"
|
||||
dataKey={company}
|
||||
stroke={COLORS[idx % COLORS.length]}
|
||||
strokeWidth={2}
|
||||
dot={{ r: 4 }}
|
||||
name={company.toUpperCase()}
|
||||
/>
|
||||
))}
|
||||
</LineChart>
|
||||
</ResponsiveContainer>
|
||||
</div>
|
||||
);
|
||||
})()}
|
||||
|
||||
{/* Analysis type distribution over time (stacked bar) */}
|
||||
{trendsQuery.data.by_type_over_time.length > 0 && (() => {
|
||||
const types = [...new Set(trendsQuery.data!.by_type_over_time.map(d => d.analysis_type))];
|
||||
const months = [...new Set(trendsQuery.data!.by_type_over_time.map(d => d.month))].sort();
|
||||
const pivoted = months.map(month => {
|
||||
const row: Record<string, string | number> = { month };
|
||||
for (const t of types) {
|
||||
const entry = trendsQuery.data!.by_type_over_time.find(d => d.month === month && d.analysis_type === t);
|
||||
row[t] = entry?.count || 0;
|
||||
}
|
||||
return row;
|
||||
});
|
||||
|
||||
return (
|
||||
<div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-6">
|
||||
<h4 className="text-md font-semibold text-text-primary mb-4">Analysis Types Over Time</h4>
|
||||
<ResponsiveContainer width="100%" height={300}>
|
||||
<BarChart data={pivoted}>
|
||||
<XAxis dataKey="month" stroke={chartTheme.axisStroke} fontSize={12} />
|
||||
<YAxis stroke={chartTheme.axisStroke} fontSize={12} />
|
||||
<Tooltip
|
||||
contentStyle={chartTheme.tooltipContentStyle}
|
||||
labelStyle={chartTheme.tooltipLabelStyle}
|
||||
/>
|
||||
<Legend />
|
||||
{types.map((type, idx) => (
|
||||
<Bar
|
||||
key={type}
|
||||
dataKey={type}
|
||||
stackId="types"
|
||||
fill={COLORS[idx % COLORS.length]}
|
||||
name={type}
|
||||
/>
|
||||
))}
|
||||
</BarChart>
|
||||
</ResponsiveContainer>
|
||||
</div>
|
||||
);
|
||||
})()}
|
||||
</div>
|
||||
|
||||
{trendsQuery.data.by_month.length === 0 && (
|
||||
<div className="text-text-secondary text-center py-8">
|
||||
No trend data available yet. Run analyses over multiple days to see trends.
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Single summary tile: a large gradient-coloured number with an
 * upper-cased caption underneath.
 *
 * @param props.label  Caption rendered below the number.
 * @param props.value  Number rendered as the headline of the tile.
 */
function MetricCard(props: { label: string; value: number }) {
  const { label, value } = props;

  // Class lists are named so the markup below reads as pure structure.
  const tileClasses =
    'bg-gradient-to-br from-primary/10 to-secondary/10 border border-primary/20 rounded-xl p-5 text-center';
  const figureClasses =
    'text-3xl font-bold bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent';
  const captionClasses = 'text-sm text-text-secondary uppercase tracking-wide mt-1';

  return (
    <div className={tileClasses}>
      <div className={figureClasses}>{value}</div>
      <div className={captionClasses}>{label}</div>
    </div>
  );
}
|
||||
@@ -0,0 +1,430 @@
|
||||
import { useState } from 'react';
|
||||
import { useMutation, useQuery } from '@tanstack/react-query';
|
||||
import { analysisApi } from '../api/client';
|
||||
import { Rocket, CheckCircle, AlertCircle, ChevronDown, ChevronUp, RefreshCw, Inbox } from 'lucide-react';
|
||||
import { BarChart, Bar, XAxis, YAxis, Tooltip, ResponsiveContainer, Cell } from 'recharts';
|
||||
import { useChartTheme } from '../context/useChartTheme';
|
||||
import type { BatchAnalysisResult } from '../types';
|
||||
|
||||
/**
 * Batch analysis page.
 *
 * Lets the user enter several company names (comma- or newline-separated),
 * pick a worker count and an optional model, and submit them through
 * `analysisApi.analyzeBatch`. The most recent result is shown as summary
 * cards, a bar chart and an expandable per-company list. Below that, the
 * jobs returned by `analysisApi.listJobs` are listed with their status.
 */
export function Batch() {
  const [companiesInput, setCompaniesInput] = useState('');
  const [maxWorkers, setMaxWorkers] = useState(3);
  const [selectedModel, setSelectedModel] = useState('');
  const [result, setResult] = useState<BatchAnalysisResult | null>(null);
  const [expandedItems, setExpandedItems] = useState<Set<string>>(new Set());

  const chartTheme = useChartTheme();

  const modelsQuery = useQuery({
    queryKey: ['models'],
    queryFn: () => analysisApi.listModels(),
  });

  const jobsQuery = useQuery({
    queryKey: ['jobs'],
    // NOTE(review): second argument is presumably a result limit - confirm against the API client.
    queryFn: () => analysisApi.listJobs(undefined, 20),
  });

  const mutation = useMutation({
    mutationFn: ({ companies, workers }: { companies: string[]; workers: number }) =>
      // An empty selection is sent as `undefined` (the "Default" option in the dropdown).
      analysisApi.analyzeBatch(companies, workers, selectedModel || undefined),
    onSuccess: (data) => {
      setResult(data);
      // Reload the history so the job that just ran shows up in the list.
      jobsQuery.refetch();
    },
  });

  // Company names parsed from the textarea: split on commas/newlines,
  // trimmed, blanks dropped. Shared by the submit button, the retry button
  // and the disabled-state check so all three always agree.
  const parsedCompanies = companiesInput
    .split(/[,\n]/)
    .map((c) => c.trim())
    .filter((c) => c.length > 0);

  // Starts a batch run for the currently entered companies (no-op when none).
  const runBatch = () => {
    if (parsedCompanies.length > 0) {
      mutation.mutate({ companies: parsedCompanies, workers: maxWorkers });
    }
  };

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    runBatch();
  };

  // Functional update: the next set is always derived from the latest state,
  // not from the value captured by this render's closure.
  const toggleExpand = (company: string) => {
    setExpandedItems((prev) => {
      const next = new Set(prev);
      if (!next.delete(company)) {
        next.add(company);
      }
      return next;
    });
  };

  const chartData = result?.results.map((r) => ({
    name: r.company_name.toUpperCase(),
    patents: r.patent_count,
    success: r.success,
  }));

  // Guard the division: a result with zero companies would otherwise render "NaN%".
  const successRate =
    result && result.total_companies > 0
      ? Math.round((result.successful / result.total_companies) * 100)
      : 0;

  return (
    <div className="space-y-6">
      {/* Header */}
      <div>
        <h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
          Batch Company Analysis
        </h2>
        <p className="text-text-secondary">
          Analyze multiple companies simultaneously for comparative insights.
        </p>
      </div>

      {/* Input Form */}
      <form onSubmit={handleSubmit} className="grid grid-cols-1 md:grid-cols-3 gap-4">
        <div className="md:col-span-2">
          <textarea
            id="batch-companies"
            aria-label="Company names"
            value={companiesInput}
            onChange={(e) => setCompaniesInput(e.target.value)}
            placeholder="Enter company names (one per line or comma-separated): nvidia amd intel qualcomm"
            rows={6}
            className="w-full bg-bg-card/80 border border-primary/30 rounded-xl px-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all resize-none"
          />
        </div>

        <div className="space-y-4">
          <div>
            <label htmlFor="batch-max-workers" className="block text-sm font-medium text-text-secondary mb-2">
              Concurrent Workers
            </label>
            <input
              id="batch-max-workers"
              type="range"
              min={1}
              max={5}
              value={maxWorkers}
              onChange={(e) => setMaxWorkers(Number(e.target.value))}
              className="w-full accent-primary"
            />
            <div className="text-center text-text-primary font-semibold">{maxWorkers}</div>
          </div>

          <div>
            <label htmlFor="batch-model" className="block text-sm font-medium text-text-secondary mb-2">
              LLM Model
            </label>
            <div className="relative">
              <select
                id="batch-model"
                value={selectedModel}
                onChange={(e) => setSelectedModel(e.target.value)}
                className="w-full appearance-none bg-bg-card/80 border border-primary/30 rounded-lg pl-3 pr-8 py-2 text-sm text-text-primary focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all cursor-pointer"
              >
                <option value="">
                  {modelsQuery.data ? `Default (${modelsQuery.data.default})` : 'Default'}
                </option>
                {modelsQuery.data?.models.map((m) => (
                  <option key={m.id} value={m.id}>
                    {m.name} ({m.provider})
                  </option>
                ))}
              </select>
              <ChevronDown className="absolute right-2 top-1/2 -translate-y-1/2 text-text-secondary pointer-events-none" size={16} />
            </div>
          </div>

          <button
            type="submit"
            // Disabled when nothing usable was entered (e.g. only commas or
            // whitespace), so the button never looks clickable but does nothing.
            disabled={mutation.isPending || parsedCompanies.length === 0}
            className="w-full bg-gradient-to-r from-primary to-primary-dark text-white font-semibold py-3 px-6 rounded-xl hover:shadow-lg hover:shadow-primary/30 transition-all disabled:opacity-50 disabled:cursor-not-allowed flex items-center justify-center gap-2"
          >
            {mutation.isPending ? (
              <div className="animate-spin rounded-full h-5 w-5 border-t-2 border-b-2 border-white"></div>
            ) : (
              <>
                <Rocket size={18} />
                Run Batch Analysis
              </>
            )}
          </button>
        </div>
      </form>

      {/* Progress */}
      {mutation.isPending && (
        <div className="bg-bg-card/60 border border-primary/15 rounded-xl p-4">
          <div className="flex items-center gap-2 text-secondary">
            <div className="animate-spin rounded-full h-4 w-4 border-t-2 border-b-2 border-secondary"></div>
            <span>Analyzing companies...</span>
          </div>
        </div>
      )}

      {/* Error */}
      {mutation.isError && (
        <div className="bg-error/10 border border-error/20 rounded-xl px-4 py-3">
          <div className="flex items-center gap-2 text-error">
            <AlertCircle size={18} />
            <span className="font-semibold">Batch analysis failed</span>
          </div>
          <p className="text-text-secondary text-sm mt-1 ml-7">
            {mutation.error instanceof Error ? mutation.error.message : 'An unexpected error occurred.'}
            {' '}Check your connection and try again.
          </p>
          <div className="ml-7 mt-2 flex items-center gap-3">
            <button
              type="button"
              onClick={runBatch}
              className="text-sm text-primary hover:text-primary-dark underline flex items-center gap-1"
            >
              <RefreshCw size={14} />
              Retry
            </button>
            <button
              type="button"
              onClick={() => mutation.reset()}
              className="text-sm text-text-secondary hover:text-text-primary underline"
            >
              Dismiss
            </button>
          </div>
        </div>
      )}

      {/* Results */}
      {result && (
        <div className="space-y-6">
          {/* Summary Metrics */}
          <div>
            <h3 className="text-lg font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-4">
              Results Summary
            </h3>
            <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
              <SummaryCard label="Total Companies" value={result.total_companies} />
              <SummaryCard label="Successful" value={result.successful} color="success" />
              <SummaryCard label="Failed" value={result.failed} color="error" />
              <SummaryCard label="Success Rate" value={`${successRate}%`} />
            </div>
          </div>

          {/* Chart */}
          {chartData && chartData.length > 0 && (
            <div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-6">
              <ResponsiveContainer width="100%" height={300}>
                <BarChart data={chartData}>
                  <XAxis dataKey="name" stroke={chartTheme.axisStroke} fontSize={12} />
                  <YAxis stroke={chartTheme.axisStroke} fontSize={12} />
                  <Tooltip
                    contentStyle={chartTheme.tooltipContentStyle}
                    labelStyle={chartTheme.tooltipLabelStyle}
                  />
                  <Bar dataKey="patents" radius={[4, 4, 0, 0]}>
                    {/* Green bar for a successful analysis, red for a failed one. */}
                    {chartData.map((entry, index) => (
                      <Cell
                        key={`cell-${index}`}
                        fill={entry.success ? '#10b981' : '#ef4444'}
                      />
                    ))}
                  </Bar>
                </BarChart>
              </ResponsiveContainer>
            </div>
          )}

          {/* Detailed Results */}
          <div>
            <h3 className="text-lg font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-4">
              Detailed Results
            </h3>
            <div className="space-y-3">
              {result.results.map((r) => {
                const isExpanded = expandedItems.has(r.company_name);
                return (
                  <div
                    key={r.company_name}
                    className="bg-bg-card/60 border border-primary/15 rounded-xl overflow-hidden"
                  >
                    <button
                      type="button"
                      aria-expanded={isExpanded}
                      onClick={() => toggleExpand(r.company_name)}
                      className="w-full flex items-center justify-between p-4 hover:bg-bg-card-hover transition-colors"
                    >
                      <div className="flex items-center gap-3">
                        {r.success ? (
                          <CheckCircle className="text-success" size={20} />
                        ) : (
                          <AlertCircle className="text-error" size={20} />
                        )}
                        <span className="font-semibold text-text-primary">
                          {r.company_name.toUpperCase()}
                        </span>
                        <span className="text-text-secondary">
                          {r.patent_count} patents
                        </span>
                      </div>
                      {isExpanded ? (
                        <ChevronUp className="text-text-secondary" size={20} />
                      ) : (
                        <ChevronDown className="text-text-secondary" size={20} />
                      )}
                    </button>
                    {isExpanded && (
                      <div className="border-t border-primary/10 p-4 bg-bg-dark/40">
                        {r.success ? (
                          <div className="text-text-primary whitespace-pre-wrap leading-relaxed">
                            {r.analysis}
                          </div>
                        ) : (
                          <div className="text-error">{r.error}</div>
                        )}
                      </div>
                    )}
                  </div>
                );
              })}
            </div>
          </div>
        </div>
      )}

      {/* Job History */}
      <div>
        <h3 className="text-lg font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-4">
          Job History
        </h3>

        {/* Loading skeleton */}
        {jobsQuery.isLoading && (
          <div className="space-y-3">
            {[...Array(3)].map((_, i) => (
              <div
                key={i}
                className="bg-bg-card/60 border border-primary/15 rounded-xl p-4 animate-pulse"
              >
                <div className="flex items-center justify-between">
                  <div className="flex items-center gap-3">
                    <div className="h-5 w-5 rounded-full bg-primary/20" />
                    <div className="h-4 w-32 rounded bg-primary/20" />
                    <div className="h-4 w-20 rounded bg-primary/10" />
                  </div>
                  <div className="h-6 w-20 rounded-full bg-primary/15" />
                </div>
                <div className="mt-3 flex gap-4">
                  <div className="h-3 w-24 rounded bg-primary/10" />
                  <div className="h-3 w-16 rounded bg-primary/10" />
                </div>
              </div>
            ))}
          </div>
        )}

        {/* Job history error */}
        {jobsQuery.isError && (
          <div className="bg-error/10 border border-error/20 rounded-xl px-4 py-3">
            <div className="flex items-center gap-2 text-error">
              <AlertCircle size={18} />
              <span className="font-semibold">Failed to load job history</span>
            </div>
            <p className="text-text-secondary text-sm mt-1 ml-7">
              {jobsQuery.error instanceof Error ? jobsQuery.error.message : 'Could not retrieve past jobs.'}
            </p>
            <button
              type="button"
              onClick={() => jobsQuery.refetch()}
              className="ml-7 mt-2 text-sm text-primary hover:text-primary-dark underline flex items-center gap-1"
            >
              <RefreshCw size={14} />
              Retry
            </button>
          </div>
        )}

        {/* Empty state */}
        {jobsQuery.isSuccess && jobsQuery.data.length === 0 && !result && (
          <div className="bg-bg-card/60 border border-primary/15 border-dashed rounded-xl p-8 text-center">
            <Inbox className="mx-auto text-text-secondary/40 mb-3" size={40} />
            <p className="text-text-secondary font-medium">No batch jobs yet</p>
            <p className="text-text-secondary/70 text-sm mt-1">
              Submit a batch analysis above to get started. Your job history will appear here.
            </p>
          </div>
        )}

        {/* Job list */}
        {jobsQuery.isSuccess && jobsQuery.data.length > 0 && (
          <div className="space-y-3">
            {jobsQuery.data.map((job) => (
              <div
                key={job.job_id}
                className="bg-bg-card/60 border border-primary/15 rounded-xl p-4"
              >
                <div className="flex items-center justify-between">
                  <div className="flex items-center gap-3">
                    {job.status === 'completed' && <CheckCircle className="text-success" size={18} />}
                    {job.status === 'failed' && <AlertCircle className="text-error" size={18} />}
                    {(job.status === 'pending' || job.status === 'running') && (
                      <div className="animate-spin rounded-full h-[18px] w-[18px] border-t-2 border-b-2 border-secondary" />
                    )}
                    {/* Only the first 8 characters of the job id are shown. */}
                    <span className="font-mono text-sm text-text-primary">{job.job_id.slice(0, 8)}</span>
                    <span className="text-text-secondary text-sm">
                      {job.total_companies} {job.total_companies === 1 ? 'company' : 'companies'}
                    </span>
                  </div>
                  <span
                    className={`text-xs font-semibold px-2.5 py-1 rounded-full ${
                      job.status === 'completed'
                        ? 'bg-success/15 text-success'
                        : job.status === 'failed'
                          ? 'bg-error/15 text-error'
                          : 'bg-secondary/15 text-secondary'
                    }`}
                  >
                    {job.status}
                  </span>
                </div>
                {/* The total_companies > 0 check keeps the width computation from dividing by zero. */}
                {(job.status === 'running' || job.status === 'pending') && job.total_companies > 0 && (
                  <div className="mt-3">
                    <div className="flex items-center justify-between text-xs text-text-secondary mb-1">
                      <span>Progress</span>
                      <span>{job.completed_companies}/{job.total_companies}</span>
                    </div>
                    <div className="h-1.5 bg-bg-dark rounded-full overflow-hidden">
                      <div
                        className="h-full bg-gradient-to-r from-primary to-secondary rounded-full transition-all duration-300"
                        style={{ width: `${(job.completed_companies / job.total_companies) * 100}%` }}
                      />
                    </div>
                  </div>
                )}
                {job.status === 'failed' && job.error && (
                  <p className="mt-2 text-sm text-error/80">{job.error}</p>
                )}
              </div>
            ))}
          </div>
        )}
      </div>
    </div>
  );
}
|
||||
|
||||
/**
 * Small statistic tile used in the batch results summary.
 *
 * @param props.label  Caption rendered below the value.
 * @param props.value  Number or preformatted string shown as the headline.
 * @param props.color  Optional tone: 'success' or 'error' colours the value
 *                     accordingly; when omitted the value uses the
 *                     primary-to-secondary gradient text.
 */
function SummaryCard(props: {
  label: string;
  value: number | string;
  color?: 'success' | 'error';
}) {
  const { label, value, color } = props;

  // Resolve the text styling of the headline value from the requested tone.
  let valueTone: string;
  if (color === 'success') {
    valueTone = 'text-success';
  } else if (color === 'error') {
    valueTone = 'text-error';
  } else {
    valueTone = 'bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent';
  }

  return (
    <div className="bg-gradient-to-br from-primary/10 to-secondary/10 border border-primary/20 rounded-xl p-4 text-center">
      <div className={`text-2xl font-bold ${valueTone}`}>{value}</div>
      <div className="text-sm text-text-secondary uppercase tracking-wide mt-1">{label}</div>
    </div>
  );
}
|
||||
@@ -0,0 +1,161 @@
|
||||
import { useState } from 'react';
|
||||
import { useSearchParams } from 'react-router-dom';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { analysisApi } from '../api/client';
|
||||
import { GitCompareArrows, AlertCircle, FileText, Clock } from 'lucide-react';
|
||||
import type { CompanyAnalysis } from '../types';
|
||||
|
||||
/**
 * One side of the comparison view.
 *
 * Renders, in priority order: a pulsing skeleton while loading, an error
 * box when the request failed, nothing when there is no data, and otherwise
 * the company's name, patent count, analysis date and analysis text (or the
 * reported error when the analysis did not succeed).
 *
 * @param data       Analysis payload for the company, if already available.
 * @param isLoading  Whether the request is still loading.
 * @param isError    Whether the request failed.
 */
function CompanyPanel({ data, isLoading, isError }: { data?: CompanyAnalysis; isLoading: boolean; isError: boolean }) {
  if (isLoading) {
    return (
      <div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-6 animate-pulse">
        <div className="h-6 w-32 bg-primary/20 rounded mb-4" />
        <div className="space-y-3">
          <div className="h-4 bg-primary/10 rounded w-full" />
          <div className="h-4 bg-primary/10 rounded w-3/4" />
          <div className="h-4 bg-primary/10 rounded w-5/6" />
        </div>
      </div>
    );
  }

  if (isError) {
    return (
      <div className="bg-error/10 border border-error/20 rounded-2xl p-6">
        <div className="flex items-center gap-2 text-error">
          <AlertCircle size={18} />
          <span>Failed to load analysis. Check the company name and try again.</span>
        </div>
      </div>
    );
  }

  if (!data) {
    return null;
  }

  const heading = data.company_name.toUpperCase();
  const analyzedOn = new Date(data.timestamp).toLocaleDateString();

  // The analysis text is shown only for a successful, non-empty analysis;
  // every other case falls back to the reported error or a generic message.
  let body;
  if (data.success && data.analysis) {
    body = (
      <div className="text-text-primary whitespace-pre-wrap leading-relaxed text-sm">
        {data.analysis}
      </div>
    );
  } else {
    body = <div className="text-error text-sm">{data.error || 'Analysis not available'}</div>;
  }

  return (
    <div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-6 space-y-4">
      <h3 className="text-lg font-bold text-text-primary border-b-2 border-primary/30 pb-2">
        {heading}
      </h3>

      <div className="grid grid-cols-2 gap-3">
        <div className="bg-primary/10 rounded-lg p-3 text-center">
          <FileText className="mx-auto mb-1 text-primary" size={18} />
          <div className="text-xl font-bold text-text-primary">{data.patent_count}</div>
          <div className="text-xs text-text-secondary uppercase">Patents</div>
        </div>
        <div className="bg-primary/10 rounded-lg p-3 text-center">
          <Clock className="mx-auto mb-1 text-primary" size={18} />
          <div className="text-sm font-medium text-text-primary">{analyzedOn}</div>
          <div className="text-xs text-text-secondary uppercase">Analyzed</div>
        </div>
      </div>

      {body}
    </div>
  );
}
|
||||
|
||||
/**
 * Side-by-side comparison page for two companies.
 *
 * The committed comparison lives in the URL search params (`a` and `b`);
 * the text inputs hold the draft values and are seeded from those params on
 * first render. Submitting the form writes both trimmed names to the URL,
 * which enables one `analysisApi.analyzeCompany` query per side.
 */
export function Compare() {
  const [searchParams, setSearchParams] = useSearchParams();
  const [companyA, setCompanyA] = useState(searchParams.get('a') || '');
  const [companyB, setCompanyB] = useState(searchParams.get('b') || '');

  // Committed names: these, not the draft inputs, drive the queries.
  const queryA = searchParams.get('a') || '';
  const queryB = searchParams.get('b') || '';

  const resultA = useQuery({
    queryKey: ['analyze', queryA],
    queryFn: () => analysisApi.analyzeCompany(queryA),
    enabled: !!queryA,
  });

  const resultB = useQuery({
    queryKey: ['analyze', queryB],
    queryFn: () => analysisApi.analyzeCompany(queryB),
    enabled: !!queryB,
  });

  const handleCompare = (e: React.FormEvent) => {
    e.preventDefault();
    const a = companyA.trim();
    const b = companyB.trim();
    // Both names are required; otherwise the URL (and the panels) stay untouched.
    if (a && b) {
      setSearchParams({ a, b });
    }
  };

  return (
    <div className="space-y-6">
      {/* Header */}
      <div>
        <h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
          Portfolio Comparison
        </h2>
        <p className="text-text-secondary">
          Compare patent portfolios of two companies side by side.
        </p>
      </div>

      {/* Input Form */}
      <form onSubmit={handleCompare} className="flex flex-col sm:flex-row gap-3 items-end">
        <div className="flex-1">
          {/* htmlFor/id tie each label to its input, as the login form already does. */}
          <label htmlFor="compare-company-a" className="block text-sm font-medium text-text-secondary mb-1">Company A</label>
          <input
            id="compare-company-a"
            type="text"
            value={companyA}
            onChange={(e) => setCompanyA(e.target.value)}
            placeholder="e.g. nvidia"
            className="w-full bg-bg-card/80 border border-primary/30 rounded-xl px-4 py-2.5 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all"
          />
        </div>
        <div className="flex-1">
          <label htmlFor="compare-company-b" className="block text-sm font-medium text-text-secondary mb-1">Company B</label>
          <input
            id="compare-company-b"
            type="text"
            value={companyB}
            onChange={(e) => setCompanyB(e.target.value)}
            placeholder="e.g. intel"
            className="w-full bg-bg-card/80 border border-primary/30 rounded-xl px-4 py-2.5 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all"
          />
        </div>
        <button
          type="submit"
          disabled={!companyA.trim() || !companyB.trim() || resultA.isLoading || resultB.isLoading}
          className="bg-gradient-to-r from-primary to-primary-dark text-white font-semibold py-2.5 px-6 rounded-xl hover:shadow-lg hover:shadow-primary/30 transition-all disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
        >
          <GitCompareArrows size={18} />
          Compare
        </button>
      </form>

      {/* Comparison Panels */}
      {(queryA || queryB) && (
        <div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
          {queryA && (
            <CompanyPanel
              data={resultA.data}
              isLoading={resultA.isLoading}
              isError={resultA.isError}
            />
          )}
          {queryB && (
            <CompanyPanel
              data={resultB.data}
              isLoading={resultB.isLoading}
              isError={resultB.isError}
            />
          )}
        </div>
      )}
    </div>
  );
}
|
||||
@@ -0,0 +1,121 @@
|
||||
import { useState } from 'react';
|
||||
import { Link, useNavigate, useLocation } from 'react-router-dom';
|
||||
import { useAuth } from '../context/AuthContext';
|
||||
import { LogIn, Mail, Lock, AlertCircle } from 'lucide-react';
|
||||
|
||||
/**
 * Sign-in page.
 *
 * Collects email and password, calls `login` from the auth context, and on
 * success navigates (replacing the history entry) to the location stored in
 * router state under `from`, or to `/analysis` when none is present. A
 * failed login shows the error message above the form.
 */
export function Login() {
  const [email, setEmail] = useState('');
  const [password, setPassword] = useState('');
  const [error, setError] = useState('');
  const [isLoading, setIsLoading] = useState(false);

  const { login } = useAuth();
  const navigate = useNavigate();
  const location = useLocation();

  // Rebuild the full originating URL rather than just its pathname, so query
  // strings (e.g. the comparison page's `?a=...&b=...`) and hashes survive
  // the login round-trip. `search`/`hash` are optional to stay compatible
  // with callers that only pass a pathname.
  const origin = (
    location.state as { from?: { pathname: string; search?: string; hash?: string } } | null
  )?.from;
  const from = origin?.pathname
    ? `${origin.pathname}${origin.search ?? ''}${origin.hash ?? ''}`
    : '/analysis';

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setError('');
    setIsLoading(true);

    try {
      await login(email, password);
      navigate(from, { replace: true });
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Invalid email or password');
    } finally {
      // Runs on both paths so the button is re-enabled after a failure.
      setIsLoading(false);
    }
  };

  return (
    <div className="min-h-screen bg-gradient-to-br from-bg-dark to-slate-100 dark:to-indigo-950 flex items-center justify-center px-4">
      <div className="w-full max-w-md">
        {/* Brand */}
        <div className="text-center mb-8">
          <div className="flex items-center justify-center gap-3 mb-4">
            <span className="text-4xl">⚡</span>
            <h1 className="text-3xl font-bold bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent">
              SPARC
            </h1>
          </div>
          <p className="text-text-secondary">Semiconductor Patent Analytics Dashboard</p>
        </div>

        {/* Login Card */}
        <div className="bg-bg-card/60 backdrop-blur-lg border border-primary/15 rounded-2xl p-8">
          <h2 className="text-xl font-semibold text-text-primary mb-6">Sign in to your account</h2>

          {error && (
            <div className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-lg px-4 py-3 mb-6">
              <AlertCircle size={18} />
              <span className="text-sm">{error}</span>
            </div>
          )}

          <form onSubmit={handleSubmit} className="space-y-5">
            <div>
              <label htmlFor="email" className="block text-sm font-medium text-text-secondary mb-2">
                Email
              </label>
              <div className="relative">
                <Mail className="absolute left-3 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
                <input
                  id="email"
                  type="email"
                  autoComplete="email"
                  value={email}
                  onChange={(e) => setEmail(e.target.value)}
                  required
                  className="w-full bg-bg-dark/80 border border-primary/30 rounded-xl pl-10 pr-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all"
                  placeholder="you@example.com"
                />
              </div>
            </div>

            <div>
              <label htmlFor="password" className="block text-sm font-medium text-text-secondary mb-2">
                Password
              </label>
              <div className="relative">
                <Lock className="absolute left-3 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
                <input
                  id="password"
                  type="password"
                  autoComplete="current-password"
                  value={password}
                  onChange={(e) => setPassword(e.target.value)}
                  required
                  className="w-full bg-bg-dark/80 border border-primary/30 rounded-xl pl-10 pr-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all"
                  placeholder="Enter your password"
                />
              </div>
            </div>

            <button
              type="submit"
              disabled={isLoading}
              className="w-full bg-gradient-to-r from-primary to-primary-dark text-white font-semibold py-3 px-4 rounded-xl hover:shadow-lg hover:shadow-primary/30 transition-all disabled:opacity-50 disabled:cursor-not-allowed flex items-center justify-center gap-2"
            >
              {isLoading ? (
                <div className="animate-spin rounded-full h-5 w-5 border-t-2 border-b-2 border-white"></div>
              ) : (
                <>
                  <LogIn size={18} />
                  Sign In
                </>
              )}
            </button>
          </form>

          <div className="mt-6 text-center">
            <span className="text-text-secondary text-sm">Don't have an account? </span>
            <Link to="/register" className="text-primary hover:text-primary-dark font-medium text-sm">
              Sign up
            </Link>
          </div>
        </div>
      </div>
    </div>
  );
}
|
||||
@@ -0,0 +1,153 @@
|
||||
import { useState } from 'react';
|
||||
import { Link, useNavigate } from 'react-router-dom';
|
||||
import { useAuth } from '../context/AuthContext';
|
||||
import { UserPlus, Mail, Lock, AlertCircle } from 'lucide-react';
|
||||
|
||||
/**
 * Registration page.
 *
 * Collects an email and a password (with confirmation), validates them in the
 * browser, then calls `register` from the auth context. On success the user is
 * redirected to `/analysis`; on failure the error message is shown above the
 * form.
 */
export function Register() {
  const [email, setEmail] = useState('');
  const [password, setPassword] = useState('');
  const [confirmPassword, setConfirmPassword] = useState('');
  const [error, setError] = useState('');
  const [isLoading, setIsLoading] = useState(false);

  const { register } = useAuth();
  const navigate = useNavigate();

  // Shared styling for the three text inputs (previously repeated verbatim).
  const inputClassName =
    'w-full bg-bg-dark/80 border border-primary/30 rounded-xl pl-10 pr-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all';

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setError('');

    // Local validation runs before any request is made.
    if (password !== confirmPassword) {
      setError('Passwords do not match');
      return;
    }

    if (password.length < 8) {
      setError('Password must be at least 8 characters');
      return;
    }

    setIsLoading(true);

    try {
      await register(email, password);
      // `replace` keeps the registration form out of the history stack.
      navigate('/analysis', { replace: true });
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Registration failed');
    } finally {
      setIsLoading(false);
    }
  };

  return (
    <div className="min-h-screen bg-gradient-to-br from-bg-dark to-slate-100 dark:to-indigo-950 flex items-center justify-center px-4">
      <div className="w-full max-w-md">
        {/* Brand */}
        <div className="text-center mb-8">
          <div className="flex items-center justify-center gap-3 mb-4">
            <span className="text-4xl">⚡</span>
            <h1 className="text-3xl font-bold bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent">
              SPARC
            </h1>
          </div>
          <p className="text-text-secondary">Semiconductor Patent Analytics Dashboard</p>
        </div>

        {/* Register Card */}
        <div className="bg-bg-card/60 backdrop-blur-lg border border-primary/15 rounded-2xl p-8">
          <h2 className="text-xl font-semibold text-text-primary mb-6">Create your account</h2>

          {/* role="alert" makes screen readers announce the error when it appears */}
          {error && (
            <div
              role="alert"
              className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-lg px-4 py-3 mb-6"
            >
              <AlertCircle size={18} />
              <span className="text-sm">{error}</span>
            </div>
          )}

          <form onSubmit={handleSubmit} className="space-y-5">
            <div>
              <label htmlFor="email" className="block text-sm font-medium text-text-secondary mb-2">
                Email
              </label>
              <div className="relative">
                <Mail className="absolute left-3 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
                <input
                  id="email"
                  type="email"
                  autoComplete="email"
                  value={email}
                  onChange={(e) => setEmail(e.target.value)}
                  required
                  className={inputClassName}
                  placeholder="you@example.com"
                />
              </div>
            </div>

            <div>
              <label htmlFor="password" className="block text-sm font-medium text-text-secondary mb-2">
                Password
              </label>
              <div className="relative">
                <Lock className="absolute left-3 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
                {/* "new-password" tells password managers to offer generation rather than autofill */}
                <input
                  id="password"
                  type="password"
                  autoComplete="new-password"
                  value={password}
                  onChange={(e) => setPassword(e.target.value)}
                  required
                  minLength={8}
                  className={inputClassName}
                  placeholder="At least 8 characters"
                />
              </div>
            </div>

            <div>
              <label htmlFor="confirmPassword" className="block text-sm font-medium text-text-secondary mb-2">
                Confirm Password
              </label>
              <div className="relative">
                <Lock className="absolute left-3 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
                <input
                  id="confirmPassword"
                  type="password"
                  autoComplete="new-password"
                  value={confirmPassword}
                  onChange={(e) => setConfirmPassword(e.target.value)}
                  required
                  className={inputClassName}
                  placeholder="Confirm your password"
                />
              </div>
            </div>

            <button
              type="submit"
              disabled={isLoading}
              aria-busy={isLoading}
              className="w-full bg-gradient-to-r from-primary to-primary-dark text-white font-semibold py-3 px-4 rounded-xl hover:shadow-lg hover:shadow-primary/30 transition-all disabled:opacity-50 disabled:cursor-not-allowed flex items-center justify-center gap-2"
            >
              {isLoading ? (
                <div className="animate-spin rounded-full h-5 w-5 border-t-2 border-b-2 border-white"></div>
              ) : (
                <>
                  <UserPlus size={18} />
                  Create Account
                </>
              )}
            </button>
          </form>

          <div className="mt-6 text-center">
            <span className="text-text-secondary text-sm">Already have an account? </span>
            <Link to="/login" className="text-primary hover:text-primary-dark font-medium text-sm">
              Sign in
            </Link>
          </div>
        </div>

        <p className="mt-6 text-center text-xs text-text-secondary">
          The first registered user will automatically become an admin.
        </p>
      </div>
    </div>
  );
}
|
||||
@@ -0,0 +1,32 @@
|
||||
/**
|
||||
* Application types derived from the auto-generated OpenAPI schema.
|
||||
*
|
||||
* Run `npm run generate:local` (or `npm run generate` with the API running)
|
||||
* to regenerate `src/api/schema.d.ts` from the backend OpenAPI spec.
|
||||
*
|
||||
* These aliases keep the rest of the codebase stable while the source of
|
||||
* truth lives in the generated file.
|
||||
*/
|
||||
|
||||
import type { components } from '../api/schema';
|
||||
|
||||
// Re-export schema types under the names the rest of the app expects.
|
||||
export type User = components['schemas']['UserResponse'];
|
||||
export type TokenResponse = components['schemas']['TokenResponse'];
|
||||
export type CompanyAnalysis = components['schemas']['CompanyAnalysisResponse'];
|
||||
export type BatchAnalysisResult = components['schemas']['BatchAnalysisResponse'];
|
||||
export type JobStatus = components['schemas']['JobStatus'];
|
||||
export type Analytics = Omit<components['schemas']['AnalyticsResponse'], 'by_company' | 'by_type'> & {
|
||||
by_company: Array<{ company_name: string; count: number }>;
|
||||
by_type: Array<{ analysis_type: string; count: number }>;
|
||||
};
|
||||
|
||||
// Additional generated types that may be useful elsewhere.
|
||||
export type RegisterRequest = components['schemas']['RegisterRequest'];
|
||||
export type LoginRequest = components['schemas']['LoginRequest'];
|
||||
export type RefreshRequest = components['schemas']['RefreshRequest'];
|
||||
export type UpdateRoleRequest = components['schemas']['UpdateRoleRequest'];
|
||||
export type HealthResponse = components['schemas']['HealthResponse'];
|
||||
export type BatchAnalysisRequest = components['schemas']['BatchAnalysisRequest'];
|
||||
export type ValidationError = components['schemas']['ValidationError'];
|
||||
export type HTTPValidationError = components['schemas']['HTTPValidationError'];
|
||||
Vendored
+9
@@ -0,0 +1,9 @@
|
||||
/// <reference types="vite/client" />
|
||||
|
||||
interface ImportMetaEnv {
|
||||
readonly VITE_API_URL: string;
|
||||
}
|
||||
|
||||
interface ImportMeta {
|
||||
readonly env: ImportMetaEnv;
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
/** @type {import('tailwindcss').Config} */
|
||||
export default {
|
||||
content: [
|
||||
"./index.html",
|
||||
"./src/**/*.{js,ts,jsx,tsx}",
|
||||
],
|
||||
darkMode: 'class',
|
||||
theme: {
|
||||
extend: {
|
||||
colors: {
|
||||
primary: {
|
||||
DEFAULT: '#6366f1',
|
||||
dark: '#4f46e5',
|
||||
},
|
||||
secondary: '#0ea5e9',
|
||||
success: '#10b981',
|
||||
warning: '#f59e0b',
|
||||
error: '#ef4444',
|
||||
bg: {
|
||||
dark: 'var(--color-bg-dark)',
|
||||
card: 'var(--color-bg-card)',
|
||||
'card-hover': 'var(--color-bg-card-hover)',
|
||||
},
|
||||
text: {
|
||||
primary: 'var(--color-text-primary)',
|
||||
secondary: 'var(--color-text-secondary)',
|
||||
},
|
||||
border: 'var(--color-border)',
|
||||
},
|
||||
},
|
||||
},
|
||||
plugins: [],
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2020",
|
||||
"useDefineForClassFields": true,
|
||||
"lib": ["ES2020", "DOM", "DOM.Iterable"],
|
||||
"module": "ESNext",
|
||||
"skipLibCheck": true,
|
||||
"moduleResolution": "bundler",
|
||||
"allowImportingTsExtensions": true,
|
||||
"isolatedModules": true,
|
||||
"moduleDetection": "force",
|
||||
"noEmit": true,
|
||||
"jsx": "react-jsx",
|
||||
"strict": true,
|
||||
"noUnusedLocals": true,
|
||||
"noUnusedParameters": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"baseUrl": ".",
|
||||
"paths": {
|
||||
"@/*": ["src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["src"]
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
import { defineConfig } from 'vite'
|
||||
import react from '@vitejs/plugin-react'
|
||||
|
||||
// Strips the leading `/api` segment before a request is forwarded.
const stripApiPrefix = (requestPath) => requestPath.replace(/^\/api/, '')

// Dev-server proxy rule: `/api/*` is forwarded to the server on localhost:8000.
const apiProxy = {
  target: 'http://localhost:8000',
  changeOrigin: true,
  rewrite: stripApiPrefix,
}

export default defineConfig({
  plugins: [react()],
  server: {
    port: 3000,
    proxy: { '/api': apiProxy },
  },
})
|
||||
@@ -1,10 +1,43 @@
|
||||
from SPARC.serp_api import SERP
|
||||
"""SPARC - Semiconductor Patent & Analytics Report Core
|
||||
|
||||
patents = SERP.query("nvidia")
|
||||
Example usage of the company performance analyzer.
|
||||
|
||||
for patent in patents.patents:
|
||||
patent = SERP.save_patents(patent)
|
||||
patent.summary = SERP.parse_patent_pdf(patent.pdf_path)
|
||||
print(patent.summary)
|
||||
Before running:
|
||||
1. Create a .env file with:
|
||||
API_KEY=your_serpapi_key
|
||||
OPENROUTER_API_KEY=your_openrouter_key
|
||||
|
||||
print(patents)
|
||||
2. Run: python main.py
|
||||
"""
|
||||
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
|
||||
|
||||
def main():
    """Run the example: analyze one company's patent portfolio and print the result."""
    # The analyzer is constructed with no arguments; per the module docstring,
    # API keys are expected to come from a .env file.
    analyzer = CompanyAnalyzer()

    company_name = "nvidia"
    rule = "=" * 70

    print(f"\n{rule}")
    print(f"SPARC Patent Analysis - {company_name.upper()}")
    print(f"{rule}\n")

    # According to the original notes, analyze_company will:
    #   1. retrieve patents from the SERP API
    #   2. download and parse the patent PDFs
    #   3. minimize the content (remove bloat)
    #   4. analyze with Claude to estimate performance
    analysis = analyzer.analyze_company(company_name)

    print(f"\n{rule}")
    print("ANALYSIS RESULTS")
    print(f"{rule}\n")
    print(analysis)
    print(f"\n{rule}\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
+14
-1
@@ -4,4 +4,17 @@ pdfplumber
|
||||
requests
|
||||
pytest
|
||||
pytest-mock
|
||||
anthropic
|
||||
openai
|
||||
psycopg2-binary
|
||||
fastapi
|
||||
uvicorn[standard]
|
||||
pydantic[email]
|
||||
httpx
|
||||
numpy
|
||||
pandas
|
||||
bcrypt
|
||||
PyJWT
|
||||
slowapi
|
||||
apscheduler
|
||||
boto3
|
||||
reportlab
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
[lint]
|
||||
select = ["E", "F", "I"]
|
||||
ignore = [
|
||||
"E501", # line too long (handled by formatter)
|
||||
]
|
||||
|
||||
[lint.per-file-ignores]
|
||||
"tests/*" = ["E402", "F841"] # allow import not at top of file, unused vars (mocks) in tests
|
||||
@@ -0,0 +1,227 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Estimate token usage per company portfolio for SPARC analysis."""
|
||||
|
||||
import tiktoken
|
||||
from typing import Dict, List, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class TokenEstimate:
    """Token usage estimate for a company portfolio."""

    company_name: str  # company the estimate was computed for
    patent_count: int  # number of patents included in the estimate
    prompt_tokens: int  # counted input tokens across all prompts
    estimated_completion_tokens: int  # upper bound derived from max-output settings
    total_tokens: int  # prompt_tokens + estimated_completion_tokens
    cost_estimate_usd: float  # input cost + output cost at the class pricing constants


class TokenEstimator:
    """Estimate token usage for SPARC patent analysis.

    Prompts are rebuilt locally and counted with a tiktoken encoder; completion
    size is not predicted but taken as the configured maximum output length, so
    the resulting figures are upper-bound style estimates.
    """

    # Claude 3.5 Sonnet pricing via OpenRouter (per 1M tokens)
    INPUT_COST_PER_1M = 3.00  # $3.00 per 1M input tokens
    OUTPUT_COST_PER_1M = 15.00  # $15.00 per 1M output tokens

    # Estimated output tokens based on max_tokens settings
    SINGLE_PATENT_MAX_OUTPUT = 1024
    PORTFOLIO_MAX_OUTPUT = 2048

    def __init__(self):
        # cl100k_base is used as an approximation of Claude's tokenizer, so
        # counts are estimates rather than exact billing numbers.
        self.encoder = tiktoken.get_encoding("cl100k_base")

    def count_tokens(self, text: str) -> int:
        """Return the number of tokens the encoder produces for ``text``."""
        return len(self.encoder.encode(text))

    def build_single_patent_prompt(self, patent_content: str, company_name: str) -> str:
        """Build the single-patent analysis prompt.

        NOTE(review): the original docstring says this matches llm.py; confirm
        the two stay in sync, otherwise the estimate drifts.
        """
        return f"""You are a patent analyst evaluating {company_name}'s innovation strategy.

Analyze the following patent content and provide insights on:
1. Innovation quality and novelty
2. Technical complexity and defensibility
3. Market potential and commercial viability
4. Strategic positioning relative to industry trends

Patent Content:
{patent_content}

Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""

    def build_portfolio_prompt(self, patents_data: List[Dict[str, str]], company_name: str) -> str:
        """Build the portfolio analysis prompt.

        Each entry of ``patents_data`` must provide 'patent_id' and 'content'.

        NOTE(review): the original docstring says this matches llm.py; confirm.
        """
        portfolio_summary = [
            f"Patent {idx} ({patent['patent_id']}):\n{patent['content']}"
            for idx, patent in enumerate(patents_data, 1)
        ]
        combined_content = "\n\n---\n\n".join(portfolio_summary)

        return f"""You are analyzing {company_name}'s patent portfolio to estimate their future performance and innovation trajectory.

You have {len(patents_data)} recent patents to analyze. Evaluate the portfolio holistically:

1. Innovation Trends: What technology areas are they focusing on?
2. Strategic Direction: What does this reveal about their business strategy?
3. Competitive Position: How defensible are these innovations?
4. Market Outlook: What market opportunities do these patents target?
5. Performance Forecast: Based on this innovation activity, what's your assessment of their likely performance?

Patent Portfolio:
{combined_content}

Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""

    def estimate_portfolio(
        self,
        company_name: str,
        patents_data: List[Dict[str, str]],
        include_individual_patents: bool = False
    ) -> TokenEstimate:
        """Estimate tokens for a company portfolio analysis.

        Args:
            company_name: Name of the company
            patents_data: List of dicts with 'patent_id' and 'content' keys
            include_individual_patents: If True, also count individual patent analysis calls

        Returns:
            A TokenEstimate whose completion figure is the sum of the
            configured maximum outputs for every call that was counted.
        """
        # Portfolio analysis tokens
        portfolio_prompt = self.build_portfolio_prompt(patents_data, company_name)
        prompt_tokens = self.count_tokens(portfolio_prompt)
        completion_tokens = self.PORTFOLIO_MAX_OUTPUT

        # Optionally add one single-patent call per patent
        if include_individual_patents:
            for patent in patents_data:
                single_prompt = self.build_single_patent_prompt(patent['content'], company_name)
                prompt_tokens += self.count_tokens(single_prompt)
                completion_tokens += self.SINGLE_PATENT_MAX_OUTPUT

        total_tokens = prompt_tokens + completion_tokens

        # Calculate cost
        input_cost = (prompt_tokens / 1_000_000) * self.INPUT_COST_PER_1M
        output_cost = (completion_tokens / 1_000_000) * self.OUTPUT_COST_PER_1M
        total_cost = input_cost + output_cost

        return TokenEstimate(
            company_name=company_name,
            patent_count=len(patents_data),
            prompt_tokens=prompt_tokens,
            estimated_completion_tokens=completion_tokens,
            total_tokens=total_tokens,
            cost_estimate_usd=total_cost
        )

    def estimate_from_sample(
        self,
        company_name: str,
        patent_count: int = 10,
        avg_patent_chars: int = 5000,
        include_individual_patents: bool = False
    ) -> TokenEstimate:
        """Estimate tokens using sample/average patent sizes.

        Args:
            company_name: Name of the company
            patent_count: Number of patents (default 10, typical from SERP)
            avg_patent_chars: Average characters per minimized patent content
            include_individual_patents: Forwarded to ``estimate_portfolio``; if
                True, per-patent analysis calls are counted as well. Defaults
                to False, which preserves the previous behavior.
        """
        # Generate sample patent data.
        # NOTE(review): a run of a single repeated letter may tokenize more
        # compactly than real patent text, so this likely under-counts; confirm
        # against real content before relying on the absolute numbers.
        sample_content = "A" * avg_patent_chars  # Placeholder content
        patents_data = [
            {"patent_id": f"US{10000000 + i}", "content": sample_content}
            for i in range(patent_count)
        ]

        return self.estimate_portfolio(
            company_name,
            patents_data,
            include_individual_patents=include_individual_patents,
        )
|
||||
|
||||
|
||||
def main():
    """Print three example estimates: fixed-size samples, a size sweep, and two short patents."""
    estimator = TokenEstimator()
    heavy_rule = "=" * 70

    print(heavy_rule)
    print("SPARC Token Usage Estimator")
    print(heavy_rule)

    # Example 1: Estimate with sample data
    print("\n📊 Sample Estimates (10 patents, ~5000 chars each):\n")

    companies = ["Apple Inc.", "Microsoft Corporation", "Tesla Motors", "Google LLC"]
    company_estimates = []

    for company in companies:
        estimate = estimator.estimate_from_sample(company, patent_count=10, avg_patent_chars=5000)
        company_estimates.append(estimate)
        # The trailing empty entry yields the blank separator line.
        report_lines = [
            f" {company}:",
            f" Patents: {estimate.patent_count}",
            f" Prompt tokens: {estimate.prompt_tokens:,}",
            f" Est. completion tokens: {estimate.estimated_completion_tokens:,}",
            f" Total tokens: {estimate.total_tokens:,}",
            f" Est. cost: ${estimate.cost_estimate_usd:.4f}",
            "",
        ]
        print("\n".join(report_lines))

    total_tokens = sum(item.total_tokens for item in company_estimates)
    total_cost = sum((item.cost_estimate_usd for item in company_estimates), 0.0)

    print("-" * 70)
    print(f" TOTAL for {len(companies)} companies:")
    print(f" Total tokens: {total_tokens:,}")
    print(f" Total est. cost: ${total_cost:.4f}")

    # Example 2: Different portfolio sizes
    print(f"\n{heavy_rule}")
    print("📈 Token Scaling by Portfolio Size:")
    print(f"{heavy_rule}\n")

    for size in (5, 10, 15, 20):
        scaled = estimator.estimate_from_sample("Sample Corp", patent_count=size)
        print(f" {size} patents: {scaled.prompt_tokens:,} prompt tokens, ${scaled.cost_estimate_usd:.4f}")

    # Example 3: With actual patent content (simulated)
    print(f"\n{heavy_rule}")
    print("📝 Example with Real Patent Structure:")
    print(f"{heavy_rule}\n")

    ml_patent_text = """ABSTRACT: A method for machine learning optimization using gradient descent.

CLAIMS:
1. A computer-implemented method comprising:
receiving input data;
processing the input data through a neural network;
optimizing weights using backpropagation.

SUMMARY: This invention relates to improvements in neural network training efficiency."""

    cloud_patent_text = """ABSTRACT: System for distributed computing in cloud environments.

CLAIMS:
1. A distributed system comprising:
a plurality of compute nodes;
a load balancer;
a message queue for task distribution.

SUMMARY: The present disclosure improves cloud computing resource allocation."""

    sample_patents = [
        {"patent_id": "US11234567", "content": ml_patent_text},
        {"patent_id": "US11234568", "content": cloud_patent_text},
    ]

    result = estimator.estimate_portfolio("Tech Corp", sample_patents)
    summary_lines = [
        f" Company: {result.company_name}",
        f" Patents analyzed: {result.patent_count}",
        f" Prompt tokens: {result.prompt_tokens:,}",
        f" Est. completion: {result.estimated_completion_tokens:,}",
        f" Total: {result.total_tokens:,}",
        f" Est. cost: ${result.cost_estimate_usd:.4f}",
    ]
    print("\n".join(summary_lines))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Initialize the SPARC database schema.
|
||||
|
||||
This script creates the tables and indexes SPARC uses (LLM messages, users,
async jobs, and the patent/SERP query caches) and seeds a default admin
account when one does not exist yet.
|
||||
|
||||
Usage:
|
||||
python scripts/init_database.py
|
||||
"""
|
||||
|
||||
import secrets
|
||||
import string
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.database import DatabaseClient
|
||||
|
||||
DEFAULT_ADMIN_EMAIL = "admin@sparc.dev"
|
||||
|
||||
|
||||
def generate_password(length: int = 16) -> str:
    """Return a random password of ``length`` ASCII letters and digits.

    Characters are drawn with ``secrets.choice``, i.e. from the OS-backed
    cryptographically strong source rather than the ``random`` module.
    """
    pool = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(secrets.choice(pool))
    return "".join(picked)
|
||||
|
||||
|
||||
def redact_database_url(url):
    """Return ``url`` with any password in its user-info part replaced by ``***``.

    Connection URLs commonly take the form ``scheme://user:password@host/db``;
    printing them verbatim leaks the password into terminals and CI logs.
    URLs without a ``user:password@`` section are returned unchanged, and
    non-string values are passed through ``str()``. Credentials supplied as
    query parameters are NOT detected.
    """
    from urllib.parse import urlsplit, urlunsplit

    if not isinstance(url, str):
        return str(url)

    try:
        parts = urlsplit(url)
    except ValueError:
        # Refuse to echo something we could not inspect.
        return "<unparseable database URL>"

    # Split on the LAST "@": a host cannot contain "@", but a password might.
    userinfo, at_sign, hostinfo = parts.netloc.rpartition("@")
    if not at_sign:
        return url

    # Split on the FIRST ":": a user name cannot contain ":", a password might.
    user, colon, _password = userinfo.partition(":")
    if not colon:
        return url

    return urlunsplit(parts._replace(netloc=f"{user}:***@{hostinfo}"))


def main():
    """Initialize the database schema and seed the default admin user.

    Prints a summary of the created tables/indexes. If no user exists for
    DEFAULT_ADMIN_EMAIL, one is created with a freshly generated password that
    is printed once. Any exception is reported on stderr and the process exits
    with status 1.
    """
    print("Initializing SPARC database...")
    # Never print the raw URL: it may embed the database password.
    print(f"Database URL: {redact_database_url(config.database_url)}")

    try:
        db_client = DatabaseClient(config.database_url)
        db_client.initialize_schema()
        print("Database schema initialized successfully!")
        print("\nTables created:")
        print(" - llm_messages: Stores all LLM prompts and responses")
        print(" - users: Stores user accounts")
        print(" - jobs: Stores async batch job state")
        print(" - patents: Patent PDF cache")
        print(" - serp_queries: SERP query result cache")
        print("\nIndexes created:")
        print(" - idx_messages_timestamp: For time-based queries")
        print(" - idx_messages_company: For company-specific queries")
        print(" - idx_users_email: For user lookups")

        # Create default admin user if not exists
        existing_admin = db_client.get_user_by_email(DEFAULT_ADMIN_EMAIL)
        if existing_admin:
            print(f"\nDefault admin user already exists: {DEFAULT_ADMIN_EMAIL}")
        else:
            password = generate_password()
            admin_user = db_client.create_user(
                email=DEFAULT_ADMIN_EMAIL,
                password=password,
                role="admin",
            )
            if admin_user:
                print("\n" + "=" * 50)
                print("DEFAULT ADMIN CREDENTIALS")
                print("=" * 50)
                print(f"Email: {DEFAULT_ADMIN_EMAIL}")
                print(f"Password: {password}")
                print("=" * 50)
                print("Please save these credentials securely!")
                print("=" * 50)
            else:
                # Previously this case was silent, leaving the operator
                # without an admin account and without any hint why.
                print(
                    f"\nWarning: default admin user was not created: {DEFAULT_ADMIN_EMAIL}",
                    file=sys.stderr,
                )

    except Exception as e:
        # Top-level boundary of a CLI script: report and exit non-zero.
        print(f"Error initializing database: {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python3
|
||||
"""View analytics from the message database.
|
||||
|
||||
This script displays statistics about stored LLM messages including
|
||||
usage by company, analysis type, and time periods.
|
||||
|
||||
Usage:
|
||||
python scripts/view_analytics.py [--days DAYS]
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import json
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.database import DatabaseClient
|
||||
|
||||
|
||||
def main():
    """Print message counts for the last ``--days`` days, grouped by company and by type."""
    arg_parser = argparse.ArgumentParser(description="View SPARC message analytics")
    arg_parser.add_argument(
        "--days",
        type=int,
        default=30,
        help="Number of days to analyze (default: 30)"
    )
    options = arg_parser.parse_args()

    rule = "=" * 70
    print(f"SPARC Analytics - Last {options.days} days")
    print(rule)

    # (heading, key in the analytics dict, key holding each row's label)
    sections = (
        ("\nMessages by Company:", "by_company", "company_name"),
        ("\nMessages by Analysis Type:", "by_type", "analysis_type"),
    )

    try:
        client = DatabaseClient(config.database_url)
        stats = client.get_analytics(days=options.days)

        print(f"\nTotal Messages: {stats['total_messages']}")

        for heading, bucket_key, label_key in sections:
            print(heading)
            rows = stats[bucket_key]
            if not rows:
                print(" No data")
                continue
            for row in rows:
                # Rows with an empty/NULL label are shown as "(unknown)".
                label = row[label_key] or '(unknown)'
                print(f" {label}: {row['count']}")

        print("\n" + rule)

    except Exception as e:
        print(f"Error retrieving analytics: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
"""View stored messages from the database.
|
||||
|
||||
This script displays stored LLM messages with filtering options.
|
||||
|
||||
Usage:
|
||||
python scripts/view_messages.py [--company COMPANY] [--type TYPE] [--limit LIMIT]
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.database import DatabaseClient
|
||||
|
||||
|
||||
def preview_text(text, limit=200, placeholder="(none)"):
    """Return at most ``limit`` characters of ``text`` for display.

    An ellipsis is appended only when the text was actually cut. Empty or
    missing text yields ``placeholder`` instead of raising.
    """
    if not text:
        return placeholder
    text = str(text)
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def main():
    """Display stored messages, optionally filtered by company and analysis type.

    Reads ``--company``, ``--type`` and ``--limit`` from the command line,
    fetches matching rows through DatabaseClient.get_messages and prints a
    short preview of each prompt and response. Any exception is reported and
    the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="View stored SPARC messages")
    parser.add_argument(
        "--company",
        type=str,
        help="Filter by company name"
    )
    parser.add_argument(
        "--type",
        type=str,
        choices=["single_patent", "portfolio"],
        help="Filter by analysis type"
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=10,
        help="Maximum number of messages to display (default: 10)"
    )
    args = parser.parse_args()

    print("SPARC Stored Messages")
    print("=" * 70)

    try:
        db_client = DatabaseClient(config.database_url)
        messages = db_client.get_messages(
            company_name=args.company,
            analysis_type=args.type,
            limit=args.limit
        )

        if not messages:
            print("\nNo messages found.")
            return

        print(f"\nShowing {len(messages)} message(s):\n")

        for msg in messages:
            print(f"Message #{msg['id']} - {msg['timestamp']}")
            print(f"Company: {msg['company_name'] or '(unknown)'}")
            print(f"Type: {msg['analysis_type'] or '(unknown)'}")
            print(f"Model: {msg['model'] or '(unknown)'}")
            # A NULL prompt used to raise TypeError on slicing; previews also
            # no longer get a misleading "..." when nothing was truncated.
            print("\nPrompt (first 200 chars):")
            print(f" {preview_text(msg['prompt'], placeholder='(no prompt)')}")
            print("\nResponse (first 200 chars):")
            print(f" {preview_text(msg['response'], placeholder='(no response)')}")
            print("\n" + "-" * 70 + "\n")

    except Exception as e:
        print(f"Error retrieving messages: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Test script to verify database caching functionality.
|
||||
|
||||
This script tests the LLMAnalyzer with database caching without requiring
|
||||
actual API keys or patent downloads.
|
||||
"""
|
||||
|
||||
from SPARC.llm import LLMAnalyzer
|
||||
from SPARC.database import DatabaseClient
|
||||
from SPARC import config
|
||||
|
||||
def test_database_storage():
    """Run both analysis entry points once, then print what the database holds for TestCorp."""
    divider = "=" * 70
    print("Testing Database Storage & Caching")
    print(divider)

    # Initialize analyzer (database is always used)
    print("\n1. Initializing LLMAnalyzer...")
    analyzer = LLMAnalyzer(use_cache=True)

    has_db = analyzer.db_client is not None
    has_api_client = analyzer.client is not None
    print(f" - use_cache: {analyzer.use_cache}")
    print(f" - db_client: {has_db}")
    print(f" - client (API): {has_api_client}")

    # Single patent analysis (per the original note: without an API key a
    # placeholder is stored)
    print("\n2. Testing single patent analysis (no API key)...")
    single_result = analyzer.analyze_patent_content(
        patent_content="Test patent content about semiconductor innovation",
        company_name="TestCorp"
    )
    print(f" Result: {single_result[:80]}...")

    # Portfolio analysis
    print("\n3. Testing portfolio analysis (no API key)...")
    portfolio_result = analyzer.analyze_patent_portfolio(
        patents_data=[
            {"patent_id": "US001", "content": "First test patent"},
            {"patent_id": "US002", "content": "Second test patent"},
        ],
        company_name="TestCorp"
    )
    print(f" Result: {portfolio_result[:80]}...")

    # Read back what was stored for this company
    print("\n4. Verifying messages were stored...")
    store = DatabaseClient(config.database_url)
    stored = store.get_messages(company_name="TestCorp", limit=10)
    print(f" Found {len(stored)} stored messages")

    for row in stored:
        if row.get('is_cached'):
            status = "CACHED"
        else:
            status = "NEW"
        print(f" - ID: {row['id']}, Type: {row['analysis_type']}, Status: {status}")

    # Analytics for the last day
    print("\n5. Getting analytics...")
    summary = store.get_analytics(days=1)
    print(f" Total messages: {summary['total_messages']}")
    print(f" By company: {summary['by_company']}")
    print(f" By type: {summary['by_type']}")

    print("\n" + divider)
    print("Database storage test completed successfully!")
|
||||
|
||||
def test_caching():
    """Check that a stored response is returned for its prompt and not for a different one.

    Stores one message, then queries get_cached_response twice: once with the
    same prompt (a hit is required) and once with a different prompt (a miss
    is required). Unlike the earlier version, an unexpected result now fails
    the test instead of being printed and followed by a success message.
    """
    print("\nTesting Cache Functionality")
    print("=" * 70)

    db_client = DatabaseClient(config.database_url)
    db_client.initialize_schema()

    # Store a fake cached response
    print("\n1. Storing a test response in database...")
    test_prompt = "Test prompt for caching"
    test_response = "This is a cached response from previous API call"

    db_client.store_message(
        prompt=test_prompt,
        response=test_response,
        company_name="CacheTest",
        analysis_type="test",
        model="test-model"
    )

    # Try to retrieve from cache
    print("\n2. Testing cache retrieval...")
    cached = db_client.get_cached_response(
        prompt=test_prompt,
        company_name="CacheTest",
        analysis_type="test"
    )

    assert cached, "Cache miss (unexpected): stored prompt was not found"
    print(f" Cache hit! Response: {cached['response']}")

    # Test cache miss
    print("\n3. Testing cache miss...")
    cached = db_client.get_cached_response(
        prompt="Different prompt",
        company_name="CacheTest",
        analysis_type="test"
    )

    assert not cached, "Unexpected cache hit for a prompt that was never stored"
    print(" Cache miss as expected")

    print("\n" + "=" * 70)
    print("Cache test completed successfully!")
|
||||
|
||||
def test_test_mode():
    """Smoke-test ``LLMAnalyzer`` when it is constructed with ``test_mode=True``.

    Prints the analyzer's ``test_mode`` flag, whether it has a ``db_client``,
    and the result of one ``analyze_patent_content`` call.
    """
    banner = "=" * 70

    print("\nTesting Test Mode")
    print(banner)

    print("\n1. Initializing LLMAnalyzer in test mode...")
    llm = LLMAnalyzer(test_mode=True)

    print(f" - test_mode: {llm.test_mode}")
    has_db_client = llm.db_client is not None
    print(f" - db_client: {has_db_client}")

    print("\n2. Testing single patent analysis (test mode)...")
    request = {
        "patent_content": "Test patent content",
        "company_name": "TestCorp2",
    }
    outcome = llm.analyze_patent_content(**request)
    print(f" Result: {outcome}")

    print("\n" + banner)
    print("Test mode test completed successfully!")
|
||||
|
||||
if __name__ == "__main__":
    # Run the three manual checks in order, printing a blank separator
    # between consecutive checks (but not after the last one).
    manual_checks = (test_database_storage, test_caching, test_test_mode)
    for position, run_check in enumerate(manual_checks):
        if position:
            print("\n")
        run_check()
|
||||
@@ -0,0 +1,542 @@
|
||||
"""Tests for the high-level company analyzer orchestration."""
|
||||
|
||||
from unittest.mock import MagicMock, Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
from SPARC.types import BatchAnalysisResult, Patent, Patents
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def mock_db(mocker):
    """Patch ``SPARC.analyzer.DatabaseClient`` for every test in this module.

    The patched class returns a single ``MagicMock`` instance whose
    ``get_cached_patent`` and ``get_cached_serp_query`` lookups both return
    ``None``, so tests start from a cache miss unless they override it.
    That instance is what the fixture returns.
    """
    db_instance = MagicMock()
    for lookup_name in ("get_cached_patent", "get_cached_serp_query"):
        getattr(db_instance, lookup_name).return_value = None

    mocker.patch("SPARC.analyzer.DatabaseClient", return_value=db_instance)
    return db_instance
|
||||
|
||||
|
||||
class TestCompanyAnalyzer:
    """Orchestration tests for CompanyAnalyzer with SERP and LLMAnalyzer mocked."""

    def test_analyzer_initialization(self, mocker):
        """The API key given to CompanyAnalyzer is passed on to LLMAnalyzer."""
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        _ = CompanyAnalyzer(openrouter_api_key="test-key")

        llm_cls.assert_called_once_with(api_key="test-key")

    def test_analyze_company_full_pipeline(self, mocker, mock_db):
        """Query, save, parse, minimize and portfolio analysis each run once."""
        serp_query = mocker.patch("SPARC.analyzer.SERP.query")
        serp_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        serp_parse = mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            return_value={"abstract": "Test abstract", "claims": "Test claims"},
        )
        serp_minimize = mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm",
            return_value="Minimized content",
        )
        llm = Mock(
            **{"analyze_patent_portfolio.return_value": "Strong innovation portfolio"}
        )
        mocker.patch("SPARC.analyzer.LLMAnalyzer", return_value=llm)

        # pdf_path is assigned only after the Patents container has been built.
        found = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        serp_query.return_value = Patents(patents=[found])
        found.pdf_path = "patents/US123.pdf"
        serp_save.return_value = found

        summary = CompanyAnalyzer().analyze_company("TestCorp")

        # Every pipeline stage was reached exactly once.
        assert summary == "Strong innovation portfolio"
        serp_query.assert_called_once_with("TestCorp")
        serp_save.assert_called_once()
        serp_parse.assert_called_once_with("patents/US123.pdf")
        serp_minimize.assert_called_once()
        llm.analyze_patent_portfolio.assert_called_once()

        # The LLM received the minimized content keyed by patent id.
        sent = llm.analyze_patent_portfolio.call_args.kwargs["patents_data"]
        assert len(sent) == 1
        assert sent[0]["patent_id"] == "US123"
        assert sent[0]["content"] == "Minimized content"

    def test_analyze_company_no_patents_found(self, mocker):
        """An empty query result yields the 'No patents found' message."""
        mocker.patch("SPARC.analyzer.SERP.query", return_value=Patents(patents=[]))
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        outcome = CompanyAnalyzer().analyze_company("UnknownCorp")

        assert outcome == "No patents found for UnknownCorp"

    def test_analyze_company_handles_processing_errors(self, mocker):
        """A patent whose save step raises is skipped; the rest are analyzed."""
        serp_query = mocker.patch("SPARC.analyzer.SERP.query")
        serp_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf", return_value={"abstract": "Test"}
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm", return_value="Content"
        )
        llm = Mock(**{"analyze_patent_portfolio.return_value": "Analysis result"})
        mocker.patch("SPARC.analyzer.LLMAnalyzer", return_value=llm)

        good = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
        broken = Patent(patent_id="US456", pdf_link="http://example.com/2.pdf")
        serp_query.return_value = Patents(patents=[good, broken])

        good.pdf_path = "patents/US123.pdf"

        def save_or_fail(candidate):
            # Only US123 can be "downloaded"; anything else fails.
            if candidate.patent_id != "US123":
                raise Exception("Download failed")
            candidate.pdf_path = "patents/US123.pdf"
            return candidate

        serp_save.side_effect = save_or_fail

        outcome = CompanyAnalyzer().analyze_company("TestCorp")

        # The single surviving patent is still analyzed.
        assert outcome == "Analysis result"

        sent = llm.analyze_patent_portfolio.call_args.kwargs["patents_data"]
        assert len(sent) == 1
        assert sent[0]["patent_id"] == "US123"

    def test_analyze_company_all_patents_fail(self, mocker):
        """When every patent fails to process, a failure message is returned."""
        only_patent = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
        mocker.patch(
            "SPARC.analyzer.SERP.query",
            return_value=Patents(patents=[only_patent]),
        )
        mocker.patch(
            "SPARC.analyzer.SERP.save_patents",
            side_effect=Exception("Processing error"),
        )
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        outcome = CompanyAnalyzer().analyze_company("TestCorp")

        assert outcome == "Failed to process any patents for TestCorp"

    def test_analyze_single_patent(self, mocker):
        """A single patent is parsed, minimized and sent to the LLM."""
        serp_parse = mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            return_value={"abstract": "Test abstract"},
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm",
            return_value="Minimized content",
        )
        llm = Mock(
            **{"analyze_patent_content.return_value": "Innovative patent analysis"}
        )
        mocker.patch("SPARC.analyzer.LLMAnalyzer", return_value=llm)

        outcome = CompanyAnalyzer().analyze_single_patent("US123", "TestCorp")

        assert outcome == "Innovative patent analysis"
        serp_parse.assert_called_once_with("patents/US123.pdf")
        llm.analyze_patent_content.assert_called_once_with(
            patent_content="Minimized content", company_name="TestCorp"
        )

    def test_analyze_single_patent_error_handling(self, mocker):
        """A parse failure is reported in the returned message, not raised."""
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            side_effect=FileNotFoundError("PDF not found"),
        )
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        outcome = CompanyAnalyzer().analyze_single_patent("US999", "TestCorp")

        for fragment in ("Failed to analyze patent US999", "PDF not found"):
            assert fragment in outcome
|
||||
|
||||
|
||||
class TestSingleQueryBugFix:
    """Regression tests: SERP.query runs at most once per company analysis."""

    def test_analyze_company_safe_calls_query_once(self, mocker, mock_db):
        """``_analyze_company_safe`` issues exactly one SERP.query call."""
        only_patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        serp_query = mocker.patch(
            "SPARC.analyzer.SERP.query",
            return_value=Patents(patents=[only_patent]),
        )

        def mark_downloaded(candidate):
            candidate.pdf_path = "patents/US123.pdf"
            return candidate

        mocker.patch("SPARC.analyzer.SERP.save_patents", side_effect=mark_downloaded)
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf", return_value={"abstract": "Test"}
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm", return_value="Content"
        )
        llm = Mock(**{"analyze_patent_portfolio.return_value": "Analysis"})
        mocker.patch("SPARC.analyzer.LLMAnalyzer", return_value=llm)

        CompanyAnalyzer()._analyze_company_safe("TestCorp")

        # The point of the regression test: one query, not two.
        serp_query.assert_called_once_with("TestCorp")

    def test_analyze_company_with_prefetched_patents_skips_query(self, mocker):
        """Passing ``patents=`` to analyze_company bypasses SERP.query."""
        serp_query = mocker.patch("SPARC.analyzer.SERP.query")

        def mark_downloaded(candidate):
            candidate.pdf_path = "patents/US123.pdf"
            return candidate

        mocker.patch("SPARC.analyzer.SERP.save_patents", side_effect=mark_downloaded)
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf", return_value={"abstract": "Test"}
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm", return_value="Content"
        )
        llm = Mock(**{"analyze_patent_portfolio.return_value": "Analysis"})
        mocker.patch("SPARC.analyzer.LLMAnalyzer", return_value=llm)

        only_patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        already_fetched = Patents(patents=[only_patent])

        CompanyAnalyzer().analyze_company("TestCorp", patents=already_fetched)

        # The caller supplied the patents, so no search may happen.
        serp_query.assert_not_called()
|
||||
|
||||
|
||||
class TestPatentCaching:
    """Tests for the patent-level and SERP-query DB caches used by the pipeline."""

    def test_process_single_patent_uses_db_cache(self, mocker, mock_db):
        """A cached patent is returned as-is and nothing is downloaded."""
        serp_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_db.get_cached_patent.return_value = {
            "patent_id": "US123",
            "minimized_content": "Cached minimized content",
        }
        candidate = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")

        processed = CompanyAnalyzer._process_single_patent(
            candidate, "TestCorp", mock_db
        )

        expected = {"patent_id": "US123", "content": "Cached minimized content"}
        assert processed == expected
        # Cache hit, so the download step must be skipped.
        serp_save.assert_not_called()

    def test_process_single_patent_stores_to_db_cache(self, mocker, mock_db):
        """On a cache miss the processed patent is written to the DB."""

        def mark_downloaded(target):
            target.pdf_path = "patents/US123.pdf"
            return target

        mocker.patch("SPARC.analyzer.SERP.save_patents", side_effect=mark_downloaded)
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            return_value={"abstract": "Test abstract"},
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm",
            return_value="Minimized content",
        )

        # Force a cache miss.
        mock_db.get_cached_patent.return_value = None
        candidate = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")

        processed = CompanyAnalyzer._process_single_patent(
            candidate, "TestCorp", mock_db
        )

        assert processed == {"patent_id": "US123", "content": "Minimized content"}
        mock_db.store_patent.assert_called_once_with(
            patent_id="US123",
            company_name="TestCorp",
            pdf_link="http://example.com/test.pdf",
            raw_sections={"abstract": "Test abstract"},
            minimized_content="Minimized content",
        )

    def test_serp_query_cache_hit_skips_api(self, mocker, mock_db):
        """With SERP and patent caches both hit, no query or download occurs."""
        serp_query = mocker.patch("SPARC.analyzer.SERP.query")
        serp_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm = Mock(**{"analyze_patent_portfolio.return_value": "Analysis"})
        mocker.patch("SPARC.analyzer.LLMAnalyzer", return_value=llm)

        # Both cache layers report a hit.
        mock_db.get_cached_serp_query.return_value = ["US123"]
        mock_db.get_cached_patent.return_value = {
            "patent_id": "US123",
            "minimized_content": "Cached content",
        }

        outcome = CompanyAnalyzer().analyze_company("TestCorp")

        assert outcome == "Analysis"
        serp_query.assert_not_called()
        serp_save.assert_not_called()

    def test_serp_query_cache_miss_stores_result(self, mocker, mock_db):
        """After a SERP cache miss, the queried patent ids are stored."""
        serp_query = mocker.patch("SPARC.analyzer.SERP.query")

        def mark_downloaded(target):
            target.pdf_path = "patents/US123.pdf"
            return target

        mocker.patch("SPARC.analyzer.SERP.save_patents", side_effect=mark_downloaded)
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf", return_value={"abstract": "Test"}
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm", return_value="Content"
        )
        llm = Mock(**{"analyze_patent_portfolio.return_value": "Analysis"})
        mocker.patch("SPARC.analyzer.LLMAnalyzer", return_value=llm)

        mock_db.get_cached_serp_query.return_value = None
        candidate = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        serp_query.return_value = Patents(patents=[candidate])

        CompanyAnalyzer().analyze_company("TestCorp")

        mock_db.store_serp_query.assert_called_once()
        stored = mock_db.store_serp_query.call_args.kwargs
        assert stored["company_name"] == "TestCorp"
        assert stored["patent_ids"] == ["US123"]
|
||||
|
||||
|
||||
class TestBatchProcessing:
    """Tests for analyzing several companies in one batch call."""

    @staticmethod
    def _one_patent_for(company):
        """Return a Patents result holding one patent named after *company*."""
        return Patents(
            patents=[
                Patent(
                    patent_id=f"US-{company}",
                    pdf_link=f"http://example.com/{company}.pdf",
                )
            ]
        )

    @staticmethod
    def _mark_downloaded(patent):
        """Set ``pdf_path`` from the patent id and hand the patent back."""
        patent.pdf_path = f"patents/{patent.patent_id}.pdf"
        return patent

    def _patch_happy_pipeline(self, mocker, analysis_text):
        """Patch SERP and LLMAnalyzer so every company is analyzed successfully."""
        mocker.patch("SPARC.analyzer.SERP.query", side_effect=self._one_patent_for)
        mocker.patch(
            "SPARC.analyzer.SERP.save_patents", side_effect=self._mark_downloaded
        )
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf", return_value={"abstract": "Test"}
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm", return_value="Content"
        )
        llm = Mock(**{"analyze_patent_portfolio.return_value": analysis_text})
        mocker.patch("SPARC.analyzer.LLMAnalyzer", return_value=llm)

    def test_analyze_companies_success(self, mocker):
        """Two companies analyzed with two workers both succeed."""
        self._patch_happy_pipeline(mocker, "Analysis result")

        batch = CompanyAnalyzer().analyze_companies(
            ["CompanyA", "CompanyB"], max_workers=2
        )

        assert isinstance(batch, BatchAnalysisResult)
        assert batch.total_companies == 2
        assert batch.successful == 2
        assert batch.failed == 0
        assert len(batch.results) == 2

    def test_analyze_companies_with_failures(self, mocker):
        """A company with no patents is reported as a failed result."""

        def query_side_effect(company):
            if company == "FailCorp":
                return Patents(patents=[])
            return self._one_patent_for(company)

        mocker.patch("SPARC.analyzer.SERP.query", side_effect=query_side_effect)
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        batch = CompanyAnalyzer().analyze_companies(
            ["GoodCorp", "FailCorp"], max_workers=1
        )

        assert batch.total_companies == 2
        assert batch.failed >= 1  # At least FailCorp should fail

        failed_entry = next(
            entry for entry in batch.results if entry.company_name == "FailCorp"
        )
        assert failed_entry.success is False

    def test_analyze_companies_sequential(self, mocker):
        """The sequential variant returns one result per company."""
        self._patch_happy_pipeline(mocker, "Analysis")

        batch = CompanyAnalyzer().analyze_companies_sequential(
            ["Corp1", "Corp2", "Corp3"]
        )

        assert batch.total_companies == 3
        assert len(batch.results) == 3

    def test_analyze_companies_progress_callback(self, mocker):
        """The progress callback fires once per analyzed company."""
        self._patch_happy_pipeline(mocker, "Analysis")
        on_progress = Mock()

        CompanyAnalyzer().analyze_companies(
            ["A", "B"], max_workers=1, progress_callback=on_progress
        )

        assert on_progress.call_count == 2

    def test_company_analysis_result_structure(self, mocker, mock_db):
        """Each per-company result exposes the expected fields."""
        only_patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        mocker.patch(
            "SPARC.analyzer.SERP.query",
            return_value=Patents(patents=[only_patent]),
        )

        # First SERP cache lookup misses; the next one returns the stored ids.
        mock_db.get_cached_serp_query.side_effect = [None, ["US123"]]

        def mark_downloaded(target):
            target.pdf_path = "patents/US123.pdf"
            return target

        mocker.patch("SPARC.analyzer.SERP.save_patents", side_effect=mark_downloaded)
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf", return_value={"abstract": "Test"}
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm", return_value="Content"
        )
        llm = Mock(**{"analyze_patent_portfolio.return_value": "Strong innovation"})
        mocker.patch("SPARC.analyzer.LLMAnalyzer", return_value=llm)

        batch = CompanyAnalyzer().analyze_companies(["TestCorp"], max_workers=1)

        assert len(batch.results) == 1
        entry = batch.results[0]
        assert entry.company_name == "TestCorp"
        assert entry.analysis == "Strong innovation"
        assert entry.patent_count == 1
        assert entry.success is True
        assert entry.error is None
        assert entry.timestamp is not None
|
||||
@@ -0,0 +1,228 @@
|
||||
"""Tests for FastAPI web service endpoints."""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from SPARC.api import app
|
||||
from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult
|
||||
|
||||
|
||||
@pytest.fixture
def client():
    """Provide a ``TestClient`` bound to the SPARC FastAPI ``app``."""
    test_client = TestClient(app)
    return test_client
|
||||
|
||||
|
||||
@pytest.fixture
def mock_analyzer(mocker):
    """Swap ``SPARC.api._analyzer`` for a ``Mock`` and return that mock."""
    analyzer_stub = Mock()
    mocker.patch("SPARC.api._analyzer", new=analyzer_stub)
    return analyzer_stub
|
||||
|
||||
|
||||
class TestHealthEndpoint:
    """Checks for the ``/health`` endpoint."""

    def test_health_returns_ok(self, client):
        """``GET /health`` answers 200 with status, version and a timestamp."""
        reply = client.get("/health")
        assert reply.status_code == 200

        payload = reply.json()
        assert payload["status"] == "healthy"
        assert payload["version"] == "1.0.0"
        assert "timestamp" in payload
|
||||
|
||||
|
||||
class TestAnalyzeCompanyEndpoint:
    """Checks for ``GET /analyze/{company}``."""

    def test_analyze_company_success(self, client, mock_analyzer):
        """A successful analysis result is serialized into the response body."""
        mock_analyzer._analyze_company_safe.return_value = CompanyAnalysisResult(
            company_name="nvidia",
            analysis="Strong AI patent portfolio",
            patent_count=5,
            success=True,
            timestamp=datetime.now(),
        )

        reply = client.get("/analyze/nvidia")

        assert reply.status_code == 200
        payload = reply.json()
        assert payload["company_name"] == "nvidia"
        assert payload["analysis"] == "Strong AI patent portfolio"
        assert payload["patent_count"] == 5
        assert payload["success"] is True

    def test_analyze_company_failure(self, client, mock_analyzer):
        """A failed analysis still answers 200 and carries the error text."""
        mock_analyzer._analyze_company_safe.return_value = CompanyAnalysisResult(
            company_name="unknown",
            analysis="",
            patent_count=0,
            success=False,
            error="No patents found",
            timestamp=datetime.now(),
        )

        reply = client.get("/analyze/unknown")

        assert reply.status_code == 200
        payload = reply.json()
        assert payload["success"] is False
        assert payload["error"] == "No patents found"
|
||||
|
||||
|
||||
class TestBatchAnalysisEndpoint:
    """Checks for ``POST /analyze/batch``."""

    def test_batch_analysis_success(self, client, mock_analyzer):
        """A two-company batch result is returned with its summary counts."""
        nvidia_result = CompanyAnalysisResult(
            company_name="nvidia",
            analysis="Strong portfolio",
            patent_count=5,
            success=True,
            timestamp=datetime.now(),
        )
        amd_result = CompanyAnalysisResult(
            company_name="amd",
            analysis="Growing portfolio",
            patent_count=3,
            success=True,
            timestamp=datetime.now(),
        )
        mock_analyzer.analyze_companies.return_value = BatchAnalysisResult(
            results=[nvidia_result, amd_result],
            total_companies=2,
            successful=2,
            failed=0,
            timestamp=datetime.now(),
        )

        request_body = {"companies": ["nvidia", "amd"], "max_workers": 2}
        reply = client.post("/analyze/batch", json=request_body)

        assert reply.status_code == 200
        payload = reply.json()
        assert payload["total_companies"] == 2
        assert payload["successful"] == 2
        assert payload["failed"] == 0
        assert len(payload["results"]) == 2

    def test_batch_analysis_validation(self, client):
        """Invalid batch requests are rejected with HTTP 422."""
        rejected_bodies = (
            # Empty companies list
            {"companies": []},
            # Too many companies
            {"companies": [f"company{i}" for i in range(25)]},
            # Invalid max_workers
            {"companies": ["nvidia"], "max_workers": 10},
        )

        for request_body in rejected_bodies:
            reply = client.post("/analyze/batch", json=request_body)
            assert reply.status_code == 422
|
||||
|
||||
|
||||
class TestAsyncBatchEndpoint:
    """Checks for ``POST /analyze/batch/async``."""

    def test_async_batch_creates_job(self, client, mock_analyzer):
        """Submitting a batch returns a pending job descriptor."""
        request_body = {"companies": ["nvidia", "amd"]}
        reply = client.post("/analyze/batch/async", json=request_body)

        assert reply.status_code == 200
        job = reply.json()
        assert "job_id" in job
        assert job["status"] == "pending"
        assert job["total_companies"] == 2
        assert job["progress"] == 0
|
||||
|
||||
|
||||
class TestJobEndpoints:
    """Checks for the ``/jobs`` endpoints."""

    def test_get_job_not_found(self, client):
        """Requesting an unknown job id answers 404."""
        reply = client.get("/jobs/nonexistent")
        assert reply.status_code == 404

    def test_list_jobs(self, client, mocker):
        """``GET /jobs`` answers 200 with a JSON list."""
        # Start from an empty job registry for the duration of the test.
        mocker.patch.dict("SPARC.api._jobs", {}, clear=True)

        reply = client.get("/jobs")

        assert reply.status_code == 200
        assert isinstance(reply.json(), list)

    def test_list_jobs_with_filter(self, client, mocker):
        """``GET /jobs`` accepts a ``status`` query filter."""
        reply = client.get("/jobs?status=completed")
        assert reply.status_code == 200
|
||||
|
||||
|
||||
class TestModelValidation:
    """Checks that only allow-listed model identifiers are accepted."""

    def test_analyze_rejects_unsupported_model(self, client, mock_analyzer):
        """``GET /analyze/{company}`` with an unknown model answers 400."""
        reply = client.get("/analyze/nvidia?model=fake/nonexistent-model")

        assert reply.status_code == 400
        assert "Unsupported model" in reply.json()["detail"]

    def test_analyze_accepts_supported_model(self, client, mock_analyzer):
        """``GET /analyze/{company}`` with a supported model answers 200."""
        mock_analyzer._analyze_company_safe.return_value = CompanyAnalysisResult(
            company_name="nvidia",
            analysis="test",
            patent_count=1,
            success=True,
            timestamp=datetime.now(),
            model="anthropic/claude-3.5-sonnet",
        )

        reply = client.get("/analyze/nvidia?model=anthropic/claude-3.5-sonnet")

        assert reply.status_code == 200

    def test_batch_rejects_unsupported_model(self, client, mock_analyzer):
        """``POST /analyze/batch`` with an unknown model answers 400."""
        request_body = {"companies": ["nvidia"], "model": "fake/nonexistent-model"}
        reply = client.post("/analyze/batch", json=request_body)

        assert reply.status_code == 400
        assert "Unsupported model" in reply.json()["detail"]

    def test_list_models_returns_supported(self, client):
        """``GET /models`` lists models, each with id, name and provider."""
        reply = client.get("/models")
        assert reply.status_code == 200

        payload = reply.json()
        assert "models" in payload
        assert "default" in payload
        assert len(payload["models"]) > 0
        assert all(
            "id" in entry and "name" in entry and "provider" in entry
            for entry in payload["models"]
        )
|
||||
@@ -0,0 +1,302 @@
|
||||
"""Tests for JWT authentication flow: register, login, protected routes, refresh, admin access."""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from SPARC.api import app
|
||||
from SPARC.auth import create_access_token, create_refresh_token
|
||||
|
||||
|
||||
@pytest.fixture
def client():
    """Provide a ``TestClient`` bound to the SPARC FastAPI ``app``."""
    test_client = TestClient(app)
    return test_client
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def mock_db(monkeypatch):
    """Patch ``get_db_client`` in ``SPARC.api`` and ``SPARC.auth`` with a mock DB.

    Yields a ``MagicMock`` whose user-related methods are pre-configured to
    behave like an empty user table. Both patches stay active until the test
    using the fixture finishes.
    """
    # Default: no users exist
    empty_table_answers = {
        "get_user_count": 0,
        "get_user_by_id": None,
        "get_user_by_email": None,
        "authenticate_user": None,
        "create_user": None,
        "get_all_users": [],
        "update_user_role": None,
        "delete_user": False,
    }

    db_stub = MagicMock()
    for method_name, answer in empty_table_answers.items():
        getattr(db_stub, method_name).return_value = answer

    with patch("SPARC.api.get_db_client", return_value=db_stub):
        with patch("SPARC.auth.get_db_client", return_value=db_stub):
            yield db_stub
|
||||
|
||||
|
||||
def _make_admin_user():
|
||||
return {
|
||||
"id": 1,
|
||||
"email": "admin@test.com",
|
||||
"role": "admin",
|
||||
"created_at": datetime(2025, 1, 1, tzinfo=timezone.utc),
|
||||
}
|
||||
|
||||
|
||||
def _make_regular_user():
|
||||
return {
|
||||
"id": 2,
|
||||
"email": "user@test.com",
|
||||
"role": "user",
|
||||
"created_at": datetime(2025, 1, 1, tzinfo=timezone.utc),
|
||||
}
|
||||
|
||||
|
||||
def _auth_header(user_dict):
    """Build a Bearer Authorization header from an access token for *user_dict*.

    The token is minted by ``create_access_token`` from the dict's
    ``id``, ``email`` and ``role`` entries.
    """
    user_id, email, role = (user_dict[key] for key in ("id", "email", "role"))
    access_token = create_access_token(user_id, email, role)
    return {"Authorization": f"Bearer {access_token}"}
|
||||
|
||||
|
||||
class TestRegister:
    """POST /auth/register"""

    def test_register_first_user_becomes_admin(self, client, mock_db):
        """With a user count of zero, registration must request the admin role."""
        mock_db.get_user_count.return_value = 0
        # Reuse the shared factory instead of duplicating the admin dict.
        mock_db.create_user.return_value = _make_admin_user()

        response = client.post(
            "/auth/register",
            json={"email": "admin@test.com", "password": "securepass123"},
        )

        assert response.status_code == 200
        data = response.json()
        assert data["email"] == "admin@test.com"
        assert data["role"] == "admin"
        mock_db.create_user.assert_called_once_with(
            email="admin@test.com", password="securepass123", role="admin"
        )

    def test_register_subsequent_user_gets_user_role(self, client, mock_db):
        """With a non-zero user count, registration must request the user role."""
        mock_db.get_user_count.return_value = 1
        mock_db.create_user.return_value = _make_regular_user()

        response = client.post(
            "/auth/register",
            json={"email": "user@test.com", "password": "securepass123"},
        )

        assert response.status_code == 200
        data = response.json()
        assert data["role"] == "user"
        # The response body only echoes the mock's return value, so also
        # check the role the endpoint actually asked the database to store.
        mock_db.create_user.assert_called_once_with(
            email="user@test.com", password="securepass123", role="user"
        )

    def test_register_duplicate_email_returns_400(self, client, mock_db):
        """Registering with an existing email should return 400."""
        mock_db.get_user_count.return_value = 1
        mock_db.create_user.return_value = None  # indicates duplicate

        response = client.post(
            "/auth/register",
            json={"email": "existing@test.com", "password": "securepass123"},
        )

        assert response.status_code == 400
        assert "already registered" in response.json()["detail"].lower()
|
||||
|
||||
|
||||
class TestLogin:
    """POST /auth/login"""

    def test_login_valid_credentials_returns_tokens(self, client, mock_db):
        """A successful authentication yields a bearer access/refresh token pair."""
        mock_db.authenticate_user.return_value = _make_regular_user()
        credentials = {"email": "user@test.com", "password": "correctpassword"}

        response = client.post("/auth/login", json=credentials)

        assert response.status_code == 200
        body = response.json()
        for token_field in ("access_token", "refresh_token"):
            assert token_field in body
        assert body["token_type"] == "bearer"

    def test_login_invalid_credentials_returns_401(self, client, mock_db):
        """When authentication returns None the endpoint answers 401."""
        mock_db.authenticate_user.return_value = None
        credentials = {"email": "user@test.com", "password": "wrongpassword"}

        response = client.post("/auth/login", json=credentials)

        assert response.status_code == 401
        error_detail = response.json()["detail"]
        assert "invalid" in error_detail.lower()
|
||||
|
||||
|
||||
class TestGetMe:
    """GET /auth/me"""

    def test_valid_access_token_returns_user(self, client, mock_db):
        """A valid access token should return the user's data."""
        user = _make_regular_user()
        mock_db.get_user_by_id.return_value = user

        response = client.get("/auth/me", headers=_auth_header(user))

        assert response.status_code == 200
        data = response.json()
        assert data["email"] == "user@test.com"
        assert data["id"] == 2

    def test_missing_token_returns_401(self, client):
        """A request without an Authorization header must be rejected.

        Both 401 and 403 are accepted; the original note attributes the
        403 variant to FastAPI's HTTPBearer handling of a missing header.
        """
        response = client.get("/auth/me")
        assert response.status_code in (401, 403)

    def test_expired_token_returns_401(self, client, mock_db):
        """An expired token should return 401."""
        from datetime import timedelta

        import jwt as pyjwt
        from SPARC.auth import JWT_ALGORITHM, JWT_SECRET

        # Make the user lookup succeed so that a 401 can only be caused by
        # the expired token, not by the default "user not found" mock.
        mock_db.get_user_by_id.return_value = _make_regular_user()

        # Hand-build a token whose expiry is already one hour in the past.
        payload = {
            "sub": "1",
            "email": "user@test.com",
            "role": "user",
            "exp": datetime.now(timezone.utc) - timedelta(hours=1),
            "type": "access",
        }
        expired_token = pyjwt.encode(payload, JWT_SECRET, algorithm=JWT_ALGORITHM)

        response = client.get(
            "/auth/me", headers={"Authorization": f"Bearer {expired_token}"}
        )
        assert response.status_code == 401

    def test_refresh_token_as_access_returns_401(self, client, mock_db):
        """Using a refresh token as an access token should return 401."""
        user = _make_regular_user()
        # The user exists, so the rejection must stem from the token type.
        mock_db.get_user_by_id.return_value = user
        refresh_token = create_refresh_token(user["id"], user["email"], user["role"])

        response = client.get(
            "/auth/me", headers={"Authorization": f"Bearer {refresh_token}"}
        )
        assert response.status_code == 401
|
||||
|
||||
|
||||
class TestRefreshToken:
    """POST /auth/refresh"""

    def test_valid_refresh_token_returns_new_tokens(self, client, mock_db):
        """A valid refresh token should issue new access and refresh tokens."""
        user = _make_regular_user()
        mock_db.get_user_by_id.return_value = user
        refresh = create_refresh_token(user["id"], user["email"], user["role"])

        response = client.post("/auth/refresh", json={"refresh_token": refresh})

        assert response.status_code == 200
        data = response.json()
        assert "access_token" in data
        assert "refresh_token" in data

    def test_invalid_refresh_token_returns_401(self, client, mock_db):
        """An invalid refresh token should return 401."""
        # Keep the user lookup successful so only the malformed token can
        # be responsible for the rejection.
        mock_db.get_user_by_id.return_value = _make_regular_user()

        response = client.post(
            "/auth/refresh", json={"refresh_token": "invalid-token-string"}
        )
        assert response.status_code == 401

    def test_access_token_as_refresh_returns_401(self, client, mock_db):
        """Using an access token as a refresh token should return 401."""
        user = _make_regular_user()
        # Without this the default mock reports "no such user", which could
        # produce the 401 on its own and hide a missing token-type check.
        mock_db.get_user_by_id.return_value = user
        access = create_access_token(user["id"], user["email"], user["role"])

        response = client.post("/auth/refresh", json={"refresh_token": access})
        assert response.status_code == 401
|
||||
|
||||
|
||||
class TestAdminUsers:
    """GET /admin/users and PATCH /admin/users/{id}/role"""

    def test_admin_can_list_users(self, client, mock_db):
        """Admin token should allow listing users."""
        admin = _make_admin_user()
        mock_db.get_user_by_id.return_value = admin
        mock_db.get_all_users.return_value = [admin, _make_regular_user()]

        response = client.get("/admin/users", headers=_auth_header(admin))

        assert response.status_code == 200
        data = response.json()
        assert len(data) == 2

    def test_regular_user_cannot_list_users(self, client, mock_db):
        """Regular user token should be rejected with 403."""
        user = _make_regular_user()
        mock_db.get_user_by_id.return_value = user

        response = client.get("/admin/users", headers=_auth_header(user))

        assert response.status_code == 403
        # A rejected caller must not have triggered the user listing query.
        mock_db.get_all_users.assert_not_called()

    def test_no_token_cannot_list_users(self, client):
        """No token should be rejected (401 or 403 are both accepted)."""
        response = client.get("/admin/users")
        assert response.status_code in (401, 403)

    def test_admin_can_change_user_role(self, client, mock_db):
        """Admin should be able to change another user's role."""
        admin = _make_admin_user()
        mock_db.get_user_by_id.return_value = admin
        # The regular user (id 2) as it looks after promotion to admin.
        mock_db.update_user_role.return_value = {**_make_regular_user(), "role": "admin"}

        response = client.patch(
            "/admin/users/2/role",
            json={"role": "admin"},
            headers=_auth_header(admin),
        )

        assert response.status_code == 200
        assert response.json()["role"] == "admin"

    def test_admin_cannot_change_own_role(self, client, mock_db):
        """Admin should not be able to change their own role."""
        admin = _make_admin_user()
        mock_db.get_user_by_id.return_value = admin

        response = client.patch(
            "/admin/users/1/role",
            json={"role": "user"},
            headers=_auth_header(admin),
        )

        assert response.status_code == 400
        assert "own role" in response.json()["detail"].lower()
        # The refusal must happen before any role update reaches the database.
        mock_db.update_user_role.assert_not_called()
|
||||
+105
-39
@@ -1,72 +1,118 @@
|
||||
"""Tests for LLM analysis functionality."""
|
||||
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
from unittest.mock import Mock, MagicMock
|
||||
|
||||
from SPARC.llm import LLMAnalyzer
|
||||
|
||||
|
||||
class TestLLMAnalyzer:
|
||||
"""Test LLM analyzer initialization and API interaction."""
|
||||
|
||||
@pytest.fixture(autouse=True)
def mock_database(self, mocker):
    """Patch ``SPARC.llm.DatabaseClient`` so it returns a stub for every test."""
    stub = Mock()
    stub.configure_mock(
        **{
            # Cache miss unless a test overrides it.
            "get_cached_response.return_value": None,
            "store_message.return_value": 1,
        }
    )
    mocker.patch("SPARC.llm.DatabaseClient", return_value=stub)
    return stub
|
||||
|
||||
def test_analyzer_initialization_with_api_key(self, mocker):
|
||||
"""Test that analyzer initializes with provided API key."""
|
||||
mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
|
||||
mock_openai = mocker.patch("SPARC.llm.OpenAI")
|
||||
|
||||
analyzer = LLMAnalyzer(api_key="test-key-123")
|
||||
|
||||
mock_anthropic.assert_called_once_with(api_key="test-key-123")
|
||||
assert analyzer.model == "claude-3-5-sonnet-20241022"
|
||||
mock_openai.assert_called_once_with(
|
||||
api_key="test-key-123",
|
||||
base_url="https://openrouter.ai/api/v1"
|
||||
)
|
||||
assert analyzer.model == "anthropic/claude-3.5-sonnet"
|
||||
|
||||
def test_analyzer_initialization_from_config(self, mocker):
|
||||
"""Test that analyzer loads API key from config when not provided."""
|
||||
mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
|
||||
mock_openai = mocker.patch("SPARC.llm.OpenAI")
|
||||
mock_config = mocker.patch("SPARC.llm.config")
|
||||
mock_config.anthropic_api_key = "config-key-456"
|
||||
mock_config.openrouter_api_key = "config-key-456"
|
||||
mock_config.use_cache = True
|
||||
mock_config.database_url = "postgresql://localhost/test"
|
||||
|
||||
analyzer = LLMAnalyzer()
|
||||
|
||||
mock_anthropic.assert_called_once_with(api_key="config-key-456")
|
||||
mock_openai.assert_called_once_with(
|
||||
api_key="config-key-456",
|
||||
base_url="https://openrouter.ai/api/v1"
|
||||
)
|
||||
|
||||
def test_analyze_patent_content(self, mocker):
|
||||
def test_analyze_patent_content(self, mocker, mock_database):
|
||||
"""Test single patent content analysis."""
|
||||
mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
|
||||
mock_openai = mocker.patch("SPARC.llm.OpenAI")
|
||||
mock_client = Mock()
|
||||
mock_anthropic.return_value = mock_client
|
||||
mock_openai.return_value = mock_client
|
||||
|
||||
# Mock the API response
|
||||
mock_response = Mock()
|
||||
mock_response.content = [Mock(text="Innovative GPU architecture.")]
|
||||
mock_client.messages.create.return_value = mock_response
|
||||
mock_response.choices = [Mock(message=Mock(content="Innovative GPU architecture."))]
|
||||
mock_response.usage = Mock(prompt_tokens=100, completion_tokens=50, total_tokens=150)
|
||||
mock_client.chat.completions.create.return_value = mock_response
|
||||
|
||||
analyzer = LLMAnalyzer(api_key="test-key")
|
||||
analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
|
||||
result = analyzer.analyze_patent_content(
|
||||
patent_content="ABSTRACT: GPU with new cache design...",
|
||||
company_name="NVIDIA",
|
||||
)
|
||||
|
||||
assert result == "Innovative GPU architecture."
|
||||
mock_client.messages.create.assert_called_once()
|
||||
mock_client.chat.completions.create.assert_called_once()
|
||||
|
||||
# Verify the prompt includes company name and content
|
||||
call_args = mock_client.messages.create.call_args
|
||||
call_args = mock_client.chat.completions.create.call_args
|
||||
prompt_text = call_args[1]["messages"][0]["content"]
|
||||
assert "NVIDIA" in prompt_text
|
||||
assert "GPU with new cache design" in prompt_text
|
||||
|
||||
def test_analyze_patent_portfolio(self, mocker):
|
||||
"""Test portfolio analysis with multiple patents."""
|
||||
mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
|
||||
# Verify message was stored in database
|
||||
mock_database.store_message.assert_called_once()
|
||||
|
||||
def test_analyze_patent_content_cache_hit(self, mocker, mock_database):
|
||||
"""Test that cached responses are returned without API call."""
|
||||
mock_openai = mocker.patch("SPARC.llm.OpenAI")
|
||||
mock_client = Mock()
|
||||
mock_anthropic.return_value = mock_client
|
||||
mock_openai.return_value = mock_client
|
||||
|
||||
# Set up cache hit
|
||||
mock_database.get_cached_response.return_value = {
|
||||
"id": 1,
|
||||
"response": "Cached analysis result"
|
||||
}
|
||||
|
||||
analyzer = LLMAnalyzer(api_key="test-key", use_cache=True)
|
||||
result = analyzer.analyze_patent_content(
|
||||
patent_content="ABSTRACT: GPU with new cache design...",
|
||||
company_name="NVIDIA",
|
||||
)
|
||||
|
||||
assert result == "Cached analysis result"
|
||||
# API should NOT be called on cache hit
|
||||
mock_client.chat.completions.create.assert_not_called()
|
||||
|
||||
def test_analyze_patent_portfolio(self, mocker, mock_database):
|
||||
"""Test portfolio analysis with multiple patents."""
|
||||
mock_openai = mocker.patch("SPARC.llm.OpenAI")
|
||||
mock_client = Mock()
|
||||
mock_openai.return_value = mock_client
|
||||
|
||||
# Mock the API response
|
||||
mock_response = Mock()
|
||||
mock_response.content = [
|
||||
Mock(text="Strong portfolio in AI and graphics.")
|
||||
mock_response.choices = [
|
||||
Mock(message=Mock(content="Strong portfolio in AI and graphics."))
|
||||
]
|
||||
mock_client.messages.create.return_value = mock_response
|
||||
mock_response.usage = Mock(prompt_tokens=200, completion_tokens=100, total_tokens=300)
|
||||
mock_client.chat.completions.create.return_value = mock_response
|
||||
|
||||
analyzer = LLMAnalyzer(api_key="test-key")
|
||||
analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
|
||||
patents_data = [
|
||||
{"patent_id": "US123", "content": "AI acceleration patent"},
|
||||
{"patent_id": "US456", "content": "Graphics rendering patent"},
|
||||
@@ -77,48 +123,68 @@ class TestLLMAnalyzer:
|
||||
)
|
||||
|
||||
assert result == "Strong portfolio in AI and graphics."
|
||||
mock_client.messages.create.assert_called_once()
|
||||
mock_client.chat.completions.create.assert_called_once()
|
||||
|
||||
# Verify the prompt includes all patents
|
||||
call_args = mock_client.messages.create.call_args
|
||||
call_args = mock_client.chat.completions.create.call_args
|
||||
prompt_text = call_args[1]["messages"][0]["content"]
|
||||
assert "US123" in prompt_text
|
||||
assert "US456" in prompt_text
|
||||
assert "AI acceleration patent" in prompt_text
|
||||
assert "Graphics rendering patent" in prompt_text
|
||||
|
||||
def test_analyze_patent_portfolio_with_correct_token_limit(self, mocker):
|
||||
def test_analyze_patent_portfolio_with_correct_token_limit(self, mocker, mock_database):
|
||||
"""Test that portfolio analysis uses higher token limit."""
|
||||
mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
|
||||
mock_openai = mocker.patch("SPARC.llm.OpenAI")
|
||||
mock_client = Mock()
|
||||
mock_anthropic.return_value = mock_client
|
||||
mock_openai.return_value = mock_client
|
||||
|
||||
mock_response = Mock()
|
||||
mock_response.content = [Mock(text="Analysis result.")]
|
||||
mock_client.messages.create.return_value = mock_response
|
||||
mock_response.choices = [Mock(message=Mock(content="Analysis result."))]
|
||||
mock_response.usage = Mock(prompt_tokens=100, completion_tokens=50, total_tokens=150)
|
||||
mock_client.chat.completions.create.return_value = mock_response
|
||||
|
||||
analyzer = LLMAnalyzer(api_key="test-key")
|
||||
analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
|
||||
patents_data = [{"patent_id": "US123", "content": "Test content"}]
|
||||
|
||||
analyzer.analyze_patent_portfolio(patents_data, "TestCo")
|
||||
|
||||
call_args = mock_client.messages.create.call_args
|
||||
call_args = mock_client.chat.completions.create.call_args
|
||||
# Portfolio analysis should use 2048 tokens
|
||||
assert call_args[1]["max_tokens"] == 2048
|
||||
|
||||
def test_analyze_single_patent_with_correct_token_limit(self, mocker):
|
||||
def test_analyze_single_patent_with_correct_token_limit(self, mocker, mock_database):
|
||||
"""Test that single patent analysis uses lower token limit."""
|
||||
mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
|
||||
mock_openai = mocker.patch("SPARC.llm.OpenAI")
|
||||
mock_client = Mock()
|
||||
mock_anthropic.return_value = mock_client
|
||||
mock_openai.return_value = mock_client
|
||||
|
||||
mock_response = Mock()
|
||||
mock_response.content = [Mock(text="Analysis result.")]
|
||||
mock_client.messages.create.return_value = mock_response
|
||||
mock_response.choices = [Mock(message=Mock(content="Analysis result."))]
|
||||
mock_response.usage = Mock(prompt_tokens=100, completion_tokens=50, total_tokens=150)
|
||||
mock_client.chat.completions.create.return_value = mock_response
|
||||
|
||||
analyzer = LLMAnalyzer(api_key="test-key")
|
||||
analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
|
||||
analyzer.analyze_patent_content("Test content", "TestCo")
|
||||
|
||||
call_args = mock_client.messages.create.call_args
|
||||
call_args = mock_client.chat.completions.create.call_args
|
||||
# Single patent should use 1024 tokens
|
||||
assert call_args[1]["max_tokens"] == 1024
|
||||
|
||||
def test_database_always_initialized(self, mocker, mock_database):
    """LLMAnalyzer exposes a non-None ``db_client`` after construction."""
    # The patched OpenAI class is never inspected here, so it is not bound
    # to a local name (presumably it only keeps the constructor from
    # creating a real client -- confirm against SPARC.llm).
    mocker.patch("SPARC.llm.OpenAI")

    analyzer = LLMAnalyzer(api_key="test-key")

    assert analyzer.db_client is not None
|
||||
|
||||
def test_no_api_key_stores_placeholder(self, mocker, mock_database):
    """With no api_key argument, the result carries "[NO API]" and is stored once."""
    # NOTE(review): config is replaced by a bare mock, so its attributes are
    # auto-created mock objects rather than None -- confirm that this is
    # what SPARC.llm treats as "no key".
    mocker.patch("SPARC.llm.config")
    keyless_analyzer = LLMAnalyzer(use_cache=False)

    placeholder = keyless_analyzer.analyze_patent_content("Test content", "TestCo")

    assert "[NO API]" in placeholder
    mock_database.store_message.assert_called_once()
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
"""Tests for rate limiting on auth endpoints."""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from SPARC.api import app
|
||||
|
||||
|
||||
@pytest.fixture
def client():
    """Provide a TestClient for the imported ``app`` (rate limiter left active)."""
    limited_client = TestClient(app)
    return limited_client
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def reset_limiter():
    """Call ``limiter.reset()`` on the API's limiter before each test runs."""
    from SPARC import api as sparc_api

    sparc_api.limiter.reset()
    yield
|
||||
|
||||
|
||||
class TestRateLimiting:
    """Test rate limiting on login and register endpoints."""

    @patch("SPARC.api.get_db_client")
    def test_login_allows_requests_under_limit(self, mock_db_client, client):
        """Login endpoint allows requests under the rate limit."""
        mock_db = MagicMock()
        mock_db.authenticate_user.return_value = None
        mock_db_client.return_value = mock_db

        # Should allow at least a few requests
        for _ in range(5):
            response = client.post(
                "/auth/login",
                json={"email": "test@example.com", "password": "password123"},
            )
            # 401 is expected (invalid credentials), not 429
            assert response.status_code == 401

    @patch("SPARC.api.get_db_client")
    def test_login_rate_limited_after_threshold(self, mock_db_client, client):
        """Login endpoint returns 429 after exceeding rate limit."""
        mock_db = MagicMock()
        mock_db.authenticate_user.return_value = None
        mock_db_client.return_value = mock_db

        # Send more than the limit (10/minute)
        statuses = [
            client.post(
                "/auth/login",
                json={"email": "test@example.com", "password": "password123"},
            ).status_code
            for _ in range(15)
        ]

        # At least one should be 429
        assert 429 in statuses, f"Expected 429 in statuses but got: {set(statuses)}"

    @patch("SPARC.api.get_db_client")
    def test_register_rate_limited_after_threshold(self, mock_db_client, client):
        """Register endpoint returns 429 after exceeding rate limit."""
        mock_db = MagicMock()
        mock_db.get_user_count.return_value = 1
        mock_db.create_user.return_value = None  # triggers 400 (email exists)
        mock_db_client.return_value = mock_db

        # Send more than the limit (5/minute)
        statuses = [
            client.post(
                "/auth/register",
                json={"email": "test@example.com", "password": "password123"},
            ).status_code
            for _ in range(10)
        ]

        # At least one should be 429
        assert 429 in statuses, f"Expected 429 in statuses but got: {set(statuses)}"

    @patch("SPARC.api.get_db_client")
    def test_rate_limit_returns_retry_after_header(self, mock_db_client, client):
        """Rate limited responses include a Retry-After header."""
        mock_db = MagicMock()
        mock_db.authenticate_user.return_value = None
        mock_db_client.return_value = mock_db

        # Exhaust the limit and inspect the first throttled response.
        for _ in range(15):
            response = client.post(
                "/auth/login",
                json={"email": "test@example.com", "password": "password123"},
            )
            if response.status_code == 429:
                assert "Retry-After" in response.headers
                break
        else:
            # Without this branch the test would pass silently when the
            # limiter never fires and the header is never checked.
            pytest.fail("No 429 response was produced, so Retry-After was never checked")
|
||||
@@ -0,0 +1,116 @@
|
||||
"""Tests for security hardening: JWT secret startup check, CORS config, credential handling."""
|
||||
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestJWTSecretStartupCheck:
    """Test the startup guard that refuses default JWT secret in non-dev environments."""

    @staticmethod
    def _reload_config():
        """Re-import SPARC.config so it re-reads the current environment."""
        import importlib

        import SPARC.config

        importlib.reload(SPARC.config)

    @classmethod
    def _restore_development_config(cls):
        """Reload SPARC.config with APP_ENV forced to development."""
        with patch.dict(os.environ, {"APP_ENV": "development"}):
            cls._reload_config()

    def test_default_secret_in_production_raises(self):
        """Starting with default secret and APP_ENV=production must raise RuntimeError."""
        try:
            with patch.dict(os.environ, {"APP_ENV": "production"}):
                # Reload config to pick up the new APP_ENV
                self._reload_config()

                from SPARC.auth import _DEFAULT_JWT_SECRET, check_jwt_secret

                # Patch JWT_SECRET to the default
                with patch("SPARC.auth.JWT_SECRET", _DEFAULT_JWT_SECRET):
                    with pytest.raises(RuntimeError, match="FATAL.*JWT_SECRET"):
                        check_jwt_secret()
        finally:
            # Restore even when the assertion fails, so a production-mode
            # config cannot leak into tests that run afterwards.
            self._restore_development_config()

    def test_default_secret_in_development_succeeds(self):
        """Starting with default secret and APP_ENV=development must not raise."""
        try:
            with patch.dict(os.environ, {"APP_ENV": "development"}):
                self._reload_config()

                from SPARC.auth import _DEFAULT_JWT_SECRET, check_jwt_secret

                with patch("SPARC.auth.JWT_SECRET", _DEFAULT_JWT_SECRET):
                    # Should not raise
                    check_jwt_secret()
        finally:
            # Reload once more under the untouched process environment.
            self._reload_config()

    def test_custom_secret_in_production_succeeds(self):
        """Starting with a custom secret in production must not raise."""
        try:
            with patch.dict(os.environ, {"APP_ENV": "production"}):
                self._reload_config()

                from SPARC.auth import check_jwt_secret

                with patch("SPARC.auth.JWT_SECRET", "my-secure-random-secret-abc123"):
                    # Should not raise
                    check_jwt_secret()
        finally:
            self._restore_development_config()

    def test_default_secret_unset_env_succeeds(self):
        """When APP_ENV is unset (defaults to development), default secret is allowed."""
        try:
            # patch.dict snapshots os.environ and restores it on exit, so
            # the variable can simply be removed inside the block.
            with patch.dict(os.environ):
                os.environ.pop("APP_ENV", None)
                self._reload_config()

                from SPARC.auth import _DEFAULT_JWT_SECRET, check_jwt_secret

                with patch("SPARC.auth.JWT_SECRET", _DEFAULT_JWT_SECRET):
                    # Should not raise (defaults to development)
                    check_jwt_secret()
        finally:
            self._restore_development_config()
|
||||
|
||||
|
||||
class TestCORSConfig:
    """Test that CORS origins are configurable via environment variable."""

    @staticmethod
    def _reload_config():
        """Reload SPARC.config against the current environment and return the module."""
        import importlib

        import SPARC.config

        return importlib.reload(SPARC.config)

    @classmethod
    def _restore_default_config(cls):
        """Reload SPARC.config with CORS_ORIGINS set to the empty string."""
        with patch.dict(os.environ, {"CORS_ORIGINS": ""}):
            cls._reload_config()

    def test_default_cors_origins(self):
        """When CORS_ORIGINS is empty, defaults to localhost origins.

        The variable is set to an empty string here rather than removed
        from the environment.
        """
        with patch.dict(os.environ, {"CORS_ORIGINS": ""}):
            config_module = self._reload_config()
            assert config_module.cors_origins == [
                "http://localhost:3000",
                "http://localhost:5173",
            ]

    def test_custom_cors_origins(self):
        """Setting CORS_ORIGINS configures allowed origins."""
        try:
            with patch.dict(
                os.environ,
                {"CORS_ORIGINS": "https://sparc.example.com,https://app.example.com"},
            ):
                config_module = self._reload_config()
                assert config_module.cors_origins == [
                    "https://sparc.example.com",
                    "https://app.example.com",
                ]
        finally:
            # Restore even on assertion failure so custom origins do not
            # leak into other tests through the reloaded module.
            self._restore_default_config()

    def test_single_cors_origin(self):
        """A single origin without comma works correctly."""
        try:
            with patch.dict(os.environ, {"CORS_ORIGINS": "https://sparc.example.com"}):
                config_module = self._reload_config()
                assert config_module.cors_origins == ["https://sparc.example.com"]
        finally:
            self._restore_default_config()
|
||||
+90
-1
@@ -1,7 +1,10 @@
|
||||
"""Tests for SERP API patent retrieval and parsing functionality."""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime, timedelta
|
||||
from unittest.mock import Mock
|
||||
|
||||
from SPARC.serp_api import SERP
|
||||
from SPARC.types import Patent
|
||||
|
||||
|
||||
class TestTextCleaning:
|
||||
@@ -176,3 +179,89 @@ class TestPatentMinimization:
|
||||
|
||||
# Sections should be separated by double newlines
|
||||
assert "\n\n" in result
|
||||
|
||||
|
||||
class TestDynamicDateRange:
    """Test dynamic date range computation in SERP.query."""

    @staticmethod
    def _window_starts(before, after, days):
        """Return the unpadded M/D/YYYY start dates of a *days*-day window.

        Two timestamps are accepted (taken just before and just after the
        call under test) so a run that straddles midnight still matches.
        The string is built by hand because the ``%-m``/``%-d`` strftime
        directives are a platform extension and not available everywhere.
        """
        return {
            f"{start.month}/{start.day}/{start.year}"
            for start in (before - timedelta(days=days), after - timedelta(days=days))
        }

    def test_query_uses_rolling_date_window(self, mocker):
        """Verify the date filter uses a rolling window, not hardcoded dates."""
        mock_search = mocker.patch("SPARC.serp_api.serpapi.search")
        mock_search.return_value = {"organic_results": []}
        mocker.patch("SPARC.serp_api.config.api_key", "fake-key")
        mocker.patch("SPARC.serp_api.config.patent_search_days", 90)

        before = datetime.now()
        SERP.query("TestCorp")
        after = datetime.now()

        call_params = mock_search.call_args[0][0]
        tbs = call_params["tbs"]
        assert "cdr:1,cd_min:" in tbs
        # Check the configured 90-day window positively. The former
        # `"10/28/2025" not in tbs` check would fail on any day whose
        # window happens to contain that exact date.
        expected = self._window_starts(before, after, 90)
        assert any(f"cd_min:{start}" in tbs for start in expected), (
            f"Expected one of {sorted(expected)} after cd_min: in {tbs!r}"
        )

    def test_query_respects_days_back_param(self, mocker):
        """Verify days_back parameter controls the date window."""
        mock_search = mocker.patch("SPARC.serp_api.serpapi.search")
        mock_search.return_value = {"organic_results": []}
        mocker.patch("SPARC.serp_api.config.api_key", "fake-key")
        mocker.patch("SPARC.serp_api.config.patent_search_days", 90)

        before = datetime.now()
        SERP.query("TestCorp", days_back=30)
        after = datetime.now()

        call_params = mock_search.call_args[0][0]
        tbs = call_params["tbs"]
        expected = self._window_starts(before, after, 30)
        assert any(start in tbs for start in expected), (
            f"Expected one of {sorted(expected)} in {tbs!r}"
        )
|
||||
|
||||
|
||||
class TestFilesystemPDFCaching:
    """Test that save_patents skips download for existing files."""

    def test_save_patents_skips_download_when_cached(self, mocker):
        """Already-downloaded PDFs should not be re-downloaded."""
        mock_get = mocker.patch("SPARC.serp_api.requests.get")
        mocker.patch("SPARC.serp_api.os.makedirs")
        # The cache check is simulated entirely through these two patches,
        # so no real file is needed (the former tmp_path file was never
        # consulted and has been dropped).
        mocker.patch("SPARC.serp_api.os.path.exists", return_value=True)
        mocker.patch("SPARC.serp_api.os.path.getsize", return_value=100)

        patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        result = SERP.save_patents(patent)

        mock_get.assert_not_called()
        assert result.pdf_path == "patents/US123.pdf"

    def test_save_patents_downloads_when_not_cached(self, mocker):
        """Missing PDFs should be downloaded."""
        mock_response = Mock()
        mock_response.content = b"%PDF-1.4 content"
        mock_get = mocker.patch("SPARC.serp_api.requests.get", return_value=mock_response)
        mocker.patch("SPARC.serp_api.os.makedirs")
        mocker.patch("SPARC.serp_api.os.path.exists", return_value=False)
        # Intercept open() so nothing is written to disk; the mock itself
        # is not inspected, hence no local binding.
        mocker.patch("builtins.open", mocker.mock_open())

        patent = Patent(patent_id="US456", pdf_link="http://example.com/test.pdf")
        result = SERP.save_patents(patent)

        mock_get.assert_called_once_with("http://example.com/test.pdf")
        assert result.pdf_path == "patents/US456.pdf"

    def test_save_patents_redownloads_empty_files(self, mocker):
        """Empty/corrupt PDFs (0 bytes) should be re-downloaded."""
        mock_response = Mock()
        mock_response.content = b"%PDF-1.4 content"
        mock_get = mocker.patch("SPARC.serp_api.requests.get", return_value=mock_response)
        mocker.patch("SPARC.serp_api.os.makedirs")
        # File "exists" but reports zero bytes.
        mocker.patch("SPARC.serp_api.os.path.exists", return_value=True)
        mocker.patch("SPARC.serp_api.os.path.getsize", return_value=0)
        mocker.patch("builtins.open", mocker.mock_open())

        patent = Patent(patent_id="US789", pdf_link="http://example.com/test.pdf")
        result = SERP.save_patents(patent)

        mock_get.assert_called_once()
        assert result.pdf_path == "patents/US789.pdf"
|
||||
|
||||
Reference in New Issue
Block a user