Compare commits
70 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 9c971dac72 | |||
| 6f0b448044 | |||
| 1a297eb60b | |||
| 3154f6b732 | |||
| b9bb3dc1cd | |||
| 90f9cfc826 | |||
| d387bbbdf3 | |||
| fa564e5e1e | |||
| 2815deb221 | |||
| 56e8287720 | |||
| b969423957 | |||
| 0dee4c5099 | |||
| 03105a2f87 | |||
| 28e2ded501 | |||
| f87572ab7e | |||
| 44b6c79713 | |||
| 13fe383116 | |||
| dee3cbefbd | |||
| 6acad4cff7 | |||
| 45ccd0b4e1 | |||
| d108d4c7ea | |||
| 068aecce61 | |||
| 8790abfbf7 | |||
| fe0c5ca280 | |||
| ed81ae4569 | |||
| ebba983a1d | |||
| 258b349e98 | |||
| fc99173028 | |||
| 4405f199ba | |||
| 874f60f0d9 | |||
| cb7d7121c5 | |||
| 9c98b948d3 | |||
| af52107ed8 | |||
| 0107691c90 | |||
| 3424384088 | |||
| 5141d9dd47 | |||
| 4e419166e8 | |||
| 7eb72ab549 | |||
| d371ceeec8 | |||
| 490850d7a6 | |||
| d4ba13846a | |||
| 3479ba8a46 | |||
| 1c6d903301 | |||
| 84fd0bef32 | |||
| 4640106530 | |||
| 44456cb073 | |||
| 11a4aba46f | |||
| 5fab53e0a7 | |||
| 1067ffa35a | |||
| 08444b41a8 | |||
| 7b61be1a4a | |||
| 2d37b35d1f | |||
| 19f2de4228 | |||
| 76de945acc | |||
| b63641b36c | |||
| 1f3196b317 | |||
| 50adb4435c | |||
| 7a317a0acd | |||
| 19b97f7f6d | |||
| ead0867f4d | |||
| c6843ac115 | |||
| 56892ebbdc | |||
| dc7eedd902 | |||
| a65c267687 | |||
| a498b6f525 | |||
| af4114969a | |||
| 8971ebc913 | |||
| 6882e53280 | |||
| b8566fc2af | |||
| a91c3badab |
@@ -0,0 +1,21 @@
|
||||
# SPARC Configuration
|
||||
|
||||
# SerpAPI key for patent search
|
||||
API_KEY=your_serpapi_key_here
|
||||
|
||||
# OpenRouter API key for LLM analysis
|
||||
OPENROUTER_API_KEY=your_openrouter_key_here
|
||||
|
||||
# Database configuration
|
||||
# All messages are stored in the database for persistence and caching
|
||||
DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc
|
||||
|
||||
# Cache configuration
|
||||
# When USE_CACHE=true: check database for cached responses before making API calls
|
||||
# When USE_CACHE=false: always make fresh API calls (still stores results in database)
|
||||
# Default: true
|
||||
USE_CACHE=true
|
||||
|
||||
# JWT Secret for authentication
|
||||
# IMPORTANT: Change this to a secure random string in production
|
||||
JWT_SECRET=your-secure-jwt-secret-change-in-production
|
||||
@@ -0,0 +1,152 @@
|
||||
name: Build and Push Docker Images
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
tags:
|
||||
- '*'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build-api:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
shell: sh
|
||||
run: |
|
||||
apk add --no-cache git docker-cli
|
||||
|
||||
- name: Checkout code
|
||||
shell: sh
|
||||
run: |
|
||||
git clone http://gitea.gitea.svc.cluster.local/${{ gitea.repository }}.git .
|
||||
git checkout ${{ gitea.sha }}
|
||||
|
||||
- name: Determine image tags
|
||||
id: tags
|
||||
shell: sh
|
||||
run: |
|
||||
REGISTRY="gitea.gitea.svc.cluster.local:80"
|
||||
REPO_OWNER="${{ gitea.repository_owner }}"
|
||||
REPO_NAME="${{ gitea.repository }}"
|
||||
|
||||
REPO_NAME_ONLY=$(echo "$REPO_NAME" | cut -d'/' -f2)
|
||||
REPO_OWNER_LOWER=$(echo "$REPO_OWNER" | tr '[:upper:]' '[:lower:]')
|
||||
REPO_NAME_LOWER=$(echo "$REPO_NAME_ONLY" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
IMAGE_BASE="${REGISTRY}/${REPO_OWNER_LOWER}/${REPO_NAME_LOWER}"
|
||||
|
||||
case "${{ gitea.ref }}" in
|
||||
refs/tags/*)
|
||||
TAG_NAME="${{ gitea.ref_name }}"
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:${TAG_NAME}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
refs/heads/main)
|
||||
TIMESTAMP=$(date -u +%Y%m%d%H%M%S)
|
||||
SHORT_SHA=$(echo "${{ gitea.sha }}" | cut -c1-7)
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:${TIMESTAMP}-${SHORT_SHA}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
*)
|
||||
BRANCH_TAG=$(echo "${{ gitea.ref_name }}" | sed 's/\//-/g')
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:${BRANCH_TAG}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=false" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "IMAGE_LATEST=${IMAGE_BASE}:latest" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Login to registry
|
||||
shell: sh
|
||||
run: |
|
||||
echo "${{ secrets.PERSONAL_TOKEN }}" | docker login gitea.gitea.svc.cluster.local:80 -u "${{ gitea.actor }}" --password-stdin
|
||||
|
||||
- name: Build and push API image
|
||||
shell: sh
|
||||
run: |
|
||||
echo "Building API image..."
|
||||
docker build -t ${{ steps.tags.outputs.IMAGE_TAG }} .
|
||||
|
||||
echo "Pushing API image..."
|
||||
docker push ${{ steps.tags.outputs.IMAGE_TAG }}
|
||||
|
||||
if [ "${{ steps.tags.outputs.PUSH_LATEST }}" = "true" ]; then
|
||||
echo "Tagging and pushing latest..."
|
||||
docker tag ${{ steps.tags.outputs.IMAGE_TAG }} ${{ steps.tags.outputs.IMAGE_LATEST }}
|
||||
docker push ${{ steps.tags.outputs.IMAGE_LATEST }}
|
||||
fi
|
||||
|
||||
echo "API image available at ${{ steps.tags.outputs.IMAGE_TAG }}"
|
||||
|
||||
build-frontend:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
shell: sh
|
||||
run: |
|
||||
apk add --no-cache git docker-cli
|
||||
|
||||
- name: Checkout code
|
||||
shell: sh
|
||||
run: |
|
||||
git clone http://gitea.gitea.svc.cluster.local/${{ gitea.repository }}.git .
|
||||
git checkout ${{ gitea.sha }}
|
||||
|
||||
- name: Determine image tags
|
||||
id: tags
|
||||
shell: sh
|
||||
run: |
|
||||
REGISTRY="gitea.gitea.svc.cluster.local:80"
|
||||
REPO_OWNER="${{ gitea.repository_owner }}"
|
||||
REPO_NAME="${{ gitea.repository }}"
|
||||
|
||||
REPO_NAME_ONLY=$(echo "$REPO_NAME" | cut -d'/' -f2)
|
||||
REPO_OWNER_LOWER=$(echo "$REPO_OWNER" | tr '[:upper:]' '[:lower:]')
|
||||
REPO_NAME_LOWER=$(echo "$REPO_NAME_ONLY" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
IMAGE_BASE="${REGISTRY}/${REPO_OWNER_LOWER}/${REPO_NAME_LOWER}"
|
||||
|
||||
case "${{ gitea.ref }}" in
|
||||
refs/tags/*)
|
||||
TAG_NAME="${{ gitea.ref_name }}"
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${TAG_NAME}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
refs/heads/main)
|
||||
TIMESTAMP=$(date -u +%Y%m%d%H%M%S)
|
||||
SHORT_SHA=$(echo "${{ gitea.sha }}" | cut -c1-7)
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${TIMESTAMP}-${SHORT_SHA}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=true" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
*)
|
||||
BRANCH_TAG=$(echo "${{ gitea.ref_name }}" | sed 's/\//-/g')
|
||||
echo "IMAGE_TAG=${IMAGE_BASE}:frontend-${BRANCH_TAG}" >> $GITHUB_OUTPUT
|
||||
echo "PUSH_LATEST=false" >> $GITHUB_OUTPUT
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "IMAGE_LATEST=${IMAGE_BASE}:frontend-latest" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Login to registry
|
||||
shell: sh
|
||||
run: |
|
||||
echo "${{ secrets.PERSONAL_TOKEN }}" | docker login gitea.gitea.svc.cluster.local:80 -u "${{ gitea.actor }}" --password-stdin
|
||||
|
||||
- name: Build and push frontend image
|
||||
shell: sh
|
||||
run: |
|
||||
echo "Building frontend image..."
|
||||
docker build -t ${{ steps.tags.outputs.IMAGE_TAG }} ./frontend
|
||||
|
||||
echo "Pushing frontend image..."
|
||||
docker push ${{ steps.tags.outputs.IMAGE_TAG }}
|
||||
|
||||
if [ "${{ steps.tags.outputs.PUSH_LATEST }}" = "true" ]; then
|
||||
echo "Tagging and pushing frontend-latest..."
|
||||
docker tag ${{ steps.tags.outputs.IMAGE_TAG }} ${{ steps.tags.outputs.IMAGE_LATEST }}
|
||||
docker push ${{ steps.tags.outputs.IMAGE_LATEST }}
|
||||
fi
|
||||
|
||||
echo "Frontend image available at ${{ steps.tags.outputs.IMAGE_TAG }}"
|
||||
+2
-1
@@ -2,4 +2,5 @@
|
||||
.pyenv
|
||||
__pycache__
|
||||
.venv
|
||||
patents
|
||||
patents
|
||||
tmp/
|
||||
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies for pdfplumber and psycopg2
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libpq-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt .
|
||||
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
|
||||
# Create patents directory for PDF storage
|
||||
RUN mkdir -p /app/patents
|
||||
|
||||
# Expose ports for API and Dashboard
|
||||
EXPOSE 8000 8501
|
||||
|
||||
# Default command runs the API (can be overridden in docker-compose)
|
||||
CMD ["uvicorn", "SPARC.api:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
||||
@@ -1,28 +1,286 @@
|
||||
# SPARC
|
||||
|
||||
## Name
|
||||
Semiconductor Patent & Analytics Report Core
|
||||
**Semiconductor Patent & Analytics Report Core**
|
||||
|
||||
## Description
|
||||
A patent analysis system that estimates company performance by analyzing their patent portfolios using LLM-powered insights.
|
||||
|
||||
## Installation
|
||||
### NixOS Installation
|
||||
Run `nix develop` to build and configure the Nix development environment.
|
||||
## Overview
|
||||
|
||||
## Usage
|
||||
```bash
|
||||
docker compose up -d
|
||||
SPARC automatically collects, parses, and analyzes patents from companies to provide performance estimations. It uses Claude AI to evaluate innovation quality, strategic direction, and competitive positioning based on patent content.
|
||||
|
||||
## Features
|
||||
|
||||
- **Patent Retrieval**: Automated collection via SerpAPI's Google Patents engine
|
||||
- **Intelligent Parsing**: Extracts key sections (abstract, claims, summary) from patent PDFs
|
||||
- **Content Minimization**: Removes verbose descriptions to reduce LLM token usage
|
||||
- **AI Analysis**: Uses Claude 3.5 Sonnet via OpenRouter to analyze innovation quality and market potential
|
||||
- **Portfolio Analysis**: Evaluates multiple patents holistically for comprehensive insights
|
||||
- **Batch Processing**: Analyze multiple companies concurrently with progress tracking
|
||||
- **REST API**: FastAPI web service with async job support
|
||||
- **Dashboard**: React TypeScript web dashboard with authentication
|
||||
- **Robust Testing**: 40 tests covering all major functionality
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
SPARC/
|
||||
├── serp_api.py # Patent retrieval and PDF parsing
|
||||
├── llm.py # Claude AI integration via OpenRouter
|
||||
├── analyzer.py # High-level orchestration
|
||||
├── api.py # FastAPI web service with auth endpoints
|
||||
├── auth.py # JWT authentication module
|
||||
├── database.py # PostgreSQL storage with caching
|
||||
├── types.py # Data models
|
||||
└── config.py # Environment configuration
|
||||
```
|
||||
|
||||
## Roadmap
|
||||
- [X] Retrieve `publicationID` from SERP API
|
||||
- [ ] Retrieve data from Google's patent API based on those `publicationID`s
|
||||
- This may not be needed; looking to parse the patents based solely on the PDF retrieved from SERP
|
||||
- [ ] Wrap this into a python fastAPI, then bundle with docker
|
||||
## Installation
|
||||
|
||||
### Docker (Recommended)
|
||||
|
||||
```bash
|
||||
# Clone and configure
|
||||
git clone <repository-url>
|
||||
cd SPARC
|
||||
cp .env.example .env
|
||||
# Edit .env with your API keys
|
||||
|
||||
# Start all services (API, Dashboard, PostgreSQL)
|
||||
docker-compose up -d
|
||||
|
||||
# Access the services
|
||||
# - API: http://localhost:8000
|
||||
# - Dashboard: http://localhost:8080
|
||||
# - API Docs: http://localhost:8000/docs
|
||||
```
|
||||
|
||||
### NixOS
|
||||
|
||||
```bash
|
||||
nix develop
|
||||
```
|
||||
|
||||
This automatically creates a virtual environment and installs all dependencies.
|
||||
|
||||
### Manual Installation
|
||||
|
||||
```bash
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Create a `.env` file in the project root:
|
||||
|
||||
```bash
|
||||
# SerpAPI key for patent search
|
||||
API_KEY=your_serpapi_key_here
|
||||
|
||||
# OpenRouter API key for Claude AI analysis
|
||||
OPENROUTER_API_KEY=your_openrouter_key_here
|
||||
```
|
||||
|
||||
Get your API keys:
|
||||
- SerpAPI: https://serpapi.com/
|
||||
- OpenRouter: https://openrouter.ai/
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
|
||||
# Initialize the analyzer
|
||||
analyzer = CompanyAnalyzer()
|
||||
|
||||
# Analyze a company's patent portfolio
|
||||
analysis = analyzer.analyze_company("nvidia")
|
||||
print(analysis)
|
||||
```
|
||||
|
||||
### Run the Example
|
||||
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
This will:
|
||||
1. Retrieve recent NVIDIA patents
|
||||
2. Parse and minimize content
|
||||
3. Analyze with Claude AI
|
||||
4. Print comprehensive performance assessment
|
||||
|
||||
### Single Patent Analysis
|
||||
|
||||
```python
|
||||
# Analyze a specific patent
|
||||
result = analyzer.analyze_single_patent(
|
||||
patent_id="US11322171B1",
|
||||
company_name="nvidia"
|
||||
)
|
||||
```
|
||||
|
||||
### Multi-Company Batch Analysis
|
||||
|
||||
```python
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
|
||||
analyzer = CompanyAnalyzer()
|
||||
|
||||
# Analyze multiple companies concurrently (default 3 workers)
|
||||
batch_result = analyzer.analyze_companies(
|
||||
["nvidia", "amd", "intel", "qualcomm"],
|
||||
max_workers=3
|
||||
)
|
||||
|
||||
# Access results
|
||||
print(f"Analyzed: {batch_result.total_companies}")
|
||||
print(f"Successful: {batch_result.successful}")
|
||||
print(f"Failed: {batch_result.failed}")
|
||||
|
||||
for result in batch_result.results:
|
||||
if result.success:
|
||||
print(f"{result.company_name}: {result.patent_count} patents")
|
||||
print(result.analysis)
|
||||
|
||||
# Or use sequential processing (safer for rate limits)
|
||||
batch_result = analyzer.analyze_companies_sequential(["nvidia", "amd"])
|
||||
```
|
||||
|
||||
### REST API
|
||||
|
||||
Start the FastAPI server:
|
||||
|
||||
```bash
|
||||
uvicorn SPARC.api:app --reload
|
||||
```
|
||||
|
||||
API endpoints:
|
||||
|
||||
| Endpoint | Method | Description |
|
||||
|----------|--------|-------------|
|
||||
| `/health` | GET | Health check |
|
||||
| `/analyze/{company}` | GET | Analyze single company |
|
||||
| `/analyze/batch` | POST | Analyze multiple companies |
|
||||
| `/analyze/batch/async` | POST | Start async batch job |
|
||||
| `/jobs/{job_id}` | GET | Get job status |
|
||||
| `/jobs` | GET | List all jobs |
|
||||
|
||||
Interactive docs available at `http://localhost:8000/docs`
|
||||
|
||||
Example API usage:
|
||||
|
||||
```bash
|
||||
# Single company
|
||||
curl http://localhost:8000/analyze/nvidia
|
||||
|
||||
# Batch analysis
|
||||
curl -X POST http://localhost:8000/analyze/batch \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"companies": ["nvidia", "amd", "intel"]}'
|
||||
|
||||
# Async batch (for long-running jobs)
|
||||
curl -X POST http://localhost:8000/analyze/batch/async \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"companies": ["nvidia", "amd", "intel", "qualcomm"]}'
|
||||
```
|
||||
|
||||
### Web Dashboard
|
||||
|
||||
The React dashboard is included in Docker Compose:
|
||||
|
||||
```bash
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
Dashboard features:
|
||||
- **Authentication**: User registration, login, and JWT-based sessions
|
||||
- **Company Analysis**: Analyze individual companies with real-time results
|
||||
- **Batch Analysis**: Process multiple companies with progress tracking
|
||||
- **Analytics**: View historical analysis data and trends
|
||||
- **Admin Panel**: User management for administrators
|
||||
|
||||
The dashboard runs at `http://localhost:8080` when using Docker Compose.
|
||||
|
||||
## Running Tests
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
pytest tests/ -v
|
||||
|
||||
# Run specific test modules
|
||||
pytest tests/test_analyzer.py -v
|
||||
pytest tests/test_llm.py -v
|
||||
pytest tests/test_serp_api.py -v
|
||||
|
||||
# Run with coverage
|
||||
pytest tests/ --cov=SPARC --cov-report=term-missing
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Patent Collection**: Queries SerpAPI for company patents
|
||||
2. **PDF Download**: Retrieves patent PDF files
|
||||
3. **Section Extraction**: Parses abstract, claims, summary, and description
|
||||
4. **Content Minimization**: Keeps essential sections, removes bloated descriptions
|
||||
5. **LLM Analysis**: Sends minimized content to Claude for analysis
|
||||
6. **Performance Estimation**: Returns insights on innovation quality and outlook
|
||||
|
||||
## Roadmap
|
||||
|
||||
- [X] Retrieve `publicationID` from SERP API
|
||||
- [X] Parse patents from PDFs (no need for Google Patent API)
|
||||
- [X] Extract and minimize patent content
|
||||
- [X] LLM integration for analysis
|
||||
- [X] Company performance estimation
|
||||
- [X] Multi-company batch processing
|
||||
- [X] FastAPI web service wrapper
|
||||
- [X] Docker containerization
|
||||
- [X] Results persistence (database)
|
||||
- [X] Visualization dashboard
|
||||
|
||||
## Development
|
||||
|
||||
### Code Style
|
||||
|
||||
- Type hints throughout
|
||||
- Comprehensive docstrings
|
||||
- Small, testable functions
|
||||
- Conventional commits
|
||||
|
||||
### Testing Philosophy
|
||||
|
||||
- Unit tests for core logic
|
||||
- Integration tests for orchestration
|
||||
- Mock external APIs
|
||||
- Aim for high coverage
|
||||
|
||||
### Making Changes
|
||||
|
||||
1. Write tests first
|
||||
2. Implement feature
|
||||
3. Verify all tests pass
|
||||
4. Commit with conventional format: `type: description`
|
||||
|
||||
Types: `feat`, `fix`, `docs`, `test`, `refactor`, `chore`
|
||||
|
||||
## Documentation
|
||||
|
||||
Additional documentation is available in the `docs/` directory:
|
||||
|
||||
- **[Deployment Guide](docs/DEPLOYMENT.md)** - Complete deployment instructions for Docker, database setup, and production configuration
|
||||
- **[Database Mode](docs/DATABASE_MODE.md)** - Database storage for prompts, responses, and analytics
|
||||
- **[Container Registry](docs/CONTAINER_REGISTRY.md)** - CI/CD and container registry setup with Gitea Actions
|
||||
|
||||
## License
|
||||
|
||||
For open source projects, say how it is licensed.
|
||||
|
||||
## Project status
|
||||
Under heavy development, within the limited time available to me.
|
||||
## Project Status
|
||||
|
||||
Core functionality complete. Ready for production use with API keys configured.
|
||||
|
||||
All major features implemented: REST API, React dashboard with authentication, Docker containerization, database storage with caching, and multi-company batch processing.
|
||||
|
||||
@@ -0,0 +1,327 @@
|
||||
"""High-level patent analysis orchestration.
|
||||
|
||||
This module ties together patent retrieval, parsing, and LLM analysis
|
||||
to provide company performance estimation based on patent portfolios.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from typing import Callable
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.database import DatabaseClient
|
||||
from SPARC.serp_api import SERP
|
||||
from SPARC.llm import LLMAnalyzer
|
||||
from SPARC.types import Patent, Patents, CompanyAnalysisResult, BatchAnalysisResult
|
||||
|
||||
|
||||
class CompanyAnalyzer:
    """Orchestrates end-to-end company performance analysis via patents.

    Ties together SERP patent retrieval, PDF parsing/minimization, the
    database cache, and LLM analysis behind a small set of entry points.
    """

    # Prefixes of the plain-text messages analyze_company() returns when it
    # could not produce a real analysis; used to classify results as failures.
    _FAILURE_PREFIXES = ("No patents found", "Failed to process")

    def __init__(self, openrouter_api_key: str | None = None, db_client: DatabaseClient | None = None):
        """Initialize the company analyzer.

        Connects the database client and initializes its schema immediately.

        Args:
            openrouter_api_key: Optional OpenRouter API key, forwarded to
                LLMAnalyzer as ``api_key``. When None, LLMAnalyzer is expected
                to fall back to configuration (not visible here - confirm).
            db_client: Optional DatabaseClient used for caching. When None, one
                is created from ``config.database_url``.
        """
        self.llm_analyzer = LLMAnalyzer(api_key=openrouter_api_key)
        # Explicit None check so a caller-supplied client is never replaced
        # merely because it evaluates as falsy.
        self.db = DatabaseClient(config.database_url) if db_client is None else db_client
        self.db.connect()
        self.db.initialize_schema()

    @staticmethod
    def _query_hash(company_name: str) -> str:
        """Return the SERP cache key: SHA-256 hex digest of the lower-cased name."""
        return hashlib.sha256(company_name.lower().encode()).hexdigest()

    @staticmethod
    def _build_batch_result(results: list, total: int) -> BatchAnalysisResult:
        """Print the batch summary line and wrap results in a BatchAnalysisResult."""
        successful = sum(1 for r in results if r.success)
        failed = total - successful

        print(f"\nBatch complete: {successful} succeeded, {failed} failed")

        return BatchAnalysisResult(
            results=results,
            total_companies=total,
            successful=successful,
            failed=failed,
        )

    def analyze_company(self, company_name: str, patents: "Patents | None" = None) -> str:
        """Analyze a company's performance based on their patent portfolio.

        This is the main entry point that orchestrates the full pipeline:
        1. Retrieve patents from the SERP API (or the cached SERP query)
        2. Download and parse each patent PDF
        3. Minimize patent content (remove bloat)
        4. Analyze portfolio with LLM
        5. Return performance estimation

        Args:
            company_name: Name of the company to analyze
            patents: Optional pre-fetched Patents result to avoid duplicate API calls

        Returns:
            The LLM's portfolio analysis, or a plain message starting with
            "No patents found" / "Failed to process" when nothing could be
            analyzed.
        """
        if patents is None:
            # Check SERP query cache first
            query_hash = self._query_hash(company_name)
            cached_ids = self.db.get_cached_serp_query(query_hash)
            if cached_ids is not None:
                print(f"Using cached SERP results for {company_name} ({len(cached_ids)} patents)")
                # NOTE(review): cached entries carry an empty pdf_link. If one of
                # these IDs is missing from the patent cache,
                # _process_single_patent falls through to SERP.save_patents with
                # that empty link - confirm that this is handled there.
                patents = Patents(patents=[
                    Patent(patent_id=pid, pdf_link="")
                    for pid in cached_ids
                ])
            else:
                print(f"Retrieving patents for {company_name}...")
                patents = SERP.query(company_name)
                # Cache the SERP results (only non-empty ones are stored)
                if patents.patents:
                    self.db.store_serp_query(
                        company_name=company_name,
                        query_hash=query_hash,
                        patent_ids=[p.patent_id for p in patents.patents],
                    )

        if not patents.patents:
            return f"No patents found for {company_name}"

        print(f"Found {len(patents.patents)} patents. Processing...")

        # Download, parse, and minimize patents in parallel. Results are
        # collected in completion order, not submission order.
        processed_patents = []
        with ThreadPoolExecutor(max_workers=config.patent_thread_workers) as executor:
            future_to_patent = {
                executor.submit(self._process_single_patent, patent, company_name, self.db): patent
                for patent in patents.patents
            }
            for future in as_completed(future_to_patent):
                patent = future_to_patent[future]
                try:
                    result = future.result()
                    if result:
                        processed_patents.append(result)
                except Exception as e:
                    # Defensive: _process_single_patent already catches and
                    # returns None, so one bad patent never aborts the batch.
                    print(f"Warning: Failed to process {patent.patent_id}: {e}")

        if not processed_patents:
            return f"Failed to process any patents for {company_name}"

        print("Analyzing portfolio with LLM...")

        # Analyze the full portfolio with LLM
        analysis = self.llm_analyzer.analyze_patent_portfolio(
            patents_data=processed_patents, company_name=company_name
        )

        return analysis

    def analyze_single_patent(self, patent_id: str, company_name: str) -> str:
        """Analyze a single patent by ID.

        Useful for focused analysis of specific innovations. Reads the PDF
        from ``patents/<patent_id>.pdf``; nothing is downloaded here.

        Args:
            patent_id: Publication ID of the patent
            company_name: Name of the company (for context)

        Returns:
            The LLM's analysis of the patent, or a "Failed to analyze patent"
            message containing the error when any step raises.
        """
        # Note: This simplified version assumes the patent PDF is already downloaded
        # A more complete implementation would support direct patent ID lookup
        print(f"Analyzing patent {patent_id} for {company_name}...")

        patent_path = f"patents/{patent_id}.pdf"

        try:
            sections = SERP.parse_patent_pdf(patent_path)
            minimized_content = SERP.minimize_patent_for_llm(sections)

            analysis = self.llm_analyzer.analyze_patent_content(
                patent_content=minimized_content, company_name=company_name
            )

            return analysis

        except Exception as e:
            return f"Failed to analyze patent {patent_id}: {e}"

    @staticmethod
    def _process_single_patent(
        patent: Patent,
        company_name: str = "",
        db: DatabaseClient | None = None,
    ) -> dict | None:
        """Download, parse, and minimize a single patent.

        Called from worker threads by analyze_company; this assumes the db
        client may be shared across threads - TODO confirm.

        Checks DB cache before downloading. Stores results after processing.

        Args:
            patent: Patent to process.
            company_name: Company name stored alongside the cached patent.
            db: Optional DatabaseClient; caching is skipped when falsy.

        Returns:
            Dict with ``patent_id`` and ``content`` (the minimized text), or
            None on failure.
        """
        try:
            # Check DB cache first
            if db:
                cached = db.get_cached_patent(patent.patent_id)
                if cached and cached.get("minimized_content"):
                    return {"patent_id": patent.patent_id, "content": cached["minimized_content"]}

            # Full processing: download, parse, minimize
            patent = SERP.save_patents(patent)
            sections = SERP.parse_patent_pdf(patent.pdf_path)
            minimized_content = SERP.minimize_patent_for_llm(sections)

            # Store in DB cache
            if db:
                db.store_patent(
                    patent_id=patent.patent_id,
                    company_name=company_name,
                    pdf_link=patent.pdf_link,
                    raw_sections=sections,
                    minimized_content=minimized_content,
                )

            return {"patent_id": patent.patent_id, "content": minimized_content}
        except Exception as e:
            # Best effort: report and let the caller skip this patent.
            print(f"Warning: Failed to process {patent.patent_id}: {e}")
            return None

    def _analyze_company_safe(self, company_name: str) -> CompanyAnalysisResult:
        """Internal wrapper that catches exceptions and returns structured result.

        Args:
            company_name: Name of the company to analyze

        Returns:
            CompanyAnalysisResult with success/failure status
        """
        try:
            # Delegate to analyze_company which handles SERP/patent caching
            analysis = self.analyze_company(company_name)

            # Determine patent count from cached SERP query
            cached_ids = self.db.get_cached_serp_query(self._query_hash(company_name))
            patent_count = len(cached_ids) if cached_ids else 0

            # analyze_company reports these failures as plain messages
            if analysis.startswith(self._FAILURE_PREFIXES):
                return CompanyAnalysisResult(
                    company_name=company_name,
                    analysis=analysis,
                    patent_count=patent_count,
                    success=False,
                    error=analysis,
                )

            return CompanyAnalysisResult(
                company_name=company_name,
                analysis=analysis,
                patent_count=patent_count,
                success=True,
            )

        except Exception as e:
            return CompanyAnalysisResult(
                company_name=company_name,
                analysis="",
                patent_count=0,
                success=False,
                error=str(e),
            )

    def analyze_companies(
        self,
        companies: list[str],
        max_workers: int = 3,
        progress_callback: Callable[[str, int, int], None] | None = None,
    ) -> BatchAnalysisResult:
        """Analyze multiple companies' patent portfolios in batch.

        Runs one analysis per company on a thread pool. Exactly one result is
        recorded per company, in completion order.

        Args:
            companies: List of company names to analyze
            max_workers: Maximum concurrent analyses (default 3 to avoid rate limits)
            progress_callback: Optional callback(company_name, completed, total),
                invoked once per finished company. Exceptions it raises are
                reported as warnings and do not affect the recorded results.

        Returns:
            BatchAnalysisResult containing all individual results and summary stats
        """
        results: list[CompanyAnalysisResult] = []
        total = len(companies)

        print(f"Starting batch analysis of {total} companies...")

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_company = {
                executor.submit(self._analyze_company_safe, company): company
                for company in companies
            }

            for completed, future in enumerate(as_completed(future_to_company), 1):
                company = future_to_company[future]

                try:
                    result = future.result()
                except Exception as e:
                    result = CompanyAnalysisResult(
                        company_name=company,
                        analysis="",
                        patent_count=0,
                        success=False,
                        error=str(e),
                    )
                    print(f"[{completed}/{total}] ✗ {company}: {e}")
                else:
                    status = "✓" if result.success else "✗"
                    print(f"[{completed}/{total}] {status} {company}")

                results.append(result)

                # Kept outside the try above: a failing callback must not add a
                # second (failed) entry for a company already recorded.
                if progress_callback:
                    try:
                        progress_callback(company, completed, total)
                    except Exception as e:
                        print(f"Warning: progress callback failed for {company}: {e}")

        return self._build_batch_result(results, total)

    def analyze_companies_sequential(
        self, companies: list[str]
    ) -> BatchAnalysisResult:
        """Analyze multiple companies sequentially (safer for rate limits).

        Use this when you want more control over API rate limiting or
        when debugging issues.

        Args:
            companies: List of company names to analyze

        Returns:
            BatchAnalysisResult containing all individual results, in input order
        """
        results: list[CompanyAnalysisResult] = []
        total = len(companies)

        print(f"Starting sequential analysis of {total} companies...")

        for idx, company in enumerate(companies, 1):
            print(f"\n[{idx}/{total}] Analyzing {company}...")
            result = self._analyze_company_safe(company)
            results.append(result)

            status = "✓" if result.success else "✗"
            print(f"[{idx}/{total}] {status} {company}")

        return self._build_batch_result(results, total)
|
||||
+536
@@ -0,0 +1,536 @@
|
||||
"""FastAPI web service wrapper for SPARC patent analysis.
|
||||
|
||||
Provides REST API endpoints for analyzing company patent portfolios.
|
||||
"""
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime
|
||||
from typing import Annotated, List
|
||||
|
||||
from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, Query
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel, EmailStr, Field
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
from SPARC.auth import (
|
||||
TokenResponse,
|
||||
UserResponse,
|
||||
create_tokens,
|
||||
decode_token,
|
||||
get_current_admin,
|
||||
get_current_user,
|
||||
get_db_client,
|
||||
)
|
||||
from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult
|
||||
|
||||
|
||||
# Pydantic models for API
|
||||
class CompanyAnalysisResponse(BaseModel):
    """Response model for single company analysis."""

    # What was analyzed and the resulting text.
    company_name: str
    analysis: str
    patent_count: int

    # Outcome; `error` is optional and defaults to None.
    success: bool
    error: str | None = None

    timestamp: datetime
|
||||
|
||||
|
||||
class BatchAnalysisResponse(BaseModel):
    """Response model for batch company analysis."""

    # One entry per analyzed company.
    results: list[CompanyAnalysisResponse]

    # Summary counters for the batch.
    total_companies: int
    successful: int
    failed: int

    timestamp: datetime
|
||||
|
||||
|
||||
class BatchAnalysisRequest(BaseModel):
    """Request model for batch company analysis."""

    # Required; must contain between 1 and 20 names.
    companies: list[str] = Field(
        ...,
        min_length=1,
        max_length=20,
        description="List of company names to analyze",
    )
    # Optional; defaults to 3 and must lie in the range 1-5.
    max_workers: int = Field(
        default=3,
        ge=1,
        le=5,
        description="Max concurrent analyses",
    )
|
||||
|
||||
|
||||
class JobStatus(BaseModel):
    """Snapshot of a background batch-analysis job."""

    # Identifier handed back to the client for polling.
    job_id: str
    # One of "pending", "running", "completed", "failed".
    status: str
    # Percentage in the range 0-100.
    progress: int
    # Number of companies submitted with the job.
    total_companies: int
    # Number of companies finished so far.
    completed_companies: int
    # Batch result; None until one has been recorded for the job.
    result: BatchAnalysisResponse | None = None
    # Error text; None unless one has been recorded for the job.
    error: str | None = None
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
    """Payload returned by the health-check endpoint."""

    # Health indicator string (the endpoint reports "healthy").
    status: str
    # API version string.
    version: str
    # Time at which the health response was built.
    timestamp: datetime
|
||||
|
||||
|
||||
# Auth request/response models
|
||||
class RegisterRequest(BaseModel):
    """Request body for creating a new user account."""

    # Validated as an e-mail address by pydantic.
    email: EmailStr
    # Required; must contain at least 8 characters.
    password: str = Field(
        ...,
        min_length=8,
        description="Password (min 8 characters)",
    )
|
||||
|
||||
|
||||
class LoginRequest(BaseModel):
    """Request body for logging in with e-mail and password."""

    # Validated as an e-mail address by pydantic.
    email: EmailStr
    # No length constraint is applied at login time.
    password: str
|
||||
|
||||
|
||||
class RefreshRequest(BaseModel):
    """Request body for exchanging a refresh token for new tokens."""

    # Encoded JWT previously issued as a refresh token.
    refresh_token: str
|
||||
|
||||
|
||||
class UpdateRoleRequest(BaseModel):
    """Request body for changing a user's role."""

    # Required; only the exact values "admin" or "user" are accepted.
    role: str = Field(..., pattern="^(admin|user)$")
|
||||
|
||||
|
||||
class AnalyticsResponse(BaseModel):
    """Payload returned by the analytics endpoint."""

    # Message count for the reporting period.
    total_messages: int
    # Per-company rows as returned by the database client.
    by_company: List[dict]
    # Per-analysis-type rows as returned by the database client.
    by_type: List[dict]
    # Length of the reporting period in days.
    period_days: int
|
||||
|
||||
|
||||
# In-memory job storage (for demo; production would use Redis/DB)
|
||||
_jobs: dict[str, JobStatus] = {}
|
||||
_job_counter = 0
|
||||
|
||||
|
||||
def _convert_result(result: CompanyAnalysisResult) -> CompanyAnalysisResponse:
    """Map an internal single-company result onto its API response model.

    Args:
        result: Internal analysis result to expose through the API

    Returns:
        Response model carrying the same six fields as ``result``
    """
    copied_fields = (
        "company_name",
        "analysis",
        "patent_count",
        "success",
        "error",
        "timestamp",
    )
    # The response model mirrors the internal result field-for-field.
    return CompanyAnalysisResponse(
        **{field: getattr(result, field) for field in copied_fields}
    )
|
||||
|
||||
|
||||
def _convert_batch_result(result: BatchAnalysisResult) -> BatchAnalysisResponse:
    """Map an internal batch result onto its API response model.

    Args:
        result: Internal batch result to expose through the API

    Returns:
        Response model with every per-company result converted and the
        summary counters and timestamp copied over unchanged
    """
    per_company = list(map(_convert_result, result.results))
    return BatchAnalysisResponse(
        results=per_company,
        total_companies=result.total_companies,
        successful=result.successful,
        failed=result.failed,
        timestamp=result.timestamp,
    )
|
||||
|
||||
|
||||
# Global analyzer instance
|
||||
_analyzer: CompanyAnalyzer | None = None
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the module-level analyzer across the application's lifetime.

    A ``CompanyAnalyzer`` is created before the application starts serving
    and the reference is dropped again once the application shuts down.
    """
    global _analyzer

    # Startup: build the shared analyzer used by the analysis endpoints.
    _analyzer = CompanyAnalyzer()

    yield

    # Shutdown: release the reference so handlers see "not initialized".
    _analyzer = None
|
||||
|
||||
|
||||
# Application object; root_path comes from config so the app can be mounted
# under a prefix.
app = FastAPI(
    title="SPARC API",
    description="Semiconductor Patent & Analytics Report Core - Patent portfolio analysis using AI",
    version="1.0.0",
    lifespan=lifespan,
    root_path=config.root_path,
)

# CORS for the React frontend: only the two local dev origins are allowed,
# with credentials and any method/header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000", "http://localhost:5173"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
|
||||
# ============== Auth Endpoints ==============
|
||||
|
||||
|
||||
@app.post("/auth/register", response_model=UserResponse, tags=["Auth"])
async def register(request: RegisterRequest):
    """Register a new user.

    The first registered user automatically becomes an admin.

    Args:
        request: E-mail and password for the new account

    Returns:
        The newly created user

    Raises:
        HTTPException: 400 when the database client does not return a user,
            which is reported as "Email already registered"
    """
    db = get_db_client()
    try:
        # First user becomes admin
        role = "admin" if db.get_user_count() == 0 else "user"
        user = db.create_user(
            email=request.email,
            password=request.password,
            role=role,
        )
    finally:
        # get_db_client() opens a new connection on every call; close it
        # here so each request does not leak a database connection.
        db.close()

    if not user:
        raise HTTPException(
            status_code=400,
            detail="Email already registered",
        )

    return UserResponse(
        id=user["id"],
        email=user["email"],
        role=user["role"],
        created_at=user["created_at"],
    )
|
||||
|
||||
|
||||
@app.post("/auth/login", response_model=TokenResponse, tags=["Auth"])
async def login(request: LoginRequest):
    """Authenticate user and return JWT tokens.

    Args:
        request: E-mail and password to check

    Returns:
        Access and refresh tokens for the authenticated user

    Raises:
        HTTPException: 401 when the credentials are not accepted
    """
    db = get_db_client()
    try:
        user = db.authenticate_user(request.email, request.password)
    finally:
        # get_db_client() opens a new connection on every call; close it
        # here so each request does not leak a database connection.
        db.close()

    if not user:
        raise HTTPException(
            status_code=401,
            detail="Invalid email or password",
        )

    return create_tokens(user["id"], user["email"], user["role"])
|
||||
|
||||
|
||||
@app.post("/auth/refresh", response_model=TokenResponse, tags=["Auth"])
async def refresh_token(request: RefreshRequest):
    """Refresh access token using refresh token.

    Args:
        request: Body carrying the refresh token

    Returns:
        A new access/refresh token pair for the token's user

    Raises:
        HTTPException: 401 when the token cannot be decoded, is not a
            refresh token, or its user no longer exists
    """
    payload = decode_token(request.refresh_token)

    if not payload or payload.type != "refresh":
        raise HTTPException(
            status_code=401,
            detail="Invalid refresh token",
        )

    db = get_db_client()
    try:
        # Re-read the user so the new tokens carry the current e-mail/role.
        user = db.get_user_by_id(payload.user_id)
    finally:
        # get_db_client() opens a new connection on every call; close it
        # here so each request does not leak a database connection.
        db.close()

    if not user:
        raise HTTPException(
            status_code=401,
            detail="User not found",
        )

    return create_tokens(user["id"], user["email"], user["role"])
|
||||
|
||||
|
||||
@app.get("/auth/me", response_model=UserResponse, tags=["Auth"])
async def get_me(current_user: UserResponse = Depends(get_current_user)):
    """Return the user resolved from the request's bearer token.

    All the work happens in the ``get_current_user`` dependency; this
    handler simply echoes its result.
    """
    return current_user
|
||||
|
||||
|
||||
# ============== Admin Endpoints ==============
|
||||
|
||||
|
||||
@app.get("/admin/users", response_model=List[UserResponse], tags=["Admin"])
async def list_users(
    limit: int = Query(default=100, ge=1, le=1000),
    offset: int = Query(default=0, ge=0),
    _: UserResponse = Depends(get_current_admin),
):
    """List all users (admin only).

    Args:
        limit: Maximum number of users to return (1-1000, default 100)
        offset: Number of users to skip (default 0)

    Returns:
        One ``UserResponse`` per user row returned by the database client
    """
    db = get_db_client()
    try:
        users = db.get_all_users(limit=limit, offset=offset)
    finally:
        # get_db_client() opens a new connection on every call; close it
        # here so each request does not leak a database connection.
        db.close()

    return [
        UserResponse(
            id=u["id"],
            email=u["email"],
            role=u["role"],
            created_at=u["created_at"],
        )
        for u in users
    ]
|
||||
|
||||
|
||||
@app.patch("/admin/users/{user_id}/role", response_model=UserResponse, tags=["Admin"])
async def update_user_role(
    user_id: int,
    request: UpdateRoleRequest,
    current_admin: UserResponse = Depends(get_current_admin),
):
    """Update a user's role (admin only).

    Args:
        user_id: ID of the user whose role is changed
        request: Body carrying the new role
        current_admin: The admin performing the change

    Returns:
        The updated user

    Raises:
        HTTPException: 400 when an admin targets their own account,
            404 when the database client does not return a user
    """
    # Checked before touching the database: admins may not change their
    # own role.
    if user_id == current_admin.id:
        raise HTTPException(
            status_code=400,
            detail="Cannot change your own role",
        )

    db = get_db_client()
    try:
        user = db.update_user_role(user_id, request.role)
    finally:
        # get_db_client() opens a new connection on every call; close it
        # here so each request does not leak a database connection.
        db.close()

    if not user:
        raise HTTPException(
            status_code=404,
            detail="User not found",
        )

    return UserResponse(
        id=user["id"],
        email=user["email"],
        role=user["role"],
        created_at=user["created_at"],
    )
|
||||
|
||||
|
||||
@app.delete("/admin/users/{user_id}", tags=["Admin"])
async def delete_user(
    user_id: int,
    current_admin: UserResponse = Depends(get_current_admin),
):
    """Delete a user (admin only).

    Args:
        user_id: ID of the user to delete
        current_admin: The admin performing the deletion

    Returns:
        A confirmation message

    Raises:
        HTTPException: 400 when an admin targets their own account,
            404 when the database client reports nothing was deleted
    """
    # Checked before touching the database: admins may not delete themselves.
    if user_id == current_admin.id:
        raise HTTPException(
            status_code=400,
            detail="Cannot delete yourself",
        )

    db = get_db_client()
    try:
        deleted = db.delete_user(user_id)
    finally:
        # get_db_client() opens a new connection on every call; close it
        # here so each request does not leak a database connection.
        db.close()

    if not deleted:
        raise HTTPException(
            status_code=404,
            detail="User not found",
        )

    return {"message": "User deleted"}
|
||||
|
||||
|
||||
# ============== Analytics Endpoint ==============
|
||||
|
||||
|
||||
@app.get("/analytics", response_model=AnalyticsResponse, tags=["Analytics"])
async def get_analytics(
    days: int = Query(default=30, ge=1, le=365),
    _: UserResponse = Depends(get_current_user),
):
    """Get analytics data (authenticated users only).

    Args:
        days: Size of the look-back window in days (1-365, default 30)

    Returns:
        Message totals and per-company / per-type breakdowns for the window
    """
    db = get_db_client()
    try:
        analytics = db.get_analytics(days=days)
    finally:
        # get_db_client() opens a new connection on every call; close it
        # here so each request does not leak a database connection.
        db.close()

    return AnalyticsResponse(
        total_messages=analytics["total_messages"],
        by_company=analytics["by_company"],
        by_type=analytics["by_type"],
        period_days=analytics["period_days"],
    )
|
||||
|
||||
|
||||
# ============== System Endpoints ==============
|
||||
|
||||
|
||||
@app.get("/health", response_model=HealthResponse, tags=["System"])
async def health_check():
    """Report that the API is up.

    Returns:
        A fixed "healthy" status and version together with the current
        local time
    """
    now = datetime.now()
    return HealthResponse(status="healthy", version="1.0.0", timestamp=now)
|
||||
|
||||
|
||||
@app.get(
    "/analyze/{company_name}",
    response_model=CompanyAnalysisResponse,
    tags=["Analysis"],
)
async def analyze_company(
    company_name: str,
    _: UserResponse = Depends(get_current_user),
):
    """Analyze a single company's patent portfolio.

    This endpoint retrieves recent patents for the specified company,
    parses them, and uses AI to generate a comprehensive analysis.

    Args:
        company_name: Name of the company to analyze (e.g., "nvidia", "intel")

    Returns:
        Analysis results including patent count, AI insights, and success status

    Raises:
        HTTPException: 503 when the analyzer has not been initialized
    """
    import asyncio

    # Bind once so the check and the call see the same object.
    analyzer = _analyzer
    if not analyzer:
        raise HTTPException(status_code=503, detail="Analyzer not initialized")

    # The analyzer call is synchronous; running it directly inside this
    # coroutine would block the event loop (and every other request) until
    # the analysis finishes, so hand it to a worker thread instead.
    result = await asyncio.to_thread(analyzer._analyze_company_safe, company_name)
    return _convert_result(result)
|
||||
|
||||
|
||||
@app.post(
    "/analyze/batch",
    response_model=BatchAnalysisResponse,
    tags=["Analysis"],
)
async def analyze_companies_batch(
    request: BatchAnalysisRequest,
    _: UserResponse = Depends(get_current_user),
):
    """Analyze multiple companies' patent portfolios.

    Processes companies concurrently for improved performance.
    Limited to 20 companies per request.

    Args:
        request: List of company names and optional worker count

    Returns:
        Batch results with individual company analyses and summary statistics

    Raises:
        HTTPException: 503 when the analyzer has not been initialized
    """
    import asyncio

    # Bind once so the check and the call see the same object.
    analyzer = _analyzer
    if not analyzer:
        raise HTTPException(status_code=503, detail="Analyzer not initialized")

    # The batch call is synchronous; running it directly inside this
    # coroutine would block the event loop for the whole batch, so hand it
    # to a worker thread instead.
    result = await asyncio.to_thread(
        analyzer.analyze_companies,
        companies=request.companies,
        max_workers=request.max_workers,
    )
    return _convert_batch_result(result)
|
||||
|
||||
|
||||
def _run_batch_job(job_id: str, companies: list[str], max_workers: int):
    """Run a batch analysis and record its progress on the job entry.

    Args:
        job_id: Key of the job entry in the in-memory job store
        companies: Company names to analyze
        max_workers: Concurrency setting forwarded to the analyzer

    The job is marked "failed" when the analyzer is missing or the analysis
    raises, and "completed" (with the converted result) otherwise.
    """
    if not _analyzer:
        _jobs[job_id].status = "failed"
        _jobs[job_id].error = "Analyzer not initialized"
        return

    _jobs[job_id].status = "running"

    def progress_callback(company: str, completed: int, total: int):
        # Called by the analyzer as companies finish; mirrors the counts
        # onto the job entry as an integer percentage.
        entry = _jobs[job_id]
        entry.completed_companies = completed
        entry.progress = int((completed / total) * 100)

    try:
        batch = _analyzer.analyze_companies(
            companies=companies,
            max_workers=max_workers,
            progress_callback=progress_callback,
        )
        _jobs[job_id].status = "completed"
        _jobs[job_id].progress = 100
        _jobs[job_id].result = _convert_batch_result(batch)
    except Exception as exc:
        # Any failure is surfaced to pollers through the job entry.
        _jobs[job_id].status = "failed"
        _jobs[job_id].error = str(exc)
|
||||
|
||||
|
||||
@app.post("/analyze/batch/async", response_model=JobStatus, tags=["Analysis"])
async def analyze_companies_async(
    request: BatchAnalysisRequest,
    background_tasks: BackgroundTasks,
    _: UserResponse = Depends(get_current_user),
):
    """Queue a batch analysis as a background job.

    The response is sent right away; the returned job ID is then used to
    poll for progress and results.

    Args:
        request: List of company names and optional worker count
        background_tasks: FastAPI hook used to schedule the job

    Returns:
        Job status with job_id for polling
    """
    global _job_counter

    # Job IDs combine a process-wide counter with the creation time.
    _job_counter += 1
    created = datetime.now().strftime("%Y%m%d%H%M%S")
    job_id = f"job_{_job_counter}_{created}"

    job = JobStatus(
        job_id=job_id,
        status="pending",
        progress=0,
        total_companies=len(request.companies),
        completed_companies=0,
    )
    _jobs[job_id] = job

    background_tasks.add_task(
        _run_batch_job,
        job_id,
        request.companies,
        request.max_workers,
    )

    return job
|
||||
|
||||
|
||||
@app.get("/jobs/{job_id}", response_model=JobStatus, tags=["Jobs"])
async def get_job_status(
    job_id: str,
    _: UserResponse = Depends(get_current_user),
):
    """Look up a background analysis job by ID.

    Args:
        job_id: The job ID returned from the async batch endpoint

    Returns:
        Current job status including progress and results when complete

    Raises:
        HTTPException: 404 when no job with this ID is stored
    """
    job = _jobs.get(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
    return job
|
||||
|
||||
|
||||
@app.get("/jobs", response_model=list[JobStatus], tags=["Jobs"])
async def list_jobs(
    status: Annotated[
        str | None,
        Query(description="Filter by status: pending, running, completed, failed"),
    ] = None,
    limit: Annotated[int, Query(ge=1, le=100)] = 10,
    _: UserResponse = Depends(get_current_user),
):
    """List all analysis jobs.

    Args:
        status: Optional filter by job status
        limit: Maximum number of jobs to return (default 10, max 100)

    Returns:
        List of job statuses, most recently created first
    """
    # Jobs are inserted into the dict in creation order, so walking it in
    # reverse yields newest-first. Sorting by job_id as a string would
    # misorder jobs once the counter reaches two digits
    # ("job_10_..." < "job_9_...").
    jobs = [
        job
        for job in reversed(_jobs.values())
        if not status or job.status == status
    ]

    return jobs[:limit]
|
||||
+210
@@ -0,0 +1,210 @@
|
||||
"""JWT authentication utilities for SPARC API."""
|
||||
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional
|
||||
|
||||
import jwt
|
||||
from fastapi import Depends, HTTPException, status
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from pydantic import BaseModel
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.database import DatabaseClient
|
||||
|
||||
# JWT configuration.
# NOTE(review): when the JWT_SECRET environment variable is unset, tokens are
# signed with the fallback string below, which is visible in source control.
# Deployments should always set JWT_SECRET.
JWT_SECRET = os.getenv("JWT_SECRET", "sparc-secret-key-change-in-production")
JWT_ALGORITHM = "HS256"

# Token lifetimes.
ACCESS_TOKEN_EXPIRE_MINUTES = 30
REFRESH_TOKEN_EXPIRE_DAYS = 7

# Dependency that reads the bearer credentials from incoming requests.
security = HTTPBearer()
|
||||
|
||||
|
||||
class TokenPayload(BaseModel):
    """Claims carried inside a SPARC JWT."""

    # User ID encoded as a string (JWT RFC 7519 requires sub to be a string).
    sub: str
    email: str
    role: str
    # Expiry time of the token.
    exp: datetime
    # Either "access" or "refresh".
    type: str

    @property
    def user_id(self) -> int:
        """Return the ``sub`` claim converted back to an integer user ID."""
        return int(self.sub)
|
||||
|
||||
|
||||
class TokenResponse(BaseModel):
    """Pair of tokens handed to a client after login or refresh."""

    # Encoded access JWT.
    access_token: str
    # Encoded refresh JWT.
    refresh_token: str
    # Token scheme; defaults to "bearer".
    token_type: str = "bearer"
|
||||
|
||||
|
||||
class UserResponse(BaseModel):
    """Public view of a user record (no password data)."""

    # Numeric user ID.
    id: int
    email: str
    role: str
    # Creation timestamp of the user record.
    created_at: datetime
|
||||
|
||||
|
||||
def create_access_token(user_id: int, email: str, role: str) -> str:
    """Issue a signed access token for a user.

    The token expires ACCESS_TOKEN_EXPIRE_MINUTES after it is issued.

    Args:
        user_id: User ID
        email: User email
        role: User role

    Returns:
        Encoded JWT token
    """
    lifetime = timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    claims = dict(
        sub=str(user_id),  # "sub" is stored as a string
        email=email,
        role=role,
        exp=datetime.now(timezone.utc) + lifetime,
        type="access",
    )
    return jwt.encode(claims, JWT_SECRET, algorithm=JWT_ALGORITHM)
|
||||
|
||||
|
||||
def create_refresh_token(user_id: int, email: str, role: str) -> str:
    """Issue a signed refresh token for a user.

    The token expires REFRESH_TOKEN_EXPIRE_DAYS after it is issued.

    Args:
        user_id: User ID
        email: User email
        role: User role

    Returns:
        Encoded JWT token
    """
    lifetime = timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
    claims = dict(
        sub=str(user_id),  # "sub" is stored as a string
        email=email,
        role=role,
        exp=datetime.now(timezone.utc) + lifetime,
        type="refresh",
    )
    return jwt.encode(claims, JWT_SECRET, algorithm=JWT_ALGORITHM)
|
||||
|
||||
|
||||
def create_tokens(user_id: int, email: str, role: str) -> TokenResponse:
    """Issue an access token and a refresh token for the same user.

    Args:
        user_id: User ID
        email: User email
        role: User role

    Returns:
        TokenResponse with both tokens
    """
    access = create_access_token(user_id, email, role)
    refresh = create_refresh_token(user_id, email, role)
    return TokenResponse(access_token=access, refresh_token=refresh)
|
||||
|
||||
|
||||
def decode_token(token: str) -> Optional[TokenPayload]:
    """Decode and validate a JWT token.

    Args:
        token: JWT token string

    Returns:
        TokenPayload if the token verifies and carries every expected claim,
        None otherwise (bad signature, expired, malformed, or missing or
        invalid claims)
    """
    try:
        claims = jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
        return TokenPayload(**claims)
    except jwt.InvalidTokenError:
        # Covers expired tokens too: ExpiredSignatureError is a subclass.
        return None
    except ValueError:
        # A correctly signed token whose claims do not fit TokenPayload
        # (pydantic's ValidationError derives from ValueError) is treated as
        # invalid rather than surfacing as a server error.
        return None
|
||||
|
||||
|
||||
def get_db_client() -> DatabaseClient:
    """Build a connected database client for auth operations.

    A new ``DatabaseClient`` is created and connected on every call.

    Returns:
        The connected client
    """
    db = DatabaseClient(config.database_url)
    db.connect()
    return db
|
||||
|
||||
|
||||
async def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security),
) -> UserResponse:
    """Get the current authenticated user from JWT token.

    Args:
        credentials: Bearer token from request

    Returns:
        UserResponse with user details

    Raises:
        HTTPException: 401 if the token is invalid or expired, is not an
            access token, or refers to a user that cannot be found
    """

    def unauthorized(detail: str) -> HTTPException:
        # All three failure modes share the same status and challenge header.
        return HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=detail,
            headers={"WWW-Authenticate": "Bearer"},
        )

    payload = decode_token(credentials.credentials)

    if not payload:
        raise unauthorized("Invalid or expired token")

    # Refresh tokens must not be usable as API credentials.
    if payload.type != "access":
        raise unauthorized("Invalid token type")

    db = get_db_client()
    try:
        user = db.get_user_by_id(payload.user_id)
    finally:
        # This dependency runs on every authenticated request and
        # get_db_client() opens a new connection each time; close it so
        # connections are not leaked.
        db.close()

    if not user:
        raise unauthorized("User not found")

    return UserResponse(
        id=user["id"],
        email=user["email"],
        role=user["role"],
        created_at=user["created_at"],
    )
|
||||
|
||||
|
||||
async def get_current_admin(
    current_user: UserResponse = Depends(get_current_user),
) -> UserResponse:
    """Allow the request through only when the caller is an admin.

    Args:
        current_user: Current authenticated user

    Returns:
        UserResponse if admin

    Raises:
        HTTPException: 403 if user is not admin
    """
    if current_user.role == "admin":
        return current_user

    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Admin access required",
    )
|
||||
+23
-2
@@ -10,5 +10,26 @@ load_dotenv()
|
||||
# SerpAPI key for patent search
|
||||
api_key = os.getenv("API_KEY")
|
||||
|
||||
# Anthropic API key for LLM analysis
|
||||
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
|
||||
# OpenRouter API key for LLM analysis
|
||||
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
|
||||
|
||||
# Database configuration - all messages are stored in the database
|
||||
# The database serves as both a persistent store and a cache layer
|
||||
database_url = os.getenv("DATABASE_URL", "postgresql://postgres:postgres@localhost:5432/sparc")
|
||||
|
||||
# Cache configuration
|
||||
# When enabled (default), the system checks the database for cached responses
|
||||
# before making API calls, saving tokens and reducing latency
|
||||
use_cache = os.getenv("USE_CACHE", "true").lower() in ("true", "1", "yes")
|
||||
|
||||
# Legacy compatibility - USE_DATABASE is deprecated, database is always used
|
||||
# This variable is kept for backwards compatibility but has no effect
|
||||
use_database = os.getenv("USE_DATABASE", "false").lower() in ("true", "1", "yes")
|
||||
|
||||
# Patent search configuration
|
||||
patent_search_days = int(os.getenv("PATENT_SEARCH_DAYS", "90"))
|
||||
patent_thread_workers = int(os.getenv("PATENT_THREAD_WORKERS", "5"))
|
||||
|
||||
# Root path for running behind a reverse proxy (e.g., "/api" when served at /api/)
|
||||
# This ensures OpenAPI docs work correctly when accessed via the proxy
|
||||
root_path = os.getenv("ROOT_PATH", "")
|
||||
|
||||
@@ -0,0 +1,672 @@
|
||||
"""Database client for storing and retrieving LLM messages and user authentication."""
|
||||
|
||||
import contextlib
|
||||
import psycopg2
|
||||
from psycopg2.pool import ThreadedConnectionPool
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from typing import Dict, List, Optional
|
||||
from datetime import datetime, timedelta
|
||||
import json
|
||||
import hashlib
|
||||
import bcrypt
|
||||
|
||||
|
||||
class DatabaseClient:
|
||||
"""Handles database operations for message storage and retrieval."""
|
||||
|
||||
def __init__(self, database_url: str, minconn: int = 2, maxconn: int = 10):
|
||||
"""Initialize the database client.
|
||||
|
||||
Args:
|
||||
database_url: PostgreSQL connection string
|
||||
minconn: Minimum connections in the pool
|
||||
maxconn: Maximum connections in the pool
|
||||
"""
|
||||
self.database_url = database_url
|
||||
self._pool: ThreadedConnectionPool | None = None
|
||||
self._minconn = minconn
|
||||
self._maxconn = maxconn
|
||||
# Legacy single connection kept for backwards compatibility
|
||||
self.conn = None
|
||||
|
||||
def _ensure_pool(self):
    """Build the connection pool unless a usable one already exists."""
    existing = self._pool
    if existing is not None and not existing.closed:
        return

    # No pool yet, or the previous one was closed: create a fresh one.
    self._pool = ThreadedConnectionPool(
        self._minconn,
        self._maxconn,
        self.database_url,
    )
|
||||
|
||||
@contextlib.contextmanager
def get_conn(self):
    """Lend out a pooled connection for the duration of a ``with`` block.

    The pool is created on demand, and the connection is handed back to it
    when the block exits, whether or not the block raised.
    """
    self._ensure_pool()
    borrowed = self._pool.getconn()
    try:
        yield borrowed
    finally:
        self._pool.putconn(borrowed)
|
||||
|
||||
def connect(self):
    """Open the legacy single connection if it is missing or closed."""
    if self.conn and not self.conn.closed:
        # Already connected; nothing to do.
        return
    self.conn = psycopg2.connect(self.database_url)
|
||||
|
||||
def close(self):
|
||||
"""Close database connection and pool."""
|
||||
if self.conn and not self.conn.closed:
|
||||
self.conn.close()
|
||||
if self._pool and not self._pool.closed:
|
||||
self._pool.closeall()
|
||||
|
||||
def initialize_schema(self):
    """Create all tables and indexes used by the client, if missing.

    Every statement is idempotent (``IF NOT EXISTS`` or a guarded ``ALTER``),
    so the method can be run against an existing database. The statements
    run in order on the legacy connection and are committed together.
    """
    ddl_statements = (
        # Messages table
        """
        CREATE TABLE IF NOT EXISTS llm_messages (
            id SERIAL PRIMARY KEY,
            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            company_name VARCHAR(255),
            analysis_type VARCHAR(50),
            model VARCHAR(100),
            prompt TEXT NOT NULL,
            prompt_hash VARCHAR(64),
            response TEXT,
            metadata JSONB,
            token_usage JSONB,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            is_cached BOOLEAN DEFAULT FALSE
        )
        """,
        # Index on timestamp for analytics queries
        """
        CREATE INDEX IF NOT EXISTS idx_messages_timestamp
        ON llm_messages(timestamp)
        """,
        # Index on company_name for filtering
        """
        CREATE INDEX IF NOT EXISTS idx_messages_company
        ON llm_messages(company_name)
        """,
        # Add prompt_hash and is_cached to tables created before those
        # columns existed. Must run BEFORE the prompt_hash index below.
        """
        DO $$
        BEGIN
            IF NOT EXISTS (
                SELECT 1 FROM information_schema.columns
                WHERE table_name = 'llm_messages' AND column_name = 'prompt_hash'
            ) THEN
                ALTER TABLE llm_messages ADD COLUMN prompt_hash VARCHAR(64);
            END IF;
            IF NOT EXISTS (
                SELECT 1 FROM information_schema.columns
                WHERE table_name = 'llm_messages' AND column_name = 'is_cached'
            ) THEN
                ALTER TABLE llm_messages ADD COLUMN is_cached BOOLEAN DEFAULT FALSE;
            END IF;
        END $$;
        """,
        # Index on prompt_hash for cache lookups
        """
        CREATE INDEX IF NOT EXISTS idx_messages_prompt_hash
        ON llm_messages(prompt_hash)
        """,
        # Users table for authentication
        """
        CREATE TABLE IF NOT EXISTS users (
            id SERIAL PRIMARY KEY,
            email VARCHAR(255) UNIQUE NOT NULL,
            password_hash VARCHAR(255) NOT NULL,
            role VARCHAR(20) DEFAULT 'user' CHECK (role IN ('admin', 'user')),
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        # Index on email for fast lookups
        """
        CREATE INDEX IF NOT EXISTS idx_users_email
        ON users(email)
        """,
        # Patents cache table
        """
        CREATE TABLE IF NOT EXISTS patents (
            patent_id VARCHAR(64) PRIMARY KEY,
            company_name VARCHAR(255),
            pdf_link TEXT,
            raw_sections JSONB,
            minimized_content TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_patents_company
        ON patents(company_name)
        """,
        # SERP query cache table
        """
        CREATE TABLE IF NOT EXISTS serp_queries (
            id SERIAL PRIMARY KEY,
            company_name VARCHAR(255),
            query_hash VARCHAR(64) UNIQUE,
            result_patent_ids TEXT[],
            expires_at TIMESTAMP NOT NULL,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_serp_queries_hash
        ON serp_queries(query_hash)
        """,
    )

    self.connect()

    with self.conn.cursor() as cursor:
        for statement in ddl_statements:
            cursor.execute(statement)

    self.conn.commit()
|
||||
|
||||
@staticmethod
|
||||
def hash_prompt(prompt: str) -> str:
|
||||
"""Generate a hash of the prompt for cache lookups.
|
||||
|
||||
Args:
|
||||
prompt: The prompt text to hash
|
||||
|
||||
Returns:
|
||||
SHA-256 hash of the prompt
|
||||
"""
|
||||
return hashlib.sha256(prompt.encode()).hexdigest()
|
||||
|
||||
def get_cached_response(
    self,
    prompt: str,
    company_name: Optional[str] = None,
    analysis_type: Optional[str] = None,
) -> Optional[Dict]:
    """Find the newest stored response for a prompt, if there is one.

    Rows are matched on the prompt's hash. Rows without a response, and
    rows whose response starts with the ``[DATABASE MODE]``, ``[TEST MODE]``
    or ``[NO API]`` markers, are never returned.

    Args:
        prompt: The prompt to look up
        company_name: Optional company name filter
        analysis_type: Optional analysis type filter

    Returns:
        Cached message dict if found, None otherwise
    """
    self.connect()

    # "%%" is an escaped literal "%" because the query is run with params.
    query = """
        SELECT * FROM llm_messages
        WHERE prompt_hash = %s
        AND response IS NOT NULL
        AND response NOT LIKE '[DATABASE MODE]%%'
        AND response NOT LIKE '[TEST MODE]%%'
        AND response NOT LIKE '[NO API]%%'
    """
    params = [self.hash_prompt(prompt)]

    # Optional filters are appended only when a truthy value was given.
    for clause, value in (
        (" AND company_name = %s", company_name),
        (" AND analysis_type = %s", analysis_type),
    ):
        if value:
            query += clause
            params.append(value)

    query += " ORDER BY timestamp DESC LIMIT 1"

    with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
        cursor.execute(query, params)
        row = cursor.fetchone()

    if not row:
        return None
    return dict(row)
|
||||
|
||||
def store_message(
    self,
    prompt: str,
    response: str,
    company_name: Optional[str] = None,
    analysis_type: Optional[str] = None,
    model: Optional[str] = None,
    metadata: Optional[Dict] = None,
    token_usage: Optional[Dict] = None,
    is_cached: bool = False,
) -> int:
    """Store an LLM message exchange in the database.

    Args:
        prompt: The prompt sent to the LLM
        response: The response from the LLM
        company_name: Name of company being analyzed
        analysis_type: Type of analysis (e.g., 'single_patent', 'portfolio')
        model: Model identifier used
        metadata: Additional metadata as dict
        token_usage: Token usage information
        is_cached: Whether this response was served from cache

    Returns:
        The ID of the inserted record

    Raises:
        psycopg2.Error: If the insert fails; the transaction is rolled back
            before the error is re-raised
    """
    self.connect()

    row = (
        prompt,
        self.hash_prompt(prompt),
        response,
        company_name,
        analysis_type,
        model,
        # Falsy metadata / token usage is stored as NULL rather than "{}".
        json.dumps(metadata) if metadata else None,
        json.dumps(token_usage) if token_usage else None,
        is_cached,
    )

    try:
        with self.conn.cursor() as cursor:
            cursor.execute(
                """
                INSERT INTO llm_messages
                (prompt, prompt_hash, response, company_name, analysis_type, model, metadata, token_usage, is_cached)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
                RETURNING id
                """,
                row,
            )
            message_id = cursor.fetchone()[0]
        self.conn.commit()
    except psycopg2.Error:
        # The legacy connection is reused by later calls; without a rollback
        # a failed insert would leave it in an aborted transaction and make
        # every following statement fail as well.
        if not self.conn.closed:
            self.conn.rollback()
        raise

    return message_id
|
||||
|
||||
def get_messages(
    self,
    company_name: Optional[str] = None,
    analysis_type: Optional[str] = None,
    limit: int = 100,
    offset: int = 0,
) -> List[Dict]:
    """Fetch stored messages, newest first, with optional filters.

    Args:
        company_name: Filter by company name
        analysis_type: Filter by analysis type
        limit: Maximum number of records to return
        offset: Number of records to skip

    Returns:
        List of message dictionaries
    """
    self.connect()

    # "WHERE 1=1" lets every optional filter be appended as " AND ...".
    query = "SELECT * FROM llm_messages WHERE 1=1"
    params = []

    for clause, value in (
        (" AND company_name = %s", company_name),
        (" AND analysis_type = %s", analysis_type),
    ):
        if value:
            query += clause
            params.append(value)

    query += " ORDER BY timestamp DESC LIMIT %s OFFSET %s"
    params += [limit, offset]

    with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
        cursor.execute(query, params)
        rows = cursor.fetchall()

    return [dict(row) for row in rows]
|
||||
|
||||
def get_analytics(self, days: int = 30) -> Dict:
    """Get analytics on message usage.

    Runs three aggregate queries over ``llm_messages`` rows whose
    ``timestamp`` falls within the last ``days`` days.

    Args:
        days: Number of days to look back

    Returns:
        Dictionary with ``total_messages`` (row count), ``by_company``
        (up to 10 ``company_name``/``count`` rows, highest count first),
        ``by_type`` (``analysis_type``/``count`` rows, highest count first)
        and ``period_days`` (the ``days`` argument as given).
    """
    self.connect()

    # The placeholder is kept OUTSIDE the quoted interval literal. The driver
    # quotes bound values itself, so "INTERVAL '%s days'" only happens to work
    # for values that are rendered without quotes.
    window = (days,)

    with self.conn.cursor(cursor_factory=RealDictCursor) as cursor:
        # Total messages
        cursor.execute(
            """
            SELECT COUNT(*) as total_messages
            FROM llm_messages
            WHERE timestamp >= NOW() - %s * INTERVAL '1 day'
            """,
            window,
        )
        total = cursor.fetchone()["total_messages"]

        # Messages by company
        cursor.execute(
            """
            SELECT company_name, COUNT(*) as count
            FROM llm_messages
            WHERE timestamp >= NOW() - %s * INTERVAL '1 day'
            GROUP BY company_name
            ORDER BY count DESC
            LIMIT 10
            """,
            window,
        )
        by_company = cursor.fetchall()

        # Messages by type
        cursor.execute(
            """
            SELECT analysis_type, COUNT(*) as count
            FROM llm_messages
            WHERE timestamp >= NOW() - %s * INTERVAL '1 day'
            GROUP BY analysis_type
            ORDER BY count DESC
            """,
            window,
        )
        by_type = cursor.fetchall()

    return {
        "total_messages": total,
        "by_company": [dict(row) for row in by_company],
        "by_type": [dict(row) for row in by_type],
        "period_days": days,
    }
|
||||
|
||||
# Patent Cache Methods
|
||||
|
||||
def get_cached_patent(self, patent_id: str) -> Optional[Dict]:
    """Fetch the cached ``patents`` row for a patent ID.

    Args:
        patent_id: Value matched against the ``patent_id`` column

    Returns:
        The matching row as a dict (all columns), or None when no row matches.
    """
    lookup_sql = "SELECT * FROM patents WHERE patent_id = %s"
    with self.get_conn() as conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(lookup_sql, (patent_id,))
        record = cur.fetchone()
    if not record:
        return None
    return dict(record)
|
||||
|
||||
def store_patent(
    self,
    patent_id: str,
    company_name: str,
    pdf_link: str,
    raw_sections: Dict,
    minimized_content: str,
) -> None:
    """Insert a processed patent into the cache, or refresh an existing row.

    On a ``patent_id`` conflict only ``raw_sections`` and
    ``minimized_content`` are overwritten; the other columns keep their
    stored values.

    Args:
        patent_id: Key of the row in the ``patents`` table
        company_name: Company the patent was fetched for
        pdf_link: Link stored alongside the patent
        raw_sections: Parsed sections; serialised to JSON text before storage
        minimized_content: Condensed patent text
    """
    upsert_sql = """
        INSERT INTO patents (patent_id, company_name, pdf_link, raw_sections, minimized_content)
        VALUES (%s, %s, %s, %s, %s)
        ON CONFLICT (patent_id) DO UPDATE SET
            raw_sections = EXCLUDED.raw_sections,
            minimized_content = EXCLUDED.minimized_content
        """
    serialised_sections = json.dumps(raw_sections)
    row_values = (patent_id, company_name, pdf_link, serialised_sections, minimized_content)

    with self.get_conn() as conn:
        with conn.cursor() as cur:
            cur.execute(upsert_sql, row_values)
        conn.commit()
|
||||
|
||||
def get_cached_serp_query(self, query_hash: str) -> Optional[List[str]]:
    """Return the cached patent IDs for a SERP query hash, if still valid.

    Args:
        query_hash: Value matched against the ``query_hash`` column

    Returns:
        The stored ``result_patent_ids`` when a row exists whose
        ``expires_at`` is later than the database's ``NOW()``; None otherwise.
    """
    unexpired_sql = """
        SELECT result_patent_ids FROM serp_queries
        WHERE query_hash = %s AND expires_at > NOW()
        """
    with self.get_conn() as conn, conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(unexpired_sql, (query_hash,))
        hit = cur.fetchone()
    if not hit:
        return None
    return hit["result_patent_ids"]
|
||||
|
||||
def store_serp_query(
    self,
    company_name: str,
    query_hash: str,
    patent_ids: List[str],
    ttl_hours: int = 24,
) -> None:
    """Store SERP query results in the cache.

    Inserts a ``serp_queries`` row, or on a ``query_hash`` conflict replaces
    the stored patent IDs and expiry.

    Args:
        company_name: Company the query was run for
        query_hash: Key identifying the query
        patent_ids: Patent IDs returned by the query
        ttl_hours: Hours from now until the entry expires
    """
    # The expiry is computed by the database (NOW() + ttl) rather than from
    # the application's naive datetime.now(): the cache lookup compares
    # expires_at against the database's NOW(), so writing with the same clock
    # keeps the TTL correct even when the app and database hosts disagree on
    # time zone or wall-clock time.
    upsert_sql = """
        INSERT INTO serp_queries (company_name, query_hash, result_patent_ids, expires_at)
        VALUES (%s, %s, %s, NOW() + %s * INTERVAL '1 hour')
        ON CONFLICT (query_hash) DO UPDATE SET
            result_patent_ids = EXCLUDED.result_patent_ids,
            expires_at = EXCLUDED.expires_at
        """
    with self.get_conn() as conn:
        with conn.cursor() as cursor:
            cursor.execute(
                upsert_sql,
                (company_name, query_hash, patent_ids, ttl_hours),
            )
        conn.commit()
|
||||
|
||||
# User Authentication Methods
|
||||
|
||||
@staticmethod
def hash_password(password: str) -> str:
    """Produce a bcrypt hash of a plain-text password.

    Args:
        password: Plain text password

    Returns:
        The bcrypt hash (generated with a fresh salt) decoded to ``str``
    """
    raw_password = password.encode()
    salt = bcrypt.gensalt()
    digest = bcrypt.hashpw(raw_password, salt)
    return digest.decode()
|
||||
|
||||
@staticmethod
def verify_password(password: str, password_hash: str) -> bool:
    """Check a plain-text password against a stored bcrypt hash.

    Args:
        password: Plain text password
        password_hash: Stored hash, as returned by ``hash_password``

    Returns:
        True if password matches
    """
    candidate = password.encode()
    stored = password_hash.encode()
    return bcrypt.checkpw(candidate, stored)
|
||||
|
||||
def create_user(
    self,
    email: str,
    password: str,
    role: str = "user",
) -> Optional[Dict]:
    """Insert a new user row with a hashed password.

    Args:
        email: User email
        password: Plain text password; only its hash is stored
        role: User role ('admin' or 'user')

    Returns:
        Dict with ``id``, ``email``, ``role`` and ``created_at`` of the new
        user, or None when the insert hits a unique-constraint violation
        (the transaction is rolled back in that case).
    """
    self.connect()

    hashed = self.hash_password(password)
    insert_sql = """
        INSERT INTO users (email, password_hash, role)
        VALUES (%s, %s, %s)
        RETURNING id, email, role, created_at
        """

    try:
        with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
            cur.execute(insert_sql, (email, hashed, role))
            created = cur.fetchone()
        self.conn.commit()
    except psycopg2.errors.UniqueViolation:
        # Undo the failed insert so the connection is usable again.
        self.conn.rollback()
        return None

    if not created:
        return None
    return dict(created)
|
||||
|
||||
def authenticate_user(self, email: str, password: str) -> Optional[Dict]:
    """Check an email/password pair against the ``users`` table.

    Args:
        email: User email
        password: Plain text password

    Returns:
        Dict with ``id``, ``email``, ``role`` and ``created_at`` when a user
        with that email exists and the password matches its stored hash;
        None otherwise. The password hash is never included.
    """
    self.connect()

    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute("SELECT * FROM users WHERE email = %s", (email,))
        account = cur.fetchone()

    # Guard clauses: unknown email first, then wrong password.
    if not account:
        return None
    if not self.verify_password(password, account["password_hash"]):
        return None

    return {
        "id": account["id"],
        "email": account["email"],
        "role": account["role"],
        "created_at": account["created_at"],
    }
|
||||
|
||||
def get_user_by_id(self, user_id: int) -> Optional[Dict]:
    """Look up a user's public fields by primary key.

    Args:
        user_id: User ID

    Returns:
        Dict with ``id``, ``email``, ``role`` and ``created_at``, or None
        when no user has that ID.
    """
    self.connect()

    by_id_sql = "SELECT id, email, role, created_at FROM users WHERE id = %s"
    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(by_id_sql, (user_id,))
        found = cur.fetchone()

    if not found:
        return None
    return dict(found)
|
||||
|
||||
def get_user_by_email(self, email: str) -> Optional[Dict]:
    """Look up a user's public fields by email address.

    Args:
        email: User email

    Returns:
        Dict with ``id``, ``email``, ``role`` and ``created_at``, or None
        when no user has that email.
    """
    self.connect()

    by_email_sql = "SELECT id, email, role, created_at FROM users WHERE email = %s"
    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(by_email_sql, (email,))
        found = cur.fetchone()

    if not found:
        return None
    return dict(found)
|
||||
|
||||
def get_all_users(self, limit: int = 100, offset: int = 0) -> List[Dict]:
    """List users, most recently created first (admin only).

    Args:
        limit: Maximum number of users
        offset: Offset for pagination

    Returns:
        List of dicts with ``id``, ``email``, ``role`` and ``created_at``
    """
    self.connect()

    page_sql = """
        SELECT id, email, role, created_at
        FROM users
        ORDER BY created_at DESC
        LIMIT %s OFFSET %s
        """
    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(page_sql, (limit, offset))
        page = cur.fetchall()
        return [dict(entry) for entry in page]
|
||||
|
||||
def update_user_role(self, user_id: int, role: str) -> Optional[Dict]:
    """Set a user's role and bump ``updated_at`` (admin only).

    Args:
        user_id: User ID
        role: New role ('admin' or 'user')

    Returns:
        Dict with ``id``, ``email``, ``role`` and ``created_at`` of the
        updated user, or None when no user has that ID.
    """
    self.connect()

    update_sql = """
        UPDATE users
        SET role = %s, updated_at = CURRENT_TIMESTAMP
        WHERE id = %s
        RETURNING id, email, role, created_at
        """
    with self.conn.cursor(cursor_factory=RealDictCursor) as cur:
        cur.execute(update_sql, (role, user_id))
        changed = cur.fetchone()
    # Commit runs whether or not a row matched.
    self.conn.commit()

    if not changed:
        return None
    return dict(changed)
|
||||
|
||||
def delete_user(self, user_id: int) -> bool:
    """Remove a user row by ID (admin only).

    Args:
        user_id: User ID

    Returns:
        True if at least one row was deleted, False otherwise
    """
    self.connect()

    with self.conn.cursor() as cur:
        cur.execute("DELETE FROM users WHERE id = %s", (user_id,))
        affected = cur.rowcount
    self.conn.commit()

    return affected > 0
|
||||
|
||||
def get_user_count(self) -> int:
    """Count the rows in the ``users`` table.

    Returns:
        Number of users
    """
    self.connect()

    with self.conn.cursor() as cur:
        cur.execute("SELECT COUNT(*) FROM users")
        count_row = cur.fetchone()
        return count_row[0]
|
||||
+166
-18
@@ -1,21 +1,39 @@
|
||||
"""LLM integration for patent analysis using Anthropic's Claude."""
|
||||
"""LLM integration for patent analysis using OpenRouter."""
|
||||
|
||||
from anthropic import Anthropic
|
||||
from openai import OpenAI
|
||||
from SPARC import config
|
||||
from SPARC.database import DatabaseClient
|
||||
from typing import Dict
|
||||
|
||||
|
||||
class LLMAnalyzer:
|
||||
"""Handles LLM-based analysis of patent content."""
|
||||
|
||||
def __init__(self, api_key: str | None = None, test_mode: bool = False, use_cache: bool | None = None):
    """Initialize the LLM analyzer.

    Args:
        api_key: OpenRouter API key. If None, will attempt to load from config.
        test_mode: If True, print prompts instead of making API calls
        use_cache: If True, check database cache before making API calls.
            If None, uses config.use_cache
    """
    self.test_mode = test_mode
    self.use_cache = use_cache if use_cache is not None else config.use_cache
    self.model = "anthropic/claude-3.5-sonnet"

    # Always initialize database client for storage and caching
    self.db_client = DatabaseClient(config.database_url)
    self.db_client.initialize_schema()

    # Resolve the key once so the availability check and the client are
    # guaranteed to use the same value.
    resolved_key = api_key or config.openrouter_api_key

    # No client is created in test mode or when no key is configured;
    # self.client is None in both cases.
    if resolved_key and not test_mode:
        self.client = OpenAI(
            api_key=resolved_key,
            base_url="https://openrouter.ai/api/v1",
        )
    else:
        self.client = None
|
||||
|
||||
def analyze_patent_content(self, patent_content: str, company_name: str) -> str:
|
||||
"""Analyze patent content to estimate company innovation and performance.
|
||||
@@ -40,14 +58,76 @@ Patent Content:
|
||||
|
||||
Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""
|
||||
|
||||
message = self.client.messages.create(
|
||||
if self.test_mode:
|
||||
print("=" * 80)
|
||||
print("TEST MODE - Prompt that would be sent to LLM:")
|
||||
print("=" * 80)
|
||||
print(prompt)
|
||||
print("=" * 80)
|
||||
return "[TEST MODE - No API call made]"
|
||||
|
||||
# Check cache first
|
||||
if self.use_cache:
|
||||
cached = self.db_client.get_cached_response(
|
||||
prompt=prompt,
|
||||
company_name=company_name,
|
||||
analysis_type="single_patent"
|
||||
)
|
||||
if cached:
|
||||
# Log the cache hit
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=cached["response"],
|
||||
company_name=company_name,
|
||||
analysis_type="single_patent",
|
||||
model=self.model,
|
||||
metadata={
|
||||
"patent_content_length": len(patent_content),
|
||||
"cache_hit": True,
|
||||
"original_message_id": cached["id"]
|
||||
},
|
||||
is_cached=True
|
||||
)
|
||||
return cached["response"]
|
||||
|
||||
# Call API if no cache hit and client is available
|
||||
if self.client:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
response_text = response.choices[0].message.content
|
||||
|
||||
# Store in database for future cache lookups
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=response_text,
|
||||
company_name=company_name,
|
||||
analysis_type="single_patent",
|
||||
model=self.model,
|
||||
metadata={"patent_content_length": len(patent_content)},
|
||||
token_usage={
|
||||
"prompt_tokens": response.usage.prompt_tokens,
|
||||
"completion_tokens": response.usage.completion_tokens,
|
||||
"total_tokens": response.usage.total_tokens
|
||||
} if hasattr(response, 'usage') else None
|
||||
)
|
||||
|
||||
return response_text
|
||||
|
||||
# No API client available - store prompt for later processing
|
||||
placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=placeholder,
|
||||
company_name=company_name,
|
||||
analysis_type="single_patent",
|
||||
model=self.model,
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
metadata={"patent_content_length": len(patent_content), "pending": True}
|
||||
)
|
||||
|
||||
return message.content[0].text
|
||||
|
||||
return placeholder
|
||||
|
||||
def analyze_patent_portfolio(
|
||||
self, patents_data: list[Dict[str, str]], company_name: str
|
||||
) -> str:
|
||||
@@ -84,10 +164,78 @@ Patent Portfolio:
|
||||
|
||||
Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""
|
||||
|
||||
message = self.client.messages.create(
|
||||
model=self.model,
|
||||
max_tokens=2048,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
if self.test_mode:
|
||||
print(prompt)
|
||||
return "[TEST MODE]"
|
||||
|
||||
return message.content[0].text
|
||||
metadata = {
|
||||
"patent_count": len(patents_data),
|
||||
"patent_ids": [p['patent_id'] for p in patents_data]
|
||||
}
|
||||
|
||||
# Check cache first
|
||||
if self.use_cache:
|
||||
cached = self.db_client.get_cached_response(
|
||||
prompt=prompt,
|
||||
company_name=company_name,
|
||||
analysis_type="portfolio"
|
||||
)
|
||||
if cached:
|
||||
# Log the cache hit
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=cached["response"],
|
||||
company_name=company_name,
|
||||
analysis_type="portfolio",
|
||||
model=self.model,
|
||||
metadata={
|
||||
**metadata,
|
||||
"cache_hit": True,
|
||||
"original_message_id": cached["id"]
|
||||
},
|
||||
is_cached=True
|
||||
)
|
||||
return cached["response"]
|
||||
|
||||
# Call API if no cache hit and client is available
|
||||
if self.client:
|
||||
try:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
max_tokens=2048,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
|
||||
response_text = response.choices[0].message.content
|
||||
|
||||
# Store in database for future cache lookups
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=response_text,
|
||||
company_name=company_name,
|
||||
analysis_type="portfolio",
|
||||
model=self.model,
|
||||
metadata=metadata,
|
||||
token_usage={
|
||||
"prompt_tokens": response.usage.prompt_tokens,
|
||||
"completion_tokens": response.usage.completion_tokens,
|
||||
"total_tokens": response.usage.total_tokens
|
||||
} if hasattr(response, 'usage') else None
|
||||
)
|
||||
|
||||
return response_text
|
||||
except AttributeError:
|
||||
return prompt
|
||||
|
||||
# No API client available - store prompt for later processing
|
||||
placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
|
||||
self.db_client.store_message(
|
||||
prompt=prompt,
|
||||
response=placeholder,
|
||||
company_name=company_name,
|
||||
analysis_type="portfolio",
|
||||
model=self.model,
|
||||
metadata={**metadata, "pending": True}
|
||||
)
|
||||
return placeholder
|
||||
|
||||
|
||||
+42
-12
@@ -1,47 +1,77 @@
|
||||
import os
|
||||
import serpapi
|
||||
from SPARC import config
|
||||
import re
|
||||
import pdfplumber # pip install pdfplumber
|
||||
import requests
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict
|
||||
from SPARC.types import Patents, Patent
|
||||
|
||||
class SERP:
|
||||
def query(company: str, days_back: int | None = None) -> Patents:
    """Query Google Patents for a company's recent patents.

    Args:
        company: Name of the company to search for
        days_back: Number of days to look back for patents; when None,
            ``config.patent_search_days`` is used

    Returns:
        Patents object containing list of patents with PDF links

    Note:
        Patents without PDF download links are skipped. This occurs when
        Google Patents doesn't have a PDF available for a particular patent
        (e.g., recently filed patents, certain international patents, or
        patents with restricted access). The returned count may be lower
        than the requested number of results.
    """
    if days_back is None:
        days_back = config.patent_search_days
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days_back)

    def _mdy(day: datetime) -> str:
        # Unpadded month/day/year built from the date fields. The previous
        # strftime('%-m/%-d/%Y') relied on the '-' flag, which is a platform
        # extension and is not accepted by every C library.
        return f"{day.month}/{day.day}/{day.year}"

    date_filter = f"cdr:1,cd_min:{_mdy(start_date)},cd_max:{_mdy(end_date)}"

    # Make API call
    params = {
        "engine": "google_patents",
        "q": company,
        "num": 10,
        "filter": 1,
        "tbs": date_filter,
        "api_key": config.api_key,
    }
    search = serpapi.search(params)

    # A response without "organic_results" (presumably what comes back when
    # nothing matches -- confirm against the API) is treated as an empty
    # result set instead of raising KeyError.
    try:
        list_of_patents = search["organic_results"]
    except KeyError:
        list_of_patents = []

    # Convert results to Patent objects, skipping any without PDF links
    # (see docstring for details).
    patent_ids = []
    for patent in list_of_patents:
        pdf_link = patent.get("pdf")
        if pdf_link:
            patent_ids.append(
                Patent(patent_id=patent["publication_number"], pdf_link=pdf_link, summary=None)
            )

    return Patents(patents=patent_ids)
|
||||
|
||||
def save_patents(patent: Patent, timeout: float = 60.0) -> Patent:
    """
    Save the patent PDF to the patents folder, skipping download if already cached.

    Args:
        patent: Patent object
        timeout: Seconds to wait for the download before giving up

    Returns:
        Patent object with updated PDF path

    Raises:
        requests.HTTPError: If the download responds with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    pdf_path = f"patents/{patent.patent_id}.pdf"
    os.makedirs("patents", exist_ok=True)

    # A non-empty file on disk counts as a cache hit.
    already_cached = os.path.exists(pdf_path) and os.path.getsize(pdf_path) > 0
    if not already_cached:
        response = requests.get(patent.pdf_link, timeout=timeout)
        # Fail BEFORE writing: otherwise an HTTP error body would be saved as
        # a non-empty "PDF" and treated as a valid cache entry on every
        # later call.
        response.raise_for_status()
        with open(pdf_path, "wb") as f:
            f.write(response.content)

    patent.pdf_path = pdf_path
    return patent
|
||||
|
||||
def parse_patent_pdf(pdf_path: str) -> Dict:
|
||||
|
||||
+25
-1
@@ -1,4 +1,5 @@
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -12,3 +13,26 @@ class Patent:
|
||||
@dataclass
|
||||
class Patents:
|
||||
patents: list[Patent]
|
||||
|
||||
|
||||
@dataclass
|
||||
class CompanyAnalysisResult:
|
||||
"""Result of analyzing a single company's patent portfolio."""
|
||||
|
||||
company_name: str
|
||||
analysis: str
|
||||
patent_count: int
|
||||
success: bool
|
||||
error: str | None = None
|
||||
timestamp: datetime = field(default_factory=datetime.now)
|
||||
|
||||
|
||||
@dataclass
class BatchAnalysisResult:
    """Aggregate outcome of analysing several companies in one batch."""

    # Per-company results.
    results: list[CompanyAnalysisResult]
    # Count of companies in the batch.
    total_companies: int
    # Count of successful analyses.
    successful: int
    # Count of failed analyses.
    failed: int
    # Defaults to datetime.now() evaluated when the instance is created.
    timestamp: datetime = field(default_factory=datetime.now)
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
services:
|
||||
postgres:
|
||||
image: postgres:16-alpine
|
||||
container_name: sparc-postgres
|
||||
environment:
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
POSTGRES_DB: sparc
|
||||
ports:
|
||||
- "5432:5432"
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
restart: unless-stopped
|
||||
|
||||
init-db:
|
||||
build: .
|
||||
container_name: sparc-init-db
|
||||
command: python scripts/init_database.py
|
||||
environment:
|
||||
DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
restart: "no"
|
||||
|
||||
api:
|
||||
build: .
|
||||
container_name: sparc-api
|
||||
command: uvicorn SPARC.api:app --host 0.0.0.0 --port 8000
|
||||
environment:
|
||||
API_KEY: ${API_KEY}
|
||||
OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
|
||||
DATABASE_URL: postgresql://postgres:postgres@postgres:5432/sparc
|
||||
USE_CACHE: "true"
|
||||
JWT_SECRET: ${JWT_SECRET:-sparc-secret-key-change-in-production}
|
||||
ROOT_PATH: /api
|
||||
ports:
|
||||
- "8000:8000"
|
||||
depends_on:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
init-db:
|
||||
condition: service_completed_successfully
|
||||
volumes:
|
||||
- ./patents:/app/patents
|
||||
restart: unless-stopped
|
||||
|
||||
dashboard:
|
||||
build: ./frontend
|
||||
container_name: sparc-dashboard
|
||||
ports:
|
||||
- "8080:80"
|
||||
depends_on:
|
||||
- api
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
@@ -0,0 +1,188 @@
|
||||
# Container Registry and CI/CD Setup
|
||||
|
||||
This document explains how to build and push Docker images using Gitea Actions and the Gitea Container Registry.
|
||||
|
||||
## Overview
|
||||
|
||||
The SPARC project uses Gitea Actions (GitHub Actions-compatible) to automatically build and push Docker images to the Gitea Container Registry whenever code is pushed to the repository.
|
||||
|
||||
## Workflow Configuration
|
||||
|
||||
The workflow is defined in `.gitea/workflows/build.yaml` and automatically:
|
||||
- Builds the Docker image from the `Dockerfile`
|
||||
- Tags the image appropriately based on the git ref (branch/tag)
|
||||
- Pushes to the Gitea Container Registry at `10.0.1.10`
|
||||
|
||||
### Triggers
|
||||
|
||||
The workflow runs on:
|
||||
- **Push to main branch**: Builds and tags with commit SHA + `latest`
|
||||
- **Push of tags**: Builds and tags with the tag name + `latest`
|
||||
- **Manual dispatch**: Can be triggered manually from Gitea UI
|
||||
|
||||
### Image Naming
|
||||
|
||||
Images are pushed to: `10.0.1.10/0xwheatyz/sparc:<tag>`
|
||||
|
||||
- Main branch commits: `10.0.1.10/0xwheatyz/sparc:<sha>` and `10.0.1.10/0xwheatyz/sparc:latest`
|
||||
- Tags: `10.0.1.10/0xwheatyz/sparc:<tag-name>` and `10.0.1.10/0xwheatyz/sparc:latest`
|
||||
- Other branches: `10.0.1.10/0xwheatyz/sparc:<branch-name>`
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### 1. Enable Container Registry in Gitea
|
||||
|
||||
The Gitea instance must have the Container Registry (Packages) feature enabled:
|
||||
|
||||
1. Access Gitea as administrator
|
||||
2. Go to Site Administration > Configuration
|
||||
3. Find "Packages" section
|
||||
4. Ensure packages/container registry is enabled
|
||||
|
||||
### 2. Create Personal Access Token
|
||||
|
||||
The workflow needs a personal access token with package write permissions:
|
||||
|
||||
1. In Gitea UI, click your profile → Settings
|
||||
2. Go to Applications → Manage Access Tokens
|
||||
3. Click "Generate New Token"
|
||||
4. Give it a descriptive name (e.g., "Actions Container Registry")
|
||||
5. Select scopes:
|
||||
- `write:package` (required)
|
||||
- `read:package` (required)
|
||||
6. Click "Generate Token"
|
||||
7. **Copy the token immediately** (you won't see it again)
|
||||
|
||||
### 3. Add Token as Repository Secret
|
||||
|
||||
1. Go to your repository in Gitea
|
||||
2. Click Settings → Secrets
|
||||
3. Click "Add Secret"
|
||||
4. Name: `GITEA_TOKEN`
|
||||
5. Value: Paste the personal access token
|
||||
6. Click "Add Secret"
|
||||
|
||||
## Usage
|
||||
|
||||
### Automatic Builds
|
||||
|
||||
Once configured, the workflow runs automatically:
|
||||
|
||||
```bash
|
||||
# Push to main branch - triggers build
|
||||
git add .
|
||||
git commit -m "feat: add new feature"
|
||||
git push origin main
|
||||
|
||||
# Create and push a tag - triggers build with tag
|
||||
git tag v1.0.0
|
||||
git push origin v1.0.0
|
||||
```
|
||||
|
||||
### Manual Builds
|
||||
|
||||
You can also trigger builds manually:
|
||||
|
||||
1. Go to repository → Actions
|
||||
2. Click on "Build and Push Docker Image" workflow
|
||||
3. Click "Run workflow"
|
||||
4. Select the branch
|
||||
5. Click "Run workflow"
|
||||
|
||||
### Monitor Build Progress
|
||||
|
||||
1. Go to repository → Actions
|
||||
2. Click on the running workflow
|
||||
3. View logs for each step
|
||||
|
||||
## Pulling Images
|
||||
|
||||
Once built, images can be pulled from the registry:
|
||||
|
||||
```bash
|
||||
# Log in to registry
|
||||
docker login 10.0.1.10 -u your-username
|
||||
|
||||
# Pull the latest image
|
||||
docker pull 10.0.1.10/0xwheatyz/sparc:latest
|
||||
|
||||
# Pull a specific tag
|
||||
docker pull 10.0.1.10/0xwheatyz/sparc:v1.0.0
|
||||
|
||||
# Pull a specific commit
|
||||
docker pull 10.0.1.10/0xwheatyz/sparc:abc1234
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Workflow Fails at Login Step
|
||||
|
||||
**Error**: `Error response from daemon: login attempt to http://10.0.1.10/v2/ failed with status: 404 Not Found`
|
||||
|
||||
**Solution**: Container registry is not enabled in Gitea. Contact administrator to enable packages feature.
|
||||
|
||||
### Workflow Fails with 401 Unauthorized
|
||||
|
||||
**Error**: `unauthorized: authentication required`
|
||||
|
||||
**Solutions**:
|
||||
1. Verify `GITEA_TOKEN` secret exists and is correct
|
||||
2. Verify token has `write:package` and `read:package` scopes
|
||||
3. Regenerate token if it has expired
|
||||
|
||||
### Workflow Fails at Push Step
|
||||
|
||||
**Error**: `denied: permission denied`
|
||||
|
||||
**Solutions**:
|
||||
1. Ensure your user account has write access to the repository
|
||||
2. Verify the token has the correct permissions
|
||||
3. Check if the repository owner matches the registry path
|
||||
|
||||
### Image Not Appearing in Packages
|
||||
|
||||
**Check**:
|
||||
1. Go to repository → Packages tab
|
||||
2. If no packages appear, check workflow logs for errors
|
||||
3. Verify the image was successfully pushed (check workflow output)
|
||||
|
||||
## Advanced Configuration
|
||||
|
||||
### Using a Different Registry
|
||||
|
||||
To push to a different container registry (e.g., Docker Hub, GHCR):
|
||||
|
||||
1. Update the `REGISTRY` variable in `.gitea/workflows/build.yaml`
|
||||
2. Update the login step with appropriate credentials
|
||||
3. Add registry credentials as secrets
|
||||
|
||||
### Building Multi-platform Images
|
||||
|
||||
To build for multiple architectures:
|
||||
|
||||
```yaml
|
||||
- name: Build Docker image
|
||||
run: |
|
||||
docker buildx build \
|
||||
--platform linux/amd64,linux/arm64 \
|
||||
-t ${{ steps.tags.outputs.IMAGE_TAG }} \
|
||||
--push .
|
||||
```
|
||||
|
||||
### Adding Build Arguments
|
||||
|
||||
To pass build arguments:
|
||||
|
||||
```yaml
|
||||
- name: Build Docker image
|
||||
run: |
|
||||
docker build \
|
||||
--build-arg VERSION=${{ gitea.sha }} \
|
||||
-t ${{ steps.tags.outputs.IMAGE_TAG }} .
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
- [Gitea Actions Documentation](https://docs.gitea.com/usage/actions/overview)
|
||||
- [Gitea Packages Documentation](https://docs.gitea.com/usage/packages/overview)
|
||||
- [GitHub Actions Syntax](https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions) (Gitea Actions compatible)
|
||||
@@ -0,0 +1,331 @@
|
||||
# Database Storage and Caching
|
||||
|
||||
This document explains how SPARC uses PostgreSQL for storing LLM messages, enabling response caching and analytics.
|
||||
|
||||
## Overview
|
||||
|
||||
SPARC stores all LLM interactions in PostgreSQL, providing:
|
||||
|
||||
- **Response Caching**: Avoid redundant API calls for previously analyzed patents
|
||||
- **Analytics**: Track usage patterns, token consumption, and analysis history
|
||||
- **Persistence**: Maintain analysis history across sessions
|
||||
|
||||
SPARC supports two cache modes:
|
||||
|
||||
1. **Cache Mode** (default, `USE_CACHE=true`): Check database for cached responses before making API calls
|
||||
2. **Fresh Mode** (`USE_CACHE=false`): Always make fresh API calls (still stores results in database)
|
||||
|
||||
## Setup
|
||||
|
||||
### 1. Start the Database
|
||||
|
||||
Use docker-compose to start the PostgreSQL database:
|
||||
|
||||
```bash
|
||||
docker-compose up -d postgres
|
||||
```
|
||||
|
||||
This will start a PostgreSQL instance accessible at `localhost:5432`.
|
||||
|
||||
### 2. Initialize the Database Schema
|
||||
|
||||
Run the initialization script to create the necessary tables:
|
||||
|
||||
```bash
|
||||
python scripts/init_database.py
|
||||
```
|
||||
|
||||
This creates the `llm_messages` table and indexes for efficient querying.
|
||||
|
||||
### 3. Configure Environment Variables
|
||||
|
||||
Create a `.env` file (or copy from `.env.example`):
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
Edit `.env` and set:
|
||||
|
||||
```env
|
||||
# Database connection (required)
|
||||
DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc
|
||||
|
||||
# Cache mode: use cached responses when available
|
||||
USE_CACHE=true
|
||||
|
||||
# API key for fresh LLM calls
|
||||
OPENROUTER_API_KEY=your_openrouter_key_here
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Running with Cache Mode (Default)
|
||||
|
||||
Set `USE_CACHE=true` in your `.env` file, then run the application normally:
|
||||
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
The application will:
|
||||
- Check the database for cached responses matching the request
|
||||
- If found, return the cached response (no API call)
|
||||
- If not found, make an API call and store the response for future use
|
||||
|
||||
### Running with Fresh Mode
|
||||
|
||||
Set `USE_CACHE=false` in your `.env` file to always get fresh responses:
|
||||
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
The application will:
|
||||
- Always send messages to OpenRouter for real LLM responses
|
||||
- Store all responses in the database
|
||||
- Useful when you need the latest analysis or want to refresh cached data
|
||||
|
||||
## Viewing Analytics
|
||||
|
||||
### View Message Statistics
|
||||
|
||||
```bash
|
||||
python scripts/view_analytics.py
|
||||
```
|
||||
|
||||
Options:
|
||||
- `--days N`: Analyze messages from the last N days (default: 30)
|
||||
|
||||
Example output:
|
||||
```
|
||||
SPARC Analytics - Last 30 days
|
||||
======================================================================
|
||||
|
||||
Total Messages: 45
|
||||
|
||||
Messages by Company:
|
||||
nvidia: 25
|
||||
intel: 12
|
||||
amd: 8
|
||||
|
||||
Messages by Analysis Type:
|
||||
portfolio: 30
|
||||
single_patent: 15
|
||||
|
||||
======================================================================
|
||||
```
|
||||
|
||||
### View Stored Messages
|
||||
|
||||
```bash
|
||||
python scripts/view_messages.py
|
||||
```
|
||||
|
||||
Options:
|
||||
- `--company COMPANY`: Filter by company name
|
||||
- `--type TYPE`: Filter by analysis type (single_patent or portfolio)
|
||||
- `--limit N`: Maximum number of messages to display (default: 10)
|
||||
|
||||
Examples:
|
||||
```bash
|
||||
# View last 10 messages
|
||||
python scripts/view_messages.py
|
||||
|
||||
# View all messages for nvidia
|
||||
python scripts/view_messages.py --company nvidia --limit 100
|
||||
|
||||
# View portfolio analyses only
|
||||
python scripts/view_messages.py --type portfolio
|
||||
```
|
||||
|
||||
## Database Schema
|
||||
|
||||
### llm_messages Table
|
||||
|
||||
| Column | Type | Description |
|
||||
|--------|------|-------------|
|
||||
| id | SERIAL | Primary key |
|
||||
| timestamp | TIMESTAMP | When the message was created |
|
||||
| company_name | VARCHAR(255) | Company being analyzed |
|
||||
| analysis_type | VARCHAR(50) | Type of analysis (single_patent, portfolio) |
|
||||
| model | VARCHAR(100) | LLM model identifier |
|
||||
| prompt | TEXT | The full prompt sent to the LLM |
|
||||
| response | TEXT | The response from the LLM |
|
||||
| metadata | JSONB | Additional metadata (patent IDs, content length, etc.) |
|
||||
| token_usage | JSONB | Token usage statistics (when available) |
|
||||
| created_at | TIMESTAMP | Record creation timestamp |
|
||||
|
||||
### Indexes
|
||||
|
||||
- `idx_messages_timestamp`: Speeds up time-based queries
|
||||
- `idx_messages_company`: Speeds up company-specific queries
|
||||
|
||||
## Docker Compose
|
||||
|
||||
The included `docker-compose.yml` provides:
|
||||
|
||||
1. **PostgreSQL Database**:
|
||||
- Image: `postgres:16-alpine`
|
||||
- Port: `5432`
|
||||
- Credentials: postgres/postgres
|
||||
- Database: sparc
|
||||
- Persistent storage via volume
|
||||
|
||||
2. **Application Container** (optional):
|
||||
- Builds from Dockerfile
|
||||
- Connects to PostgreSQL
|
||||
- Mounts current directory
|
||||
|
||||
### Start Services
|
||||
|
||||
```bash
|
||||
# Start just the database
|
||||
docker-compose up -d postgres
|
||||
|
||||
# Start everything
|
||||
docker-compose up -d
|
||||
|
||||
# View logs
|
||||
docker-compose logs -f
|
||||
|
||||
# Stop services
|
||||
docker-compose down
|
||||
|
||||
# Stop and remove volumes (WARNING: deletes data)
|
||||
docker-compose down -v
|
||||
```
|
||||
|
||||
## Toggling Between Modes
|
||||
|
||||
You can easily switch between modes by changing the `USE_CACHE` environment variable:
|
||||
|
||||
### Quick Toggle (temporary)
|
||||
|
||||
```bash
|
||||
# Run with caching enabled
|
||||
USE_CACHE=true python main.py
|
||||
|
||||
# Run with fresh API calls
|
||||
USE_CACHE=false python main.py
|
||||
```
|
||||
|
||||
### Persistent Toggle
|
||||
|
||||
Edit your `.env` file:
|
||||
|
||||
```env
|
||||
# Use cached responses when available (recommended for most use)
|
||||
USE_CACHE=true
|
||||
|
||||
# Always make fresh API calls
|
||||
USE_CACHE=false
|
||||
```
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Cost Optimization with Caching
|
||||
|
||||
Cache mode reduces API costs by reusing previous analysis results:
|
||||
|
||||
```bash
|
||||
USE_CACHE=true python main.py
|
||||
```
|
||||
|
||||
If the same company/patent combination was analyzed before, the cached response is returned instantly.
|
||||
|
||||
### Fresh Analysis
|
||||
|
||||
When you need the latest LLM analysis (e.g., after model updates):
|
||||
|
||||
```bash
|
||||
USE_CACHE=false python main.py
|
||||
```
|
||||
|
||||
### Collecting Usage Analytics
|
||||
|
||||
The database stores all interactions, enabling analytics on:
|
||||
- Which companies are analyzed most frequently
|
||||
- Types of analyses performed
|
||||
- Token usage and costs over time
|
||||
- Response caching hit rates
|
||||
|
||||
### Development and Debugging
|
||||
|
||||
Database storage is useful for:
|
||||
- Reviewing actual prompts sent to the LLM
|
||||
- Analyzing response patterns
|
||||
- Debugging the full pipeline end-to-end
|
||||
- Understanding token usage patterns
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Connection Refused
|
||||
|
||||
If you get "connection refused" errors:
|
||||
|
||||
1. Ensure PostgreSQL is running: `docker-compose ps`
|
||||
2. Check the DATABASE_URL in your `.env` file
|
||||
3. Wait for the database to be healthy: `docker-compose logs postgres`
|
||||
|
||||
### Schema Not Found
|
||||
|
||||
If you get "relation does not exist" errors:
|
||||
|
||||
1. Run the initialization script: `python scripts/init_database.py`
|
||||
2. Verify tables were created: `docker-compose exec postgres psql -U postgres -d sparc -c "\dt"`
|
||||
|
||||
### Permission Denied
|
||||
|
||||
If you get permission errors:
|
||||
|
||||
1. Check your DATABASE_URL credentials match docker-compose.yml
|
||||
2. Ensure the database container is running: `docker-compose up -d postgres`
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
### Direct Database Access
|
||||
|
||||
You can access the database directly using psql:
|
||||
|
||||
```bash
|
||||
docker-compose exec postgres psql -U postgres -d sparc
|
||||
```
|
||||
|
||||
Example queries:
|
||||
|
||||
```sql
|
||||
-- View all messages
|
||||
SELECT id, company_name, analysis_type, timestamp FROM llm_messages ORDER BY timestamp DESC LIMIT 10;
|
||||
|
||||
-- Count messages by company
|
||||
SELECT company_name, COUNT(*) FROM llm_messages GROUP BY company_name;
|
||||
|
||||
-- View recent prompts
|
||||
SELECT prompt FROM llm_messages ORDER BY timestamp DESC LIMIT 5;
|
||||
```
|
||||
|
||||
### Programmatic Access
|
||||
|
||||
You can use the `DatabaseClient` directly in your code:
|
||||
|
||||
```python
|
||||
from SPARC.database import DatabaseClient
|
||||
from SPARC import config
|
||||
|
||||
db = DatabaseClient(config.database_url)
|
||||
|
||||
# Get messages
|
||||
messages = db.get_messages(company_name="nvidia", limit=10)
|
||||
|
||||
# Get analytics
|
||||
analytics = db.get_analytics(days=7)
|
||||
|
||||
# Store a custom message
|
||||
db.store_message(
|
||||
prompt="test prompt",
|
||||
response="test response",
|
||||
company_name="test",
|
||||
analysis_type="custom"
|
||||
)
|
||||
```
|
||||
@@ -0,0 +1,396 @@
|
||||
# SPARC Complete Deployment Guide
|
||||
|
||||
This guide provides step-by-step instructions for deploying the SPARC (Semiconductor Patent & Analytics Report Core) application with all features enabled, including SERP API patent retrieval, LLM analysis, database storage, and the web UI.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Prerequisites](#prerequisites)
|
||||
- [Step 1: Clone and Configure](#step-1-clone-and-configure)
|
||||
- [Step 2: Start Services with Docker Compose](#step-2-start-services-with-docker-compose)
|
||||
- [Step 3: Database Schema](#step-3-database-schema)
|
||||
- [Step 4: Run the Services](#step-4-run-the-services)
|
||||
- [Step 5: Verify Deployment](#step-5-verify-deployment)
|
||||
- [Step 6: Using the Application](#step-6-using-the-application)
|
||||
- [Step 7: View Stored Data](#step-7-view-stored-data)
|
||||
- [Architecture Overview](#architecture-overview)
|
||||
- [Environment Variables Reference](#environment-variables-reference)
|
||||
- [Docker Compose Services](#docker-compose-services)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. **Docker & Docker Compose** installed
|
||||
2. **API Keys** (you'll need to obtain these):
|
||||
- **SerpAPI Key**: Sign up at https://serpapi.com/ (free tier: 100 searches/month)
|
||||
- **OpenRouter API Key**: Sign up at https://openrouter.ai/ (pay-as-you-go)
|
||||
|
||||
---
|
||||
|
||||
## Step 1: Clone and Configure
|
||||
|
||||
```bash
|
||||
git clone <repository-url>
|
||||
cd SPARC
|
||||
|
||||
# Create environment file
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
Edit `.env` with your API keys:
|
||||
|
||||
```env
|
||||
# Required API Keys
|
||||
API_KEY=your_serpapi_key_here
|
||||
OPENROUTER_API_KEY=your_openrouter_key_here
|
||||
|
||||
# Database Configuration (matches docker-compose.yml)
|
||||
DATABASE_URL=postgresql://postgres:postgres@localhost:5432/sparc
|
||||
USE_CACHE=true
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 2: Start Services with Docker Compose
|
||||
|
||||
```bash
|
||||
# Start all services (PostgreSQL, API, and Dashboard)
|
||||
docker-compose up -d
|
||||
|
||||
# Check status
|
||||
docker-compose ps
|
||||
|
||||
# You should see:
|
||||
# - sparc-postgres (healthy)
|
||||
# - sparc-api (running on port 8000)
|
||||
# - sparc-dashboard (running on port 8080)
|
||||
```
|
||||
|
||||
The database is automatically initialized by the `init-db` service.
|
||||
|
||||
---
|
||||
|
||||
## Step 3: Database Schema
|
||||
|
||||
The `init-db` service automatically creates the `llm_messages` table with the following schema:
|
||||
|
||||
| Column | Type | Purpose |
|
||||
|--------|------|---------|
|
||||
| `id` | SERIAL | Primary key |
|
||||
| `timestamp` | TIMESTAMP | Message creation time |
|
||||
| `company_name` | VARCHAR(255) | Company being analyzed |
|
||||
| `analysis_type` | VARCHAR(50) | 'single_patent' or 'portfolio' |
|
||||
| `model` | VARCHAR(100) | LLM model identifier |
|
||||
| `prompt` | TEXT | Full prompt sent to LLM |
|
||||
| `response` | TEXT | LLM response |
|
||||
| `metadata` | JSONB | Patent IDs, content lengths |
|
||||
| `token_usage` | JSONB | prompt/completion/total tokens |
|
||||
| `created_at` | TIMESTAMP | Record timestamp |
|
||||
|
||||
---
|
||||
|
||||
## Step 4: Run the Services
|
||||
|
||||
### Option A: Run with Docker Compose (Recommended)
|
||||
|
||||
All services are started automatically with `docker-compose up -d` from Step 2.
|
||||
|
||||
```bash
|
||||
# View logs
|
||||
docker-compose logs -f
|
||||
|
||||
# View specific service logs
|
||||
docker-compose logs -f api
|
||||
docker-compose logs -f dashboard
|
||||
```
|
||||
|
||||
### Option B: Run Locally (Development)
|
||||
|
||||
If you prefer to run the API and frontend locally (PostgreSQL still runs in Docker):
|
||||
|
||||
```bash
|
||||
# Start PostgreSQL with Docker
|
||||
docker-compose up -d postgres
|
||||
|
||||
# Wait for database to be healthy, then initialize
|
||||
python scripts/init_database.py
|
||||
|
||||
# Start FastAPI backend
|
||||
uvicorn SPARC.api:app --host 0.0.0.0 --port 8000 --reload
|
||||
|
||||
# For the React frontend (separate terminal)
|
||||
cd frontend
|
||||
npm install
|
||||
npm run dev
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 5: Verify Deployment
|
||||
|
||||
```bash
|
||||
# Check API health
|
||||
curl http://localhost:8000/health
|
||||
|
||||
# Expected response:
|
||||
# {"status":"healthy","version":"0.1.0","timestamp":"..."}
|
||||
```
|
||||
|
||||
Access the services:
|
||||
|
||||
| Service | URL |
|
||||
|---------|-----|
|
||||
| REST API | http://localhost:8000 |
|
||||
| API Documentation (Swagger) | http://localhost:8000/docs |
|
||||
| Dashboard (Web UI) | http://localhost:8080 |
|
||||
|
||||
---
|
||||
|
||||
## Step 6: Using the Application
|
||||
|
||||
### Via Dashboard (Web UI)
|
||||
|
||||
1. Open http://localhost:8080
|
||||
2. Register a new account or login (default admin: `admin` / `admin`)
|
||||
3. Navigate to **"Analysis"** from the sidebar
|
||||
4. Enter a company name (e.g., "Intel")
|
||||
5. Click **"Analyze"**
|
||||
|
||||
This will:
|
||||
- Query SerpAPI for recent patents
|
||||
- Download and parse patent PDFs
|
||||
- Send patent content to Claude for analysis
|
||||
- Store prompt/response in PostgreSQL (with caching)
|
||||
- Display results in the dashboard
|
||||
|
||||
### Via REST API
|
||||
|
||||
```bash
|
||||
# Analyze single company
|
||||
curl http://localhost:8000/analyze/Intel
|
||||
|
||||
# Batch analyze multiple companies (synchronous)
|
||||
curl -X POST http://localhost:8000/analyze/batch \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"companies": ["Intel", "AMD", "NVIDIA"], "max_workers": 3}'
|
||||
|
||||
# Async batch (for large jobs)
|
||||
curl -X POST http://localhost:8000/analyze/batch/async \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"companies": ["Intel", "AMD"]}'
|
||||
|
||||
# Check job status
|
||||
curl http://localhost:8000/jobs/{job_id}
|
||||
|
||||
# List all jobs
|
||||
curl http://localhost:8000/jobs
|
||||
```
|
||||
|
||||
### Via Python
|
||||
|
||||
```python
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
|
||||
analyzer = CompanyAnalyzer()
|
||||
result = analyzer.analyze("Intel")
|
||||
print(result.analysis)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 7: View Stored Data
|
||||
|
||||
```bash
|
||||
# View analytics (aggregated usage)
|
||||
python scripts/view_analytics.py
|
||||
|
||||
# View stored messages
|
||||
python scripts/view_messages.py
|
||||
|
||||
# Query database directly
|
||||
docker exec -it sparc-postgres psql -U postgres -d sparc -c \
|
||||
"SELECT company_name, analysis_type, token_usage FROM llm_messages ORDER BY timestamp DESC LIMIT 10;"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
||||
│ Dashboard │───▶│ FastAPI │───▶│ Analyzer │
|
||||
│ (8080) │ │ (8000) │ │ │
|
||||
└──────────────┘ └──────────────┘ └──────┬───────┘
|
||||
│
|
||||
┌──────────────────────────┼──────────────────────────┐
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
||||
│ SerpAPI │ │ OpenRouter │ │ PostgreSQL │
|
||||
│ (Patents) │ │ (Claude) │ │ (Storage) │
|
||||
└──────────────┘ └──────────────┘ └──────────────┘
|
||||
```
|
||||
|
||||
### Component Responsibilities
|
||||
|
||||
| Component | Purpose |
|
||||
|-----------|---------|
|
||||
| **Dashboard** | React TypeScript web UI with authentication |
|
||||
| **FastAPI** | REST API with JWT authentication |
|
||||
| **Analyzer** | Orchestrates patent retrieval and LLM analysis |
|
||||
| **SerpAPI** | Retrieves patent data from Google Patents |
|
||||
| **OpenRouter** | Routes requests to Claude for AI analysis |
|
||||
| **PostgreSQL** | Stores prompts, responses, users, and cached results |
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables Reference
|
||||
|
||||
| Variable | Required | Default | Description |
|
||||
|----------|----------|---------|-------------|
|
||||
| `API_KEY` | Yes | - | SerpAPI key for patent search |
|
||||
| `OPENROUTER_API_KEY` | Yes | - | OpenRouter API key for Claude access |
|
||||
| `DATABASE_URL` | Yes | - | PostgreSQL connection string |
|
||||
| `USE_CACHE` | No | `true` | Check database for cached responses before API calls |
|
||||
| `JWT_SECRET` | Yes | - | Secret key for JWT authentication (change in production!) |
|
||||
|
||||
### Database URL Format
|
||||
|
||||
```
|
||||
postgresql://[user]:[password]@[host]:[port]/[database]
|
||||
```
|
||||
|
||||
Example:
|
||||
```
|
||||
postgresql://postgres:postgres@localhost:5432/sparc
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Docker Compose Services
|
||||
|
||||
The `docker-compose.yml` includes all services needed for production:
|
||||
|
||||
| Service | Container | Port | Description |
|
||||
|---------|-----------|------|-------------|
|
||||
| `postgres` | sparc-postgres | 5432 | PostgreSQL database |
|
||||
| `init-db` | sparc-init-db | - | One-time database initialization (seeds admin user) |
|
||||
| `api` | sparc-api | 8000 | FastAPI REST API with JWT auth |
|
||||
| `dashboard` | sparc-dashboard | 8080 | React TypeScript web UI |
|
||||
|
||||
### Common Docker Compose Commands
|
||||
|
||||
```bash
|
||||
# Start all services
|
||||
docker-compose up -d
|
||||
|
||||
# Start with rebuild (after code changes)
|
||||
docker-compose up -d --build
|
||||
|
||||
# View logs
|
||||
docker-compose logs -f
|
||||
|
||||
# View specific service logs
|
||||
docker-compose logs -f api
|
||||
docker-compose logs -f dashboard
|
||||
|
||||
# Stop all services
|
||||
docker-compose down
|
||||
|
||||
# Stop and remove volumes (WARNING: deletes data)
|
||||
docker-compose down -v
|
||||
|
||||
# Restart a specific service
|
||||
docker-compose restart api
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Database Connection Issues
|
||||
|
||||
```bash
|
||||
# Check if postgres is running
|
||||
docker-compose ps
|
||||
|
||||
# Check postgres logs
|
||||
docker-compose logs postgres
|
||||
|
||||
# Test database connection
|
||||
docker exec -it sparc-postgres psql -U postgres -d sparc -c "SELECT 1;"
|
||||
```
|
||||
|
||||
### API Key Issues
|
||||
|
||||
```bash
|
||||
# Verify environment variables are set
|
||||
echo $API_KEY
|
||||
echo $OPENROUTER_API_KEY
|
||||
|
||||
# Test SerpAPI directly
|
||||
curl "https://serpapi.com/search?engine=google_patents&q=Intel&api_key=$API_KEY"
|
||||
```
|
||||
|
||||
### Port Conflicts
|
||||
|
||||
If ports 8000, 8080, or 5432 are in use:
|
||||
|
||||
```bash
|
||||
# Find what's using the port
|
||||
lsof -i :8000
|
||||
|
||||
# Or change ports in docker-compose.yml
|
||||
ports:
|
||||
- "8001:8000" # Use 8001 instead of 8000 (8080 is already used by the dashboard)
|
||||
```
|
||||
|
||||
### Container Issues
|
||||
|
||||
```bash
|
||||
# Rebuild containers after code changes
|
||||
docker-compose build --no-cache
|
||||
|
||||
# Remove all containers and start fresh
|
||||
docker-compose down
|
||||
docker-compose up -d --build
|
||||
```
|
||||
|
||||
### Viewing Application Logs
|
||||
|
||||
```bash
|
||||
# All services
|
||||
docker-compose logs -f
|
||||
|
||||
# Specific service
|
||||
docker-compose logs -f api
|
||||
docker-compose logs -f dashboard
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference
|
||||
|
||||
```bash
|
||||
# Docker setup (recommended)
|
||||
cp .env.example .env
|
||||
# Edit .env with API keys
|
||||
docker-compose up -d
|
||||
|
||||
# Local development setup
|
||||
cp .env.example .env
|
||||
# Edit .env with API keys
|
||||
docker-compose up -d postgres
|
||||
python scripts/init_database.py
|
||||
uvicorn SPARC.api:app --reload &
|
||||
cd frontend && npm install && npm run dev &
|
||||
|
||||
# Check status
|
||||
curl http://localhost:8000/health
|
||||
open http://localhost:8080
|
||||
|
||||
# View data
|
||||
python scripts/view_analytics.py
|
||||
python scripts/view_messages.py
|
||||
```
|
||||
@@ -20,6 +20,14 @@
|
||||
packages = [
|
||||
python
|
||||
pkgs.python311Packages.virtualenv # gives `virtualenv` tool
|
||||
pkgs.zlib
|
||||
pkgs.stdenv.cc.cc.lib
|
||||
];
|
||||
|
||||
# Required for numpy and other C extension packages
|
||||
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [
|
||||
pkgs.zlib
|
||||
pkgs.stdenv.cc.cc.lib
|
||||
];
|
||||
|
||||
shellHook = ''
|
||||
@@ -48,8 +56,8 @@
|
||||
fi
|
||||
|
||||
# Prompt tweak so you can see when venv is active
|
||||
export PS1="(SPARC-venv) $PS1"
|
||||
export NIX_PROJECT_SHELL="SPARC"
|
||||
'';
|
||||
};
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
# Dependencies
|
||||
node_modules/
|
||||
|
||||
# Build output
|
||||
dist/
|
||||
|
||||
# Local env files
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# Editor directories
|
||||
.vscode/
|
||||
.idea/
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Debug logs
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
@@ -0,0 +1,32 @@
|
||||
# Build stage: install dependencies and produce the static frontend bundle.
FROM node:20-alpine AS build

WORKDIR /app

# Copy package manifests first so the dependency layer stays cached
# until package.json / package-lock.json change.
COPY package.json package-lock.json* ./

# Install dependencies
RUN npm install

# Copy source files
COPY . .

# Build the application (the result is read from /app/dist below)
RUN npm run build

# Production stage: serve the built files with nginx.
FROM nginx:alpine

# Copy built files
COPY --from=build /app/dist /usr/share/nginx/html

# Copy nginx template (processed at startup with envsubst)
COPY nginx.conf.template /etc/nginx/templates/default.conf.template

# Default API URL (override with -e API_URL=...).
# Keep this WITHOUT a trailing slash: the nginx template is expected to use
# `proxy_pass ${API_URL}/;`, so a trailing slash here would produce a
# proxied URI beginning with "//".
ENV API_URL=http://api:8000

EXPOSE 80

CMD ["nginx", "-g", "daemon off;"]
|
||||
@@ -0,0 +1,13 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>SPARC Dashboard</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,34 @@
|
||||
server {
    listen 80;
    server_name localhost;
    root /usr/share/nginx/html;
    index index.html;

    # Gzip compression
    gzip on;
    gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;

    # Handle React Router (SPA): unknown paths fall back to index.html
    location / {
        try_files $uri $uri/ /index.html;
    }

    # Proxy API requests to backend.
    # "^~" makes this prefix match final, so the regex location for static
    # assets below is never tried for /api/ requests. Without it, an API path
    # ending in .js/.css/... (e.g. /api/analyze/Node.js) would be handled as a
    # static file instead of being proxied.
    # ${API_URL} is substituted from the environment at container startup.
    location ^~ /api/ {
        proxy_pass ${API_URL}/;
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_cache_bypass $http_upgrade;
    }

    # Cache static assets
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }
}
|
||||
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"name": "sparc-dashboard",
|
||||
"private": true,
|
||||
"version": "1.0.0",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "tsc -b && vite build",
|
||||
"lint": "eslint .",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@tanstack/react-query": "^5.51.0",
|
||||
"axios": "^1.7.2",
|
||||
"lucide-react": "^0.400.0",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"react-router-dom": "^6.24.0",
|
||||
"recharts": "^2.12.7"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/js": "^9.6.0",
|
||||
"@types/react": "^18.3.3",
|
||||
"@types/react-dom": "^18.3.0",
|
||||
"@vitejs/plugin-react": "^4.3.1",
|
||||
"autoprefixer": "^10.4.19",
|
||||
"eslint": "^9.6.0",
|
||||
"eslint-plugin-react-hooks": "^5.1.0",
|
||||
"eslint-plugin-react-refresh": "^0.4.7",
|
||||
"globals": "^15.8.0",
|
||||
"postcss": "^8.4.39",
|
||||
"tailwindcss": "^3.4.4",
|
||||
"typescript": "~5.5.3",
|
||||
"typescript-eslint": "^8.0.0",
|
||||
"vite": "^5.3.3"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
export default {
|
||||
plugins: {
|
||||
tailwindcss: {},
|
||||
autoprefixer: {},
|
||||
},
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
import { BrowserRouter, Routes, Route, Navigate } from 'react-router-dom';
|
||||
import { QueryClient, QueryClientProvider } from '@tanstack/react-query';
|
||||
import { AuthProvider } from './context/AuthContext';
|
||||
import { Layout } from './components/Layout';
|
||||
import { ProtectedRoute } from './components/ProtectedRoute';
|
||||
import { Login } from './pages/Login';
|
||||
import { Register } from './pages/Register';
|
||||
import { Analysis } from './pages/Analysis';
|
||||
import { Batch } from './pages/Batch';
|
||||
import { AnalyticsPage } from './pages/Analytics';
|
||||
import { About } from './pages/About';
|
||||
import { AdminUsers } from './pages/AdminUsers';
|
||||
|
||||
// How long fetched query data is treated as fresh.
const QUERY_STALE_TIME_MS = 1000 * 60 * 5; // 5 minutes

// Single React Query client for the whole app: queries use the stale time
// above and are retried once on failure.
const queryClient = new QueryClient({
  defaultOptions: {
    queries: { staleTime: QUERY_STALE_TIME_MS, retry: 1 },
  },
});
|
||||
|
||||
/**
 * Root component: wires up the React Query client, the auth context and the
 * route table.
 *
 * `/login` and `/register` are declared outside the protected wrapper; every
 * other page is nested under a pathless route whose element is
 * `<ProtectedRoute><Layout /></ProtectedRoute>`. `/` and any unknown path
 * redirect to `/analysis`.
 */
function App() {
  // Element for the pathless parent route that wraps all signed-in pages.
  const protectedShell = (
    <ProtectedRoute>
      <Layout />
    </ProtectedRoute>
  );

  // The user-management page is additionally wrapped with `requireAdmin`.
  const adminUsersPage = (
    <ProtectedRoute requireAdmin>
      <AdminUsers />
    </ProtectedRoute>
  );

  return (
    <QueryClientProvider client={queryClient}>
      <AuthProvider>
        <BrowserRouter>
          <Routes>
            {/* Public routes */}
            <Route path="/login" element={<Login />} />
            <Route path="/register" element={<Register />} />

            {/* Protected routes */}
            <Route element={protectedShell}>
              <Route path="/analysis" element={<Analysis />} />
              <Route path="/batch" element={<Batch />} />
              <Route path="/analytics" element={<AnalyticsPage />} />
              <Route path="/about" element={<About />} />

              {/* Admin routes */}
              <Route path="/admin/users" element={adminUsersPage} />
            </Route>

            {/* Default redirect */}
            <Route path="/" element={<Navigate to="/analysis" replace />} />
            <Route path="*" element={<Navigate to="/analysis" replace />} />
          </Routes>
        </BrowserRouter>
      </AuthProvider>
    </QueryClientProvider>
  );
}
|
||||
|
||||
export default App;
|
||||
@@ -0,0 +1,154 @@
|
||||
import axios, { AxiosError, InternalAxiosRequestConfig } from 'axios';
|
||||
import type { TokenResponse, User, CompanyAnalysis, BatchAnalysisResult, JobStatus, Analytics } from '../types';
|
||||
|
||||
// Backend base URL: taken from the VITE_API_URL build-time variable when it
// is set to a non-empty value, otherwise the relative '/api' path.
const API_BASE_URL = import.meta.env.VITE_API_URL || '/api';

// Headers sent with every request made through the shared client.
const DEFAULT_HEADERS = { 'Content-Type': 'application/json' };

// Shared axios instance used by all API helpers in this module.
const api = axios.create({ baseURL: API_BASE_URL, headers: DEFAULT_HEADERS });
|
||||
|
||||
// Token management
// The current token pair lives in module-level variables (initialised from
// localStorage at import time) and is mirrored back to localStorage on
// every change.
let accessToken: string | null = localStorage.getItem('access_token');
let refreshToken: string | null = localStorage.getItem('refresh_token');

/** Remember a new token pair in memory and persist it to localStorage. */
export function setTokens(tokens: TokenResponse) {
  const { access_token, refresh_token } = tokens;
  accessToken = access_token;
  refreshToken = refresh_token;
  localStorage.setItem('access_token', access_token);
  localStorage.setItem('refresh_token', refresh_token);
}

/** Forget both tokens, in memory and in localStorage. */
export function clearTokens() {
  accessToken = null;
  refreshToken = null;
  for (const storageKey of ['access_token', 'refresh_token']) {
    localStorage.removeItem(storageKey);
  }
}

/** Return the in-memory access token, or null when none is stored. */
export function getAccessToken() {
  return accessToken;
}
|
||||
|
||||
// Request interceptor to add auth header.
// Requests go out unchanged when no access token is currently held.
api.interceptors.request.use((request: InternalAxiosRequestConfig) => {
  if (!accessToken) {
    return request;
  }
  request.headers.Authorization = `Bearer ${accessToken}`;
  return request;
});
|
||||
|
||||
// Response interceptor to handle token refresh.
// On a 401, when a refresh token is held and this request has not already
// been retried, exchange the refresh token for a new pair and replay the
// request once. If the refresh itself fails, drop the tokens, send the
// browser to /login and reject with the original error.
api.interceptors.response.use(
  (response) => response,
  async (error: AxiosError) => {
    const failedRequest = error.config as InternalAxiosRequestConfig & { _retry?: boolean };

    const canRefresh =
      error.response?.status === 401 && !failedRequest._retry && refreshToken;
    if (!canRefresh) {
      return Promise.reject(error);
    }

    // Mark the request so a second 401 on the replay is not refreshed again.
    failedRequest._retry = true;

    try {
      // Plain axios (not `api`) so this call bypasses the interceptors above.
      const { data: renewed } = await axios.post<TokenResponse>(`${API_BASE_URL}/auth/refresh`, {
        refresh_token: refreshToken,
      });

      setTokens(renewed);
      failedRequest.headers.Authorization = `Bearer ${renewed.access_token}`;
    } catch {
      clearTokens();
      window.location.href = '/login';
      return Promise.reject(error);
    }

    // Replayed outside the try block: a failure of the replay itself is
    // returned to the caller and does not trigger the logout path.
    return api(failedRequest);
  }
);
|
||||
|
||||
// Auth API
// Thin wrappers around the /auth endpoints of the shared client.
export const authApi = {
  /** POST /auth/register with the given credentials; resolves to the response body. */
  async register(email: string, password: string): Promise<User> {
    const { data } = await api.post<User>('/auth/register', { email, password });
    return data;
  },

  /** POST /auth/login; stores the returned token pair before resolving to it. */
  async login(email: string, password: string): Promise<TokenResponse> {
    const { data } = await api.post<TokenResponse>('/auth/login', { email, password });
    setTokens(data);
    return data;
  },

  /** GET /auth/me; resolves to the response body. */
  async getMe(): Promise<User> {
    const { data } = await api.get<User>('/auth/me');
    return data;
  },

  /** Client-side logout: only clears the stored tokens, no request is sent. */
  logout(): void {
    clearTokens();
  },
};
|
||||
|
||||
// Analysis API
// Wrappers around the analysis and job endpoints. Values interpolated into a
// URL path are percent-encoded so characters such as "/", "?" or "#" cannot
// change which route is requested.
export const analysisApi = {
  /** GET /analyze/{companyName}; resolves to the response body. */
  analyzeCompany: async (companyName: string): Promise<CompanyAnalysis> => {
    const response = await api.get<CompanyAnalysis>(`/analyze/${encodeURIComponent(companyName)}`);
    return response.data;
  },

  /** POST /analyze/batch with the company list and `max_workers` (default 3). */
  analyzeBatch: async (companies: string[], maxWorkers = 3): Promise<BatchAnalysisResult> => {
    const response = await api.post<BatchAnalysisResult>('/analyze/batch', {
      companies,
      max_workers: maxWorkers,
    });
    return response.data;
  },

  /** POST /analyze/batch/async with the same payload; resolves to a job status. */
  analyzeBatchAsync: async (companies: string[], maxWorkers = 3): Promise<JobStatus> => {
    const response = await api.post<JobStatus>('/analyze/batch/async', {
      companies,
      max_workers: maxWorkers,
    });
    return response.data;
  },

  /** GET /jobs/{jobId}; the id is encoded like the company name above. */
  getJobStatus: async (jobId: string): Promise<JobStatus> => {
    const response = await api.get<JobStatus>(`/jobs/${encodeURIComponent(jobId)}`);
    return response.data;
  },

  /** GET /jobs with `limit` (default 10) and, when given, a `status` filter. */
  listJobs: async (status?: string, limit = 10): Promise<JobStatus[]> => {
    const params = new URLSearchParams();
    if (status) params.append('status', status);
    params.append('limit', limit.toString());
    const response = await api.get<JobStatus[]>(`/jobs?${params}`);
    return response.data;
  },
};
|
||||
|
||||
// Analytics API
/** Wrapper around the `/analytics` endpoint of the shared `api` client. */
export const analyticsApi = {
  /** GETs `/analytics?days={days}` (30 by default) and returns the response body. */
  getAnalytics: async (days = 30): Promise<Analytics> => {
    const url = `/analytics?days=${days}`;
    const { data: analytics } = await api.get<Analytics>(url);
    return analytics;
  },
};
|
||||
|
||||
// Admin API
/** Wrappers around the `/admin/users` endpoints of the shared `api` client. */
export const adminApi = {
  /** GETs one page of users using `limit` and `offset` query parameters. */
  listUsers: async (limit = 100, offset = 0): Promise<User[]> => {
    const url = `/admin/users?limit=${limit}&offset=${offset}`;
    const { data: users } = await api.get<User[]>(url);
    return users;
  },

  /** PATCHes `/admin/users/{userId}/role` with the new role and returns the response body. */
  updateUserRole: async (userId: number, role: 'admin' | 'user'): Promise<User> => {
    const url = `/admin/users/${userId}/role`;
    const { data: updatedUser } = await api.patch<User>(url, { role });
    return updatedUser;
  },

  /** DELETEs `/admin/users/{userId}`; resolves with no value. */
  deleteUser: async (userId: number): Promise<void> => {
    const url = `/admin/users/${userId}`;
    await api.delete(url);
  },
};
|
||||
|
||||
export default api;
|
||||
@@ -0,0 +1,108 @@
|
||||
import { Outlet, NavLink, useNavigate } from 'react-router-dom';
|
||||
import { useAuth } from '../context/AuthContext';
|
||||
import { Search, Layers, BarChart3, Info, Users, LogOut } from 'lucide-react';
|
||||
|
||||
/**
 * Application shell: header with brand, desktop navigation and user menu,
 * a fixed bottom navigation bar for small screens, and the routed page
 * content rendered through `<Outlet />`.
 */
export function Layout() {
  const { user, isAdmin, logout } = useAuth();
  const navigate = useNavigate();

  // Log out first, then move to the login route.
  const handleLogout = () => {
    logout();
    navigate('/login');
  };

  // The "Users" entry is only present for admins.
  const navItems = [
    { to: '/analysis', icon: Search, label: 'Analysis' },
    { to: '/batch', icon: Layers, label: 'Batch' },
    { to: '/analytics', icon: BarChart3, label: 'Analytics' },
    { to: '/about', icon: Info, label: 'About' },
    ...(isAdmin ? [{ to: '/admin/users', icon: Users, label: 'Users' }] : []),
  ];

  // Class builders for NavLink; the active link gets the highlighted variant.
  const desktopLinkClass = ({ isActive }: { isActive: boolean }) => {
    const stateClasses = isActive
      ? 'bg-gradient-to-r from-primary to-primary-dark text-white'
      : 'text-text-secondary hover:text-text-primary hover:bg-bg-card-hover';
    return `flex items-center gap-2 px-4 py-2 rounded-lg text-sm font-medium transition-all ${stateClasses}`;
  };

  const mobileLinkClass = ({ isActive }: { isActive: boolean }) => {
    const stateClasses = isActive ? 'text-primary' : 'text-text-secondary';
    return `flex flex-col items-center gap-1 px-3 py-2 rounded-lg text-xs font-medium transition-all ${stateClasses}`;
  };

  return (
    <div className="min-h-screen bg-gradient-to-br from-bg-dark to-indigo-950">
      {/* Header */}
      <header className="bg-bg-card/80 backdrop-blur-lg border-b border-primary/20">
        <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
          <div className="flex items-center justify-between h-16">
            {/* Brand */}
            <div className="flex items-center gap-3">
              <span className="text-2xl">⚡</span>
              <div>
                <h1 className="text-xl font-bold bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent">
                  SPARC
                </h1>
                <span className="text-xs text-text-secondary uppercase tracking-wider">
                  Semiconductor Patent Analytics
                </span>
              </div>
            </div>

            {/* Navigation */}
            <nav className="hidden md:flex items-center gap-1 bg-bg-card/60 rounded-xl p-1 border border-primary/15">
              {navItems.map((item) => {
                const Icon = item.icon;
                return (
                  <NavLink key={item.to} to={item.to} className={desktopLinkClass}>
                    <Icon size={16} />
                    {item.label}
                  </NavLink>
                );
              })}
            </nav>

            {/* User menu */}
            <div className="flex items-center gap-4">
              <div className="text-right hidden sm:block">
                <div className="text-sm font-medium text-text-primary">{user?.email}</div>
                <div className="text-xs text-text-secondary capitalize">{user?.role}</div>
              </div>
              <button
                onClick={handleLogout}
                className="flex items-center gap-2 px-3 py-2 rounded-lg text-text-secondary hover:text-error hover:bg-error/10 transition-all"
              >
                <LogOut size={18} />
                <span className="hidden sm:inline">Logout</span>
              </button>
            </div>
          </div>
        </div>
      </header>

      {/* Mobile Navigation */}
      <nav className="md:hidden fixed bottom-0 left-0 right-0 bg-bg-card/95 backdrop-blur-lg border-t border-primary/20 z-50">
        <div className="flex justify-around py-2">
          {navItems.map((item) => {
            const Icon = item.icon;
            return (
              <NavLink key={item.to} to={item.to} className={mobileLinkClass}>
                <Icon size={20} />
                {item.label}
              </NavLink>
            );
          })}
        </div>
      </nav>

      {/* Main content */}
      <main className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8 pb-24 md:pb-8">
        <Outlet />
      </main>
    </div>
  );
}
|
||||
@@ -0,0 +1,30 @@
|
||||
import { Navigate, useLocation } from 'react-router-dom';
|
||||
import { useAuth } from '../context/AuthContext';
|
||||
|
||||
/** Props accepted by `ProtectedRoute`. */
interface ProtectedRouteProps {
  // Content rendered when the access checks pass.
  children: React.ReactNode;
  // Optional flag; ProtectedRoute defaults it to false and, when it is true,
  // redirects authenticated non-admin users to /analysis.
  requireAdmin?: boolean;
}
|
||||
|
||||
/**
 * Route guard. Shows a spinner while auth state is loading, redirects
 * unauthenticated visitors to /login (passing the current location in router
 * state as `from`), redirects non-admins to /analysis when `requireAdmin` is
 * set, and otherwise renders `children`.
 */
export function ProtectedRoute({ children, requireAdmin = false }: ProtectedRouteProps) {
  const auth = useAuth();
  const currentLocation = useLocation();

  if (auth.isLoading) {
    return (
      <div className="min-h-screen bg-gradient-to-br from-bg-dark to-indigo-950 flex items-center justify-center">
        <div className="animate-spin rounded-full h-12 w-12 border-t-2 border-b-2 border-primary"></div>
      </div>
    );
  }

  if (!auth.isAuthenticated) {
    return <Navigate to="/login" state={{ from: currentLocation }} replace />;
  }

  const adminRequiredButMissing = requireAdmin && !auth.isAdmin;
  if (adminRequiredButMissing) {
    return <Navigate to="/analysis" replace />;
  }

  return <>{children}</>;
}
|
||||
@@ -0,0 +1,81 @@
|
||||
import { createContext, useContext, useState, useEffect, ReactNode } from 'react';
|
||||
import { authApi, getAccessToken } from '../api/client';
|
||||
import type { User } from '../types';
|
||||
|
||||
/** Shape of the value exposed through `AuthContext`. */
interface AuthContextType {
  // The signed-in user, or null when nobody is signed in.
  user: User | null;
  // Starts true in AuthProvider and is set to false once its initial auth check finishes.
  isLoading: boolean;
  // AuthProvider sets this to `!!user`.
  isAuthenticated: boolean;
  // AuthProvider sets this to `user?.role === 'admin'`.
  isAdmin: boolean;
  // Logs in with the given credentials, then reloads the user.
  login: (email: string, password: string) => Promise<void>;
  // Registers, logs in with the same credentials, then reloads the user.
  register: (email: string, password: string) => Promise<void>;
  // Logs out via authApi and resets `user` to null.
  logout: () => void;
  // Re-fetches the current user; AuthProvider sets `user` to null if the fetch fails.
  refreshUser: () => Promise<void>;
}
|
||||
|
||||
const AuthContext = createContext<AuthContextType | undefined>(undefined);
|
||||
|
||||
/**
 * Provides authentication state and actions to the component tree.
 *
 * On mount, if an access token is present, the current user is fetched;
 * `isLoading` stays true until that initial check has finished.
 */
export function AuthProvider({ children }: { children: ReactNode }) {
  const [user, setUser] = useState<User | null>(null);
  const [isLoading, setIsLoading] = useState(true);

  // Any failure while loading the user is treated as "not signed in".
  const refreshUser = async () => {
    try {
      setUser(await authApi.getMe());
    } catch {
      setUser(null);
    }
  };

  // Initial auth check; intentionally runs only once, on mount.
  useEffect(() => {
    void (async () => {
      const hasToken = Boolean(getAccessToken());
      if (hasToken) {
        await refreshUser();
      }
      setIsLoading(false);
    })();
  }, []);

  const login = async (email: string, password: string) => {
    await authApi.login(email, password);
    await refreshUser();
  };

  // Registration is followed by a login with the same credentials.
  const register = async (email: string, password: string) => {
    await authApi.register(email, password);
    await authApi.login(email, password);
    await refreshUser();
  };

  const logout = () => {
    authApi.logout();
    setUser(null);
  };

  const contextValue: AuthContextType = {
    user,
    isLoading,
    isAuthenticated: !!user,
    isAdmin: user?.role === 'admin',
    login,
    register,
    logout,
    refreshUser,
  };

  return <AuthContext.Provider value={contextValue}>{children}</AuthContext.Provider>;
}
|
||||
|
||||
/**
 * Returns the current auth context value.
 *
 * Throws an Error when called outside an `AuthProvider`, i.e. when the
 * context value is still `undefined`.
 */
export function useAuth() {
  const ctx = useContext(AuthContext);
  if (ctx !== undefined) {
    return ctx;
  }
  throw new Error('useAuth must be used within an AuthProvider');
}
|
||||
@@ -0,0 +1,34 @@
|
||||
/* Tailwind layer directives */
@tailwind base;
@tailwind components;
@tailwind utilities;

/* Global typography: system font stack with font smoothing enabled */
body {
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

/* Custom scrollbar (uses the -webkit- prefixed scrollbar pseudo-elements) */
::-webkit-scrollbar {
  width: 8px;
  height: 8px;
}

/* Scrollbar track colour */
::-webkit-scrollbar-track {
  background: #1e293b;
}

/* Scrollbar thumb: rounded, with a different colour on hover (next rule) */
::-webkit-scrollbar-thumb {
  background: #6366f1;
  border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
  background: #4f46e5;
}

/* Selection: translucent background with light text */
::selection {
  background: rgba(99, 102, 241, 0.3);
  color: #f8fafc;
}
|
||||
@@ -0,0 +1,10 @@
|
||||
import { StrictMode } from 'react';
|
||||
import { createRoot } from 'react-dom/client';
|
||||
import App from './App';
|
||||
import './index.css';
|
||||
|
||||
// Application entry point: renders <App /> inside <StrictMode> into the
// element with id "root". The non-null assertion (`!`) assumes the host page
// always contains that element. TODO confirm against index.html.
createRoot(document.getElementById('root')!).render(
  <StrictMode>
    <App />
  </StrictMode>
);
|
||||
@@ -0,0 +1,171 @@
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import axios from 'axios';
|
||||
import { Search, FileText, Bot, Zap, Globe, BarChart3, CheckCircle, AlertTriangle, XCircle } from 'lucide-react';
|
||||
|
||||
const API_BASE_URL = import.meta.env.VITE_API_URL || '/api';
|
||||
|
||||
/**
 * "About" page: product description, feature list, technology stack,
 * API endpoint hints and a small system-status panel.
 *
 * The API status card is driven by a health query that polls
 * `${API_BASE_URL}/health` every 30 seconds.
 */
export function About() {
  const { data: health, isError: healthCheckFailed } = useQuery({
    queryKey: ['health'],
    queryFn: async () => {
      const response = await axios.get(`${API_BASE_URL}/health`);
      return response.data;
    },
    refetchInterval: 30000,
  });

  // The query keeps the last successful `data` after a failed refetch, so
  // checking `health` alone would keep showing "online" once the API goes
  // down. The error flag must be consulted as well.
  const apiStatus = health && !healthCheckFailed ? 'online' : 'offline';

  const features = [
    {
      icon: Search,
      title: 'Patent Retrieval',
      description: 'Automated collection via SerpAPI\'s Google Patents',
    },
    {
      icon: FileText,
      title: 'Intelligent Parsing',
      description: 'Extracts key sections from patent documents',
    },
    {
      icon: Bot,
      title: 'AI Analysis',
      description: 'Deep analysis powered by Claude 3.5 Sonnet',
    },
    {
      icon: Zap,
      title: 'Batch Processing',
      description: 'Analyze multiple companies concurrently',
    },
    {
      icon: Globe,
      title: 'REST API',
      description: 'FastAPI web service for seamless integration',
    },
    {
      icon: BarChart3,
      title: 'Analytics',
      description: 'Track and visualize historical analysis data',
    },
  ];

  const techStack = [
    { label: 'Backend', value: 'Python, FastAPI' },
    { label: 'AI Model', value: 'Claude 3.5 Sonnet' },
    { label: 'Database', value: 'PostgreSQL' },
    { label: 'Frontend', value: 'React, TailwindCSS' },
    { label: 'Data Source', value: 'SerpAPI Patents' },
  ];

  return (
    <div className="space-y-8">
      {/* Header */}
      <div>
        <h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
          About SPARC
        </h2>
      </div>

      <div className="grid grid-cols-1 lg:grid-cols-3 gap-8">
        {/* Main Content */}
        <div className="lg:col-span-2 space-y-6">
          {/* Description */}
          <p className="text-text-secondary leading-relaxed">
            <strong className="text-text-primary">SPARC</strong> (Semiconductor Patent & Analytics Report Core)
            is an AI-powered patent analysis platform that evaluates company performance by analyzing their
            patent portfolios with cutting-edge language models.
          </p>

          {/* Features */}
          <div>
            <h3 className="text-lg font-semibold text-text-primary mb-4">Key Features</h3>
            <div className="space-y-3">
              {features.map(({ icon: Icon, title, description }) => (
                <div
                  key={title}
                  className="flex items-start gap-4 py-3 border-b border-primary/10 last:border-0"
                >
                  <div className="flex-shrink-0">
                    <Icon className="text-primary" size={20} />
                  </div>
                  <div>
                    <div className="font-medium text-text-primary">{title}</div>
                    <div className="text-sm text-text-secondary">{description}</div>
                  </div>
                </div>
              ))}
            </div>
          </div>
        </div>

        {/* Sidebar */}
        <div className="space-y-6">
          {/* Tech Stack */}
          <div className="bg-gradient-to-br from-primary/10 to-secondary/5 border border-primary/20 rounded-xl p-5">
            <h3 className="font-semibold text-text-primary mb-4">Technology Stack</h3>
            <div className="space-y-3">
              {techStack.map(({ label, value }) => (
                <div key={label}>
                  <div className="text-primary text-sm">{label}</div>
                  <div className="text-text-secondary text-sm">{value}</div>
                </div>
              ))}
            </div>
          </div>

          {/* API Endpoints */}
          {/* NOTE(review): these URLs are hard-coded to localhost and do not follow API_BASE_URL. */}
          <div className="bg-bg-card/60 border border-primary/15 rounded-xl p-5">
            <h3 className="font-semibold text-text-primary mb-4">API Endpoints</h3>
            <div className="space-y-2">
              <code className="block bg-bg-dark px-3 py-2 rounded text-sm text-text-secondary">
                http://localhost:8000/docs
              </code>
              <code className="block bg-bg-dark px-3 py-2 rounded text-sm text-text-secondary">
                http://localhost:8000/health
              </code>
            </div>
          </div>
        </div>
      </div>

      {/* System Status */}
      <div>
        <h3 className="text-lg font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-4">
          System Status
        </h3>
        <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
          <StatusCard
            label="API"
            status={apiStatus}
          />
          {/* The Database and Dashboard cards are static; they are not derived from the health query. */}
          <StatusCard
            label="Database"
            status="configured"
          />
          <StatusCard
            label="Dashboard"
            status="online"
          />
        </div>
      </div>
    </div>
  );
}
|
||||
|
||||
/**
 * Small card showing a labelled status with a matching icon and colour.
 *
 * @param label  Caption shown under the icon.
 * @param status One of 'online', 'offline' or 'configured'; selects the icon and colours.
 */
function StatusCard({ label, status }: { label: string; status: 'online' | 'offline' | 'configured' }) {
  // Each class name is written out in full. Tailwind only generates classes it
  // can find as complete strings in the source, so assembling `bg-success` +
  // `/20` at runtime would leave the icon background unstyled.
  const statusConfig = {
    online: { icon: CheckCircle, color: 'text-success', bg: 'bg-success/20' },
    offline: { icon: XCircle, color: 'text-error', bg: 'bg-error/20' },
    configured: { icon: AlertTriangle, color: 'text-warning', bg: 'bg-warning/20' },
  };

  const { icon: Icon, color, bg } = statusConfig[status];

  return (
    <div className="bg-gradient-to-br from-primary/10 to-secondary/10 border border-primary/20 rounded-xl p-5 text-center">
      <div className={`inline-flex items-center justify-center w-8 h-8 rounded-full ${bg} mb-2`}>
        <Icon className={color} size={20} />
      </div>
      <div className="text-sm text-text-secondary uppercase tracking-wide">{label}</div>
      <div className={`font-semibold ${color} capitalize`}>{status}</div>
    </div>
  );
}
|
||||
@@ -0,0 +1,183 @@
|
||||
import { useState } from 'react';
|
||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import { adminApi } from '../api/client';
|
||||
import { useAuth } from '../context/AuthContext';
|
||||
import { Users, Shield, User, Trash2, AlertCircle } from 'lucide-react';
|
||||
import type { User as UserType } from '../types';
|
||||
|
||||
/**
 * Admin page listing all users, with promote/demote and delete actions.
 *
 * The signed-in admin's own row shows a "(You)" marker and has no actions.
 * Deleting requires a second click on "Confirm". A failed role change or
 * delete is reported in an alert banner above the table.
 */
export function AdminUsers() {
  const { user: currentUser } = useAuth();
  const queryClient = useQueryClient();
  // Id of the user whose delete button is in "confirm" state, if any.
  const [deleteConfirm, setDeleteConfirm] = useState<number | null>(null);

  const { data: users, isLoading, isError } = useQuery({
    queryKey: ['admin-users'],
    queryFn: () => adminApi.listUsers(),
  });

  const updateRoleMutation = useMutation({
    mutationFn: ({ userId, role }: { userId: number; role: 'admin' | 'user' }) =>
      adminApi.updateUserRole(userId, role),
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: ['admin-users'] });
    },
  });

  const deleteMutation = useMutation({
    mutationFn: (userId: number) => adminApi.deleteUser(userId),
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: ['admin-users'] });
      setDeleteConfirm(null);
    },
  });

  // Without this, a rejected request would leave the table unchanged with no
  // indication that the action failed.
  const actionFailed = updateRoleMutation.isError || deleteMutation.isError;

  // Toggles between the two roles.
  const handleRoleChange = (user: UserType) => {
    const newRole = user.role === 'admin' ? 'user' : 'admin';
    updateRoleMutation.mutate({ userId: user.id, role: newRole });
  };

  const handleDelete = (userId: number) => {
    deleteMutation.mutate(userId);
  };

  if (isLoading) {
    return (
      <div className="flex items-center justify-center min-h-[400px]">
        <div className="animate-spin rounded-full h-12 w-12 border-t-2 border-b-2 border-primary"></div>
      </div>
    );
  }

  if (isError) {
    return (
      <div className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-xl px-4 py-3">
        <AlertCircle size={18} />
        <span>Failed to load users.</span>
      </div>
    );
  }

  return (
    <div className="space-y-6">
      {/* Header */}
      <div className="flex items-center justify-between">
        <div>
          <h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
            User Management
          </h2>
          <p className="text-text-secondary">Manage user accounts and permissions.</p>
        </div>
        <div className="flex items-center gap-2 bg-primary/10 border border-primary/20 rounded-xl px-4 py-2">
          <Users size={18} className="text-primary" />
          <span className="text-text-primary font-semibold">{users?.length || 0} Users</span>
        </div>
      </div>

      {/* Action error */}
      {actionFailed && (
        <div
          role="alert"
          className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-xl px-4 py-3"
        >
          <AlertCircle size={18} />
          <span>Action failed. Please try again.</span>
        </div>
      )}

      {/* Users Table */}
      <div className="bg-bg-card/60 border border-primary/15 rounded-2xl overflow-hidden">
        <div className="overflow-x-auto">
          <table className="w-full">
            <thead>
              <tr className="border-b border-primary/10">
                <th className="text-left px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
                  User
                </th>
                <th className="text-left px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
                  Role
                </th>
                <th className="text-left px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
                  Created
                </th>
                <th className="text-right px-6 py-4 text-sm font-semibold text-text-secondary uppercase tracking-wider">
                  Actions
                </th>
              </tr>
            </thead>
            <tbody className="divide-y divide-primary/10">
              {users?.map((user) => (
                <tr key={user.id} className="hover:bg-bg-card-hover/50 transition-colors">
                  <td className="px-6 py-4">
                    <div className="flex items-center gap-3">
                      <div className="w-10 h-10 rounded-full bg-gradient-to-br from-primary/20 to-secondary/20 flex items-center justify-center">
                        {user.role === 'admin' ? (
                          <Shield className="text-primary" size={18} />
                        ) : (
                          <User className="text-secondary" size={18} />
                        )}
                      </div>
                      <div>
                        <div className="font-medium text-text-primary">{user.email}</div>
                        {user.id === currentUser?.id && (
                          <span className="text-xs text-primary">(You)</span>
                        )}
                      </div>
                    </div>
                  </td>
                  <td className="px-6 py-4">
                    <span
                      className={`inline-flex items-center gap-1 px-3 py-1 rounded-full text-xs font-semibold uppercase ${
                        user.role === 'admin'
                          ? 'bg-primary/20 text-primary border border-primary/30'
                          : 'bg-secondary/20 text-secondary border border-secondary/30'
                      }`}
                    >
                      {user.role === 'admin' ? <Shield size={12} /> : <User size={12} />}
                      {user.role}
                    </span>
                  </td>
                  <td className="px-6 py-4 text-text-secondary">
                    {new Date(user.created_at).toLocaleDateString()}
                  </td>
                  <td className="px-6 py-4">
                    <div className="flex items-center justify-end gap-2">
                      {/* No actions on the signed-in admin's own row. */}
                      {user.id !== currentUser?.id && (
                        <>
                          <button
                            onClick={() => handleRoleChange(user)}
                            disabled={updateRoleMutation.isPending}
                            className={`px-3 py-1.5 rounded-lg text-sm font-medium transition-all ${
                              user.role === 'admin'
                                ? 'bg-secondary/10 text-secondary hover:bg-secondary/20 border border-secondary/30'
                                : 'bg-primary/10 text-primary hover:bg-primary/20 border border-primary/30'
                            } disabled:opacity-50`}
                          >
                            {user.role === 'admin' ? 'Demote' : 'Promote'}
                          </button>

                          {deleteConfirm === user.id ? (
                            <div className="flex items-center gap-1">
                              <button
                                onClick={() => handleDelete(user.id)}
                                disabled={deleteMutation.isPending}
                                className="px-3 py-1.5 rounded-lg text-sm font-medium bg-error text-white hover:bg-error/80 transition-all disabled:opacity-50"
                              >
                                Confirm
                              </button>
                              <button
                                onClick={() => setDeleteConfirm(null)}
                                className="px-3 py-1.5 rounded-lg text-sm font-medium bg-bg-card-hover text-text-secondary hover:text-text-primary transition-all"
                              >
                                Cancel
                              </button>
                            </div>
                          ) : (
                            <button
                              onClick={() => setDeleteConfirm(user.id)}
                              // Icon-only button: give assistive technology a name.
                              aria-label={`Delete ${user.email}`}
                              className="p-1.5 rounded-lg text-error/70 hover:text-error hover:bg-error/10 transition-all"
                            >
                              <Trash2 size={18} />
                            </button>
                          )}
                        </>
                      )}
                    </div>
                  </td>
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      </div>
    </div>
  );
}
|
||||
@@ -0,0 +1,135 @@
|
||||
import { useState } from 'react';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
import { analysisApi } from '../api/client';
|
||||
import { Search, CheckCircle, AlertCircle, Clock, FileText } from 'lucide-react';
|
||||
import type { CompanyAnalysis } from '../types';
|
||||
|
||||
/**
 * Single-company analysis page: a search form that triggers
 * `analysisApi.analyzeCompany` and renders the returned status, metrics and
 * analysis text.
 */
export function Analysis() {
  const [companyName, setCompanyName] = useState('');
  const [result, setResult] = useState<CompanyAnalysis | null>(null);

  const mutation = useMutation({
    mutationFn: (name: string) => analysisApi.analyzeCompany(name),
    onSuccess: (data) => setResult(data),
  });

  // Submits the trimmed company name; blank input is ignored.
  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    const name = companyName.trim();
    if (!name) {
      return;
    }
    // Drop the previous result first. Otherwise the last company's analysis
    // stays on screen while the new request runs, and next to the error
    // banner if the new request fails.
    setResult(null);
    mutation.mutate(name);
  };

  return (
    <div className="space-y-6">
      {/* Header */}
      <div>
        <h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
          Single Company Analysis
        </h2>
        <p className="text-text-secondary">
          Analyze a company's patent portfolio using AI-powered insights.
        </p>
      </div>

      {/* Search Form */}
      <form onSubmit={handleSubmit} className="flex gap-4">
        <div className="flex-1 relative">
          <Search className="absolute left-4 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
          <input
            type="text"
            value={companyName}
            onChange={(e) => setCompanyName(e.target.value)}
            placeholder="Enter company name (e.g., nvidia, intel, amd)"
            className="w-full bg-bg-card/80 border border-primary/30 rounded-xl pl-12 pr-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all"
          />
        </div>
        <button
          type="submit"
          disabled={mutation.isPending || !companyName.trim()}
          className="bg-gradient-to-r from-primary to-primary-dark text-white font-semibold py-3 px-6 rounded-xl hover:shadow-lg hover:shadow-primary/30 transition-all disabled:opacity-50 disabled:cursor-not-allowed flex items-center gap-2"
        >
          {mutation.isPending ? (
            <div className="animate-spin rounded-full h-5 w-5 border-t-2 border-b-2 border-white"></div>
          ) : (
            <>
              <Search size={18} />
              Analyze
            </>
          )}
        </button>
      </form>

      {/* Error */}
      {mutation.isError && (
        <div className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-xl px-4 py-3">
          <AlertCircle size={18} />
          <span>Analysis failed. Please try again.</span>
        </div>
      )}

      {/* Results */}
      {result && (
        <div className="space-y-6">
          {/* Success/Failure Status */}
          {result.success ? (
            <div className="flex items-center gap-2 bg-success/10 border border-success/20 text-success rounded-xl px-4 py-3">
              <CheckCircle size={18} />
              <span>Analysis complete for {result.company_name.toUpperCase()}</span>
            </div>
          ) : (
            <div className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-xl px-4 py-3">
              <AlertCircle size={18} />
              <span>Analysis failed: {result.error}</span>
            </div>
          )}

          {/* Metrics */}
          <div className="grid grid-cols-1 md:grid-cols-3 gap-4">
            <MetricCard
              icon={FileText}
              label="Patents Found"
              value={result.patent_count.toString()}
            />
            <MetricCard
              icon={CheckCircle}
              label="Analysis Status"
              value={result.success ? 'Complete' : 'Failed'}
            />
            <MetricCard
              icon={Clock}
              label="Timestamp"
              value={new Date(result.timestamp).toLocaleTimeString()}
            />
          </div>

          {/* Analysis Content */}
          {result.success && result.analysis && (
            <div className="bg-bg-card/60 backdrop-blur-lg border border-primary/15 rounded-2xl p-6">
              <h3 className="text-lg font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-4">
                AI Analysis Results
              </h3>
              <div className="prose prose-invert max-w-none">
                <div className="text-text-primary whitespace-pre-wrap leading-relaxed">
                  {result.analysis}
                </div>
              </div>
            </div>
          )}
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
/**
 * Centered card showing an icon, a prominent value and a caption.
 *
 * @param icon  Icon component rendered at the top of the card.
 * @param label Caption shown below the value.
 * @param value Text shown as the main figure.
 */
function MetricCard(props: { icon: typeof FileText; label: string; value: string }) {
  const { icon: CardIcon, label, value } = props;

  const cardClasses =
    'bg-gradient-to-br from-primary/10 to-secondary/10 border border-primary/20 rounded-xl p-5 text-center';
  const valueClasses =
    'text-2xl font-bold bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent';

  return (
    <div className={cardClasses}>
      <CardIcon className="mx-auto mb-2 text-primary" size={24} />
      <div className={valueClasses}>{value}</div>
      <div className="text-sm text-text-secondary uppercase tracking-wide mt-1">{label}</div>
    </div>
  );
}
|
||||
@@ -0,0 +1,179 @@
|
||||
import { useState } from 'react';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import { analyticsApi } from '../api/client';
|
||||
import { AlertCircle, Database } from 'lucide-react';
|
||||
import { PieChart, Pie, Cell, BarChart, Bar, XAxis, YAxis, Tooltip, ResponsiveContainer, Legend } from 'recharts';
|
||||
|
||||
const COLORS = ['#6366f1', '#0ea5e9', '#10b981', '#f59e0b', '#ef4444', '#8b5cf6', '#ec4899', '#14b8a6'];
|
||||
|
||||
export function AnalyticsPage() {
|
||||
const [days, setDays] = useState(30);
|
||||
|
||||
const { data, isLoading, isError } = useQuery({
|
||||
queryKey: ['analytics', days],
|
||||
queryFn: () => analyticsApi.getAnalytics(days),
|
||||
});
|
||||
|
||||
if (isLoading) {
|
||||
return (
|
||||
<div className="flex items-center justify-center min-h-[400px]">
|
||||
<div className="animate-spin rounded-full h-12 w-12 border-t-2 border-b-2 border-primary"></div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (isError) {
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div>
|
||||
<h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
|
||||
Analytics Dashboard
|
||||
</h2>
|
||||
</div>
|
||||
<div className="bg-gradient-to-br from-primary/10 to-secondary/5 border border-primary/20 rounded-xl p-6">
|
||||
<div className="flex items-center gap-3 text-warning mb-2">
|
||||
<Database size={24} />
|
||||
<span className="font-semibold">Database Not Connected</span>
|
||||
</div>
|
||||
<p className="text-text-secondary">
|
||||
Set <code className="bg-bg-card px-2 py-1 rounded">USE_DATABASE=true</code> in your .env file to enable analytics tracking.
|
||||
</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2 bg-secondary/10 border border-secondary/20 text-secondary rounded-xl px-4 py-3">
|
||||
<AlertCircle size={18} />
|
||||
<span>Analytics features require storing analysis results in PostgreSQL for historical tracking.</span>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
if (!data || (data.total_messages === 0 && data.by_company.length === 0)) {
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
<div>
|
||||
<h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
|
||||
Analytics Dashboard
|
||||
</h2>
|
||||
<p className="text-text-secondary">Track historical analysis data and view insights.</p>
|
||||
</div>
|
||||
<div className="flex items-center gap-2 bg-secondary/10 border border-secondary/20 text-secondary rounded-xl px-4 py-3">
|
||||
<AlertCircle size={18} />
|
||||
<span>No analytics data available yet. Run some analyses first!</span>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
const companyData = data.by_company.map((c) => ({
|
||||
name: (c.company_name || 'Unknown').toUpperCase(),
|
||||
value: c.count,
|
||||
}));
|
||||
|
||||
const typeData = data.by_type.map((t) => ({
|
||||
name: t.analysis_type || 'Unknown',
|
||||
count: t.count,
|
||||
}));
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
{/* Header */}
|
||||
<div className="flex flex-wrap items-center justify-between gap-4">
|
||||
<div>
|
||||
<h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
|
||||
Analytics Dashboard
|
||||
</h2>
|
||||
<p className="text-text-secondary">Track historical analysis data and view insights.</p>
|
||||
</div>
|
||||
|
||||
{/* Time Range Selector */}
|
||||
<select
|
||||
value={days}
|
||||
onChange={(e) => setDays(Number(e.target.value))}
|
||||
className="bg-bg-card/80 border border-primary/30 rounded-xl px-4 py-2 text-text-primary focus:outline-none focus:border-primary"
|
||||
>
|
||||
<option value={7}>Last 7 days</option>
|
||||
<option value={14}>Last 14 days</option>
|
||||
<option value={30}>Last 30 days</option>
|
||||
<option value={90}>Last 90 days</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
{/* Summary Metrics */}
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-4">
|
||||
<MetricCard label="Total Analyses" value={data.total_messages} />
|
||||
<MetricCard label="Companies Analyzed" value={data.by_company.length} />
|
||||
<MetricCard label="Analysis Types" value={data.by_type.length} />
|
||||
</div>
|
||||
|
||||
{/* Charts */}
|
||||
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
|
||||
{/* Pie Chart - Distribution by Company */}
|
||||
{companyData.length > 0 && (
|
||||
<div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-6">
|
||||
<h3 className="text-lg font-semibold text-text-primary mb-4">Distribution by Company</h3>
|
||||
<ResponsiveContainer width="100%" height={300}>
|
||||
<PieChart>
|
||||
<Pie
|
||||
data={companyData}
|
||||
cx="50%"
|
||||
cy="50%"
|
||||
innerRadius={60}
|
||||
outerRadius={100}
|
||||
paddingAngle={2}
|
||||
dataKey="value"
|
||||
label={({ name, percent }) => `${name} ${(percent * 100).toFixed(0)}%`}
|
||||
labelLine={false}
|
||||
>
|
||||
{companyData.map((_, index) => (
|
||||
<Cell key={`cell-${index}`} fill={COLORS[index % COLORS.length]} />
|
||||
))}
|
||||
</Pie>
|
||||
<Tooltip
|
||||
contentStyle={{
|
||||
backgroundColor: '#1e293b',
|
||||
border: '1px solid rgba(99, 102, 241, 0.3)',
|
||||
borderRadius: '8px',
|
||||
}}
|
||||
/>
|
||||
<Legend />
|
||||
</PieChart>
|
||||
</ResponsiveContainer>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Bar Chart - Analysis Types */}
|
||||
{typeData.length > 0 && (
|
||||
<div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-6">
|
||||
<h3 className="text-lg font-semibold text-text-primary mb-4">Analysis Types</h3>
|
||||
<ResponsiveContainer width="100%" height={300}>
|
||||
<BarChart data={typeData}>
|
||||
<XAxis dataKey="name" stroke="#94a3b8" fontSize={12} />
|
||||
<YAxis stroke="#94a3b8" fontSize={12} />
|
||||
<Tooltip
|
||||
contentStyle={{
|
||||
backgroundColor: '#1e293b',
|
||||
border: '1px solid rgba(99, 102, 241, 0.3)',
|
||||
borderRadius: '8px',
|
||||
}}
|
||||
labelStyle={{ color: '#f8fafc' }}
|
||||
/>
|
||||
<Bar dataKey="count" fill="#6366f1" radius={[4, 4, 0, 0]} />
|
||||
</BarChart>
|
||||
</ResponsiveContainer>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Centered summary tile: a large gradient-styled number with an uppercase
 * caption underneath.
 */
function MetricCard(props: { label: string; value: number }) {
  const { label, value } = props;

  const figure = (
    <div className="text-3xl font-bold bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent">
      {value}
    </div>
  );

  return (
    <div className="bg-gradient-to-br from-primary/10 to-secondary/10 border border-primary/20 rounded-xl p-5 text-center">
      {figure}
      <div className="text-sm text-text-secondary uppercase tracking-wide mt-1">{label}</div>
    </div>
  );
}
|
||||
@@ -0,0 +1,248 @@
|
||||
import { useState } from 'react';
|
||||
import { useMutation } from '@tanstack/react-query';
|
||||
import { analysisApi } from '../api/client';
|
||||
import { Rocket, CheckCircle, AlertCircle, ChevronDown, ChevronUp } from 'lucide-react';
|
||||
import { BarChart, Bar, XAxis, YAxis, Tooltip, ResponsiveContainer, Cell } from 'recharts';
|
||||
import type { BatchAnalysisResult } from '../types';
|
||||
|
||||
/**
 * Split the raw textarea value into company names.
 *
 * Names may be separated by commas or newlines. Each name is trimmed, empty
 * entries are dropped, and exact duplicates are removed (first occurrence
 * wins) so the same company is never submitted twice in one batch.
 */
function parseCompanyNames(raw: string): string[] {
  const names = raw
    .split(/[,\n]/)
    .map((name) => name.trim())
    .filter((name) => name.length > 0);
  return Array.from(new Set(names));
}

/**
 * Batch analysis page.
 *
 * Collects a list of company names and a worker count, submits them through
 * `analysisApi.analyzeBatch`, and renders summary cards, a per-company patent
 * count chart and expandable per-company details for the returned result.
 */
export function Batch() {
  const [companiesInput, setCompaniesInput] = useState('');
  const [maxWorkers, setMaxWorkers] = useState(3);
  const [result, setResult] = useState<BatchAnalysisResult | null>(null);
  const [expandedItems, setExpandedItems] = useState<Set<string>>(new Set());

  const mutation = useMutation({
    mutationFn: ({ companies, workers }: { companies: string[]; workers: number }) =>
      analysisApi.analyzeBatch(companies, workers),
    onSuccess: (data) => setResult(data),
  });

  // Derived from the parsed list (not the raw text) so input such as "," or
  // blank lines does not enable a submit button that would then do nothing.
  const hasCompanies = parseCompanyNames(companiesInput).length > 0;

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    const companies = parseCompanyNames(companiesInput);

    if (companies.length > 0) {
      mutation.mutate({ companies, workers: maxWorkers });
    }
  };

  const toggleExpand = (company: string) => {
    // Functional update: always derive the next set from the latest state.
    setExpandedItems((prev) => {
      const next = new Set(prev);
      // Set.delete reports whether the entry existed; add it when it did not.
      if (!next.delete(company)) {
        next.add(company);
      }
      return next;
    });
  };

  const chartData = result?.results.map((r) => ({
    name: r.company_name.toUpperCase(),
    patents: r.patent_count,
    success: r.success,
  }));

  // Guard the division so an empty result renders "0%" instead of "NaN%".
  const successRate =
    result && result.total_companies > 0
      ? Math.round((result.successful / result.total_companies) * 100)
      : 0;

  return (
    <div className="space-y-6">
      {/* Header */}
      <div>
        <h2 className="text-xl font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-2">
          Batch Company Analysis
        </h2>
        <p className="text-text-secondary">
          Analyze multiple companies simultaneously for comparative insights.
        </p>
      </div>

      {/* Input Form */}
      <form onSubmit={handleSubmit} className="grid grid-cols-1 md:grid-cols-3 gap-4">
        <div className="md:col-span-2">
          <textarea
            aria-label="Company names"
            value={companiesInput}
            onChange={(e) => setCompaniesInput(e.target.value)}
            placeholder="Enter company names (one per line or comma-separated): nvidia amd intel qualcomm"
            rows={6}
            className="w-full bg-bg-card/80 border border-primary/30 rounded-xl px-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all resize-none"
          />
        </div>

        <div className="space-y-4">
          <div>
            <label
              htmlFor="batch-max-workers"
              className="block text-sm font-medium text-text-secondary mb-2"
            >
              Concurrent Workers
            </label>
            <input
              id="batch-max-workers"
              type="range"
              min={1}
              max={5}
              value={maxWorkers}
              onChange={(e) => setMaxWorkers(Number(e.target.value))}
              className="w-full accent-primary"
            />
            <div className="text-center text-text-primary font-semibold">{maxWorkers}</div>
          </div>

          <button
            type="submit"
            disabled={mutation.isPending || !hasCompanies}
            className="w-full bg-gradient-to-r from-primary to-primary-dark text-white font-semibold py-3 px-6 rounded-xl hover:shadow-lg hover:shadow-primary/30 transition-all disabled:opacity-50 disabled:cursor-not-allowed flex items-center justify-center gap-2"
          >
            {mutation.isPending ? (
              <div className="animate-spin rounded-full h-5 w-5 border-t-2 border-b-2 border-white"></div>
            ) : (
              <>
                <Rocket size={18} />
                Run Batch Analysis
              </>
            )}
          </button>
        </div>
      </form>

      {/* Progress */}
      {mutation.isPending && (
        <div className="bg-bg-card/60 border border-primary/15 rounded-xl p-4">
          <div className="flex items-center gap-2 text-secondary">
            <div className="animate-spin rounded-full h-4 w-4 border-t-2 border-b-2 border-secondary"></div>
            <span>Analyzing companies...</span>
          </div>
        </div>
      )}

      {/* Error */}
      {mutation.isError && (
        <div className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-xl px-4 py-3">
          <AlertCircle size={18} />
          <span>Batch analysis failed. Please try again.</span>
        </div>
      )}

      {/* Results */}
      {result && (
        <div className="space-y-6">
          {/* Summary Metrics */}
          <div>
            <h3 className="text-lg font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-4">
              Results Summary
            </h3>
            <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
              <SummaryCard label="Total Companies" value={result.total_companies} />
              <SummaryCard label="Successful" value={result.successful} color="success" />
              <SummaryCard label="Failed" value={result.failed} color="error" />
              <SummaryCard label="Success Rate" value={`${successRate}%`} />
            </div>
          </div>

          {/* Chart */}
          {chartData && chartData.length > 0 && (
            <div className="bg-bg-card/60 border border-primary/15 rounded-2xl p-6">
              <ResponsiveContainer width="100%" height={300}>
                <BarChart data={chartData}>
                  <XAxis dataKey="name" stroke="#94a3b8" fontSize={12} />
                  <YAxis stroke="#94a3b8" fontSize={12} />
                  <Tooltip
                    contentStyle={{
                      backgroundColor: '#1e293b',
                      border: '1px solid rgba(99, 102, 241, 0.3)',
                      borderRadius: '8px',
                    }}
                    labelStyle={{ color: '#f8fafc' }}
                  />
                  <Bar dataKey="patents" radius={[4, 4, 0, 0]}>
                    {chartData.map((entry, index) => (
                      <Cell
                        key={`cell-${index}`}
                        fill={entry.success ? '#10b981' : '#ef4444'}
                      />
                    ))}
                  </Bar>
                </BarChart>
              </ResponsiveContainer>
            </div>
          )}

          {/* Detailed Results */}
          <div>
            <h3 className="text-lg font-semibold text-text-primary border-b-2 border-primary/30 pb-2 mb-4">
              Detailed Results
            </h3>
            <div className="space-y-3">
              {result.results.map((r, index) => {
                const isExpanded = expandedItems.has(r.company_name);
                return (
                  <div
                    // Index suffix keeps keys unique even if the response
                    // repeats a company name.
                    key={`${r.company_name}-${index}`}
                    className="bg-bg-card/60 border border-primary/15 rounded-xl overflow-hidden"
                  >
                    <button
                      type="button"
                      aria-expanded={isExpanded}
                      onClick={() => toggleExpand(r.company_name)}
                      className="w-full flex items-center justify-between p-4 hover:bg-bg-card-hover transition-colors"
                    >
                      <div className="flex items-center gap-3">
                        {r.success ? (
                          <CheckCircle className="text-success" size={20} />
                        ) : (
                          <AlertCircle className="text-error" size={20} />
                        )}
                        <span className="font-semibold text-text-primary">
                          {r.company_name.toUpperCase()}
                        </span>
                        <span className="text-text-secondary">
                          {r.patent_count} patents
                        </span>
                      </div>
                      {isExpanded ? (
                        <ChevronUp className="text-text-secondary" size={20} />
                      ) : (
                        <ChevronDown className="text-text-secondary" size={20} />
                      )}
                    </button>
                    {isExpanded && (
                      <div className="border-t border-primary/10 p-4 bg-bg-dark/40">
                        {r.success ? (
                          <div className="text-text-primary whitespace-pre-wrap leading-relaxed">
                            {r.analysis}
                          </div>
                        ) : (
                          <div className="text-error">{r.error}</div>
                        )}
                      </div>
                    )}
                  </div>
                );
              })}
            </div>
          </div>
        </div>
      )}
    </div>
  );
}
|
||||
|
||||
/**
 * Small centered tile showing one value above an uppercase caption.
 *
 * `color` picks a solid success/error text colour for the value; when it is
 * omitted the value is drawn with the primary-to-secondary gradient.
 */
function SummaryCard({
  label,
  value,
  color,
}: {
  label: string;
  value: number | string;
  color?: 'success' | 'error';
}) {
  // Start from the gradient default and override only for the two named tones.
  let valueTone = 'bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent';
  if (color === 'success') {
    valueTone = 'text-success';
  } else if (color === 'error') {
    valueTone = 'text-error';
  }

  return (
    <div className="bg-gradient-to-br from-primary/10 to-secondary/10 border border-primary/20 rounded-xl p-4 text-center">
      <div className={`text-2xl font-bold ${valueTone}`}>{value}</div>
      <div className="text-sm text-text-secondary uppercase tracking-wide mt-1">{label}</div>
    </div>
  );
}
|
||||
@@ -0,0 +1,121 @@
|
||||
import { useState } from 'react';
|
||||
import { Link, useNavigate, useLocation } from 'react-router-dom';
|
||||
import { useAuth } from '../context/AuthContext';
|
||||
import { LogIn, Mail, Lock, AlertCircle } from 'lucide-react';
|
||||
|
||||
/**
 * Sign-in page.
 *
 * Submits the email/password pair through `login` from the auth context and,
 * on success, navigates to the path stored in `location.state.from`, falling
 * back to `/analysis`. Failures are shown in an inline error banner.
 */
export function Login() {
  const [email, setEmail] = useState('');
  const [password, setPassword] = useState('');
  const [error, setError] = useState('');
  const [isLoading, setIsLoading] = useState(false);

  const { login } = useAuth();
  const navigate = useNavigate();
  const location = useLocation();

  // Where to go after a successful sign-in: the page that redirected here, if any.
  const from = (location.state as { from?: { pathname: string } })?.from?.pathname || '/analysis';

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setError('');
    setIsLoading(true);

    try {
      await login(email, password);
      // `replace` keeps the login page out of the back-button history.
      navigate(from, { replace: true });
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Invalid email or password');
    } finally {
      setIsLoading(false);
    }
  };

  return (
    <div className="min-h-screen bg-gradient-to-br from-bg-dark to-indigo-950 flex items-center justify-center px-4">
      <div className="w-full max-w-md">
        {/* Brand */}
        <div className="text-center mb-8">
          <div className="flex items-center justify-center gap-3 mb-4">
            <span className="text-4xl">⚡</span>
            <h1 className="text-3xl font-bold bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent">
              SPARC
            </h1>
          </div>
          <p className="text-text-secondary">Semiconductor Patent Analytics Dashboard</p>
        </div>

        {/* Login Card */}
        <div className="bg-bg-card/60 backdrop-blur-lg border border-primary/15 rounded-2xl p-8">
          <h2 className="text-xl font-semibold text-text-primary mb-6">Sign in to your account</h2>

          {error && (
            // role="alert" so assistive technology announces the failure.
            <div
              role="alert"
              className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-lg px-4 py-3 mb-6"
            >
              <AlertCircle size={18} />
              <span className="text-sm">{error}</span>
            </div>
          )}

          <form onSubmit={handleSubmit} className="space-y-5">
            <div>
              <label htmlFor="email" className="block text-sm font-medium text-text-secondary mb-2">
                Email
              </label>
              <div className="relative">
                <Mail className="absolute left-3 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
                <input
                  id="email"
                  type="email"
                  autoComplete="email"
                  value={email}
                  onChange={(e) => setEmail(e.target.value)}
                  required
                  className="w-full bg-bg-dark/80 border border-primary/30 rounded-xl pl-10 pr-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all"
                  placeholder="you@example.com"
                />
              </div>
            </div>

            <div>
              <label htmlFor="password" className="block text-sm font-medium text-text-secondary mb-2">
                Password
              </label>
              <div className="relative">
                <Lock className="absolute left-3 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
                <input
                  id="password"
                  type="password"
                  // Tells password managers this is an existing credential, not a new one.
                  autoComplete="current-password"
                  value={password}
                  onChange={(e) => setPassword(e.target.value)}
                  required
                  className="w-full bg-bg-dark/80 border border-primary/30 rounded-xl pl-10 pr-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all"
                  placeholder="Enter your password"
                />
              </div>
            </div>

            <button
              type="submit"
              disabled={isLoading}
              className="w-full bg-gradient-to-r from-primary to-primary-dark text-white font-semibold py-3 px-4 rounded-xl hover:shadow-lg hover:shadow-primary/30 transition-all disabled:opacity-50 disabled:cursor-not-allowed flex items-center justify-center gap-2"
            >
              {isLoading ? (
                <div className="animate-spin rounded-full h-5 w-5 border-t-2 border-b-2 border-white"></div>
              ) : (
                <>
                  <LogIn size={18} />
                  Sign In
                </>
              )}
            </button>
          </form>

          <div className="mt-6 text-center">
            <span className="text-text-secondary text-sm">Don't have an account? </span>
            <Link to="/register" className="text-primary hover:text-primary-dark font-medium text-sm">
              Sign up
            </Link>
          </div>
        </div>
      </div>
    </div>
  );
}
|
||||
@@ -0,0 +1,153 @@
|
||||
import { useState } from 'react';
|
||||
import { Link, useNavigate } from 'react-router-dom';
|
||||
import { useAuth } from '../context/AuthContext';
|
||||
import { UserPlus, Mail, Lock, AlertCircle } from 'lucide-react';
|
||||
|
||||
/**
 * Account creation page.
 *
 * Checks that both password fields match and that the password has at least
 * 8 characters, then calls `register` from the auth context and navigates to
 * `/analysis` on success. Validation and request failures are shown in an
 * inline error banner.
 */
export function Register() {
  const [email, setEmail] = useState('');
  const [password, setPassword] = useState('');
  const [confirmPassword, setConfirmPassword] = useState('');
  const [error, setError] = useState('');
  const [isLoading, setIsLoading] = useState(false);

  const { register } = useAuth();
  const navigate = useNavigate();

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setError('');

    // Client-side checks run before any request is made.
    if (password !== confirmPassword) {
      setError('Passwords do not match');
      return;
    }

    if (password.length < 8) {
      setError('Password must be at least 8 characters');
      return;
    }

    setIsLoading(true);

    try {
      await register(email, password);
      // `replace` keeps the registration page out of the back-button history.
      navigate('/analysis', { replace: true });
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Registration failed');
    } finally {
      setIsLoading(false);
    }
  };

  return (
    <div className="min-h-screen bg-gradient-to-br from-bg-dark to-indigo-950 flex items-center justify-center px-4">
      <div className="w-full max-w-md">
        {/* Brand */}
        <div className="text-center mb-8">
          <div className="flex items-center justify-center gap-3 mb-4">
            <span className="text-4xl">⚡</span>
            <h1 className="text-3xl font-bold bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent">
              SPARC
            </h1>
          </div>
          <p className="text-text-secondary">Semiconductor Patent Analytics Dashboard</p>
        </div>

        {/* Register Card */}
        <div className="bg-bg-card/60 backdrop-blur-lg border border-primary/15 rounded-2xl p-8">
          <h2 className="text-xl font-semibold text-text-primary mb-6">Create your account</h2>

          {error && (
            // role="alert" so assistive technology announces the failure.
            <div
              role="alert"
              className="flex items-center gap-2 bg-error/10 border border-error/20 text-error rounded-lg px-4 py-3 mb-6"
            >
              <AlertCircle size={18} />
              <span className="text-sm">{error}</span>
            </div>
          )}

          <form onSubmit={handleSubmit} className="space-y-5">
            <div>
              <label htmlFor="email" className="block text-sm font-medium text-text-secondary mb-2">
                Email
              </label>
              <div className="relative">
                <Mail className="absolute left-3 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
                <input
                  id="email"
                  type="email"
                  autoComplete="email"
                  value={email}
                  onChange={(e) => setEmail(e.target.value)}
                  required
                  className="w-full bg-bg-dark/80 border border-primary/30 rounded-xl pl-10 pr-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all"
                  placeholder="you@example.com"
                />
              </div>
            </div>

            <div>
              <label htmlFor="password" className="block text-sm font-medium text-text-secondary mb-2">
                Password
              </label>
              <div className="relative">
                <Lock className="absolute left-3 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
                <input
                  id="password"
                  type="password"
                  // Marks the field as a new credential so password managers
                  // offer to generate/save rather than fill an existing one.
                  autoComplete="new-password"
                  value={password}
                  onChange={(e) => setPassword(e.target.value)}
                  required
                  minLength={8}
                  className="w-full bg-bg-dark/80 border border-primary/30 rounded-xl pl-10 pr-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all"
                  placeholder="At least 8 characters"
                />
              </div>
            </div>

            <div>
              <label htmlFor="confirmPassword" className="block text-sm font-medium text-text-secondary mb-2">
                Confirm Password
              </label>
              <div className="relative">
                <Lock className="absolute left-3 top-1/2 -translate-y-1/2 text-text-secondary" size={18} />
                <input
                  id="confirmPassword"
                  type="password"
                  autoComplete="new-password"
                  value={confirmPassword}
                  onChange={(e) => setConfirmPassword(e.target.value)}
                  required
                  className="w-full bg-bg-dark/80 border border-primary/30 rounded-xl pl-10 pr-4 py-3 text-text-primary placeholder-text-secondary/50 focus:outline-none focus:border-primary focus:ring-2 focus:ring-primary/20 transition-all"
                  placeholder="Confirm your password"
                />
              </div>
            </div>

            <button
              type="submit"
              disabled={isLoading}
              className="w-full bg-gradient-to-r from-primary to-primary-dark text-white font-semibold py-3 px-4 rounded-xl hover:shadow-lg hover:shadow-primary/30 transition-all disabled:opacity-50 disabled:cursor-not-allowed flex items-center justify-center gap-2"
            >
              {isLoading ? (
                <div className="animate-spin rounded-full h-5 w-5 border-t-2 border-b-2 border-white"></div>
              ) : (
                <>
                  <UserPlus size={18} />
                  Create Account
                </>
              )}
            </button>
          </form>

          <div className="mt-6 text-center">
            <span className="text-text-secondary text-sm">Already have an account? </span>
            <Link to="/login" className="text-primary hover:text-primary-dark font-medium text-sm">
              Sign in
            </Link>
          </div>
        </div>

        <p className="mt-6 text-center text-xs text-text-secondary">
          The first registered user will automatically become an admin.
        </p>
      </div>
    </div>
  );
}
|
||||
@@ -0,0 +1,46 @@
|
||||
/** A user account as seen by the front end. */
export interface User {
  id: number;
  email: string;
  /** Permission level; exactly these two values are modelled. */
  role: 'admin' | 'user';
  /** Creation time as a string (presumably ISO 8601 — confirm against the API). */
  created_at: string;
}
|
||||
|
||||
/** Token bundle with an access token, a refresh token and the token type. */
export interface TokenResponse {
  access_token: string;
  refresh_token: string;
  /** Token scheme name (presumably "bearer" — confirm against the API). */
  token_type: string;
}
|
||||
|
||||
/** Outcome of analyzing one company. */
export interface CompanyAnalysis {
  company_name: string;
  /** Analysis text; the batch page renders it only when `success` is true. */
  analysis: string;
  patent_count: number;
  success: boolean;
  /** Failure message; the batch page renders it when `success` is false. */
  error: string | null;
  timestamp: string;
}
|
||||
|
||||
/** Aggregate result of one batch run: per-company outcomes plus counters. */
export interface BatchAnalysisResult {
  /** One entry per analyzed company. */
  results: CompanyAnalysis[];
  total_companies: number;
  successful: number;
  failed: number;
  timestamp: string;
}
|
||||
|
||||
/** State of a batch job identified by `job_id`. */
export interface JobStatus {
  job_id: string;
  status: 'pending' | 'running' | 'completed' | 'failed';
  /** Progress figure; scale not visible here (fraction vs. percent — confirm). */
  progress: number;
  total_companies: number;
  completed_companies: number;
  /** Batch result, or null when none is attached. */
  result: BatchAnalysisResult | null;
  /** Error message, or null when none is attached. */
  error: string | null;
}
|
||||
|
||||
/** Aggregated counts consumed by the analytics dashboard. */
export interface Analytics {
  /** Shown on the dashboard as "Total Analyses". */
  total_messages: number;
  /** Per-company counts (pie chart input). */
  by_company: { company_name: string; count: number }[];
  /** Per-analysis-type counts (bar chart input). */
  by_type: { analysis_type: string; count: number }[];
  period_days: number;
}
|
||||
Vendored
+9
@@ -0,0 +1,9 @@
|
||||
/// <reference types="vite/client" />
|
||||
|
||||
/** Typed view of the `VITE_*` variables this app reads from `import.meta.env`. */
interface ImportMetaEnv {
  /** API URL setting (how it is consumed is not visible in this file). */
  readonly VITE_API_URL: string;
}
|
||||
|
||||
/** Gives `import.meta.env` the `ImportMetaEnv` type. */
interface ImportMeta {
  readonly env: ImportMetaEnv;
}
|
||||
@@ -0,0 +1,32 @@
|
||||
// Project colour palette, added on top of Tailwind's defaults via `theme.extend`.
const brandColors = {
  primary: { DEFAULT: '#6366f1', dark: '#4f46e5' },
  secondary: '#0ea5e9',
  success: '#10b981',
  warning: '#f59e0b',
  error: '#ef4444',
  bg: { dark: '#0f172a', card: '#1e293b', 'card-hover': '#334155' },
  text: { primary: '#f8fafc', secondary: '#94a3b8' },
  border: '#334155',
};

// Files Tailwind scans for class names.
const scannedFiles = ['./index.html', './src/**/*.{js,ts,jsx,tsx}'];

/** @type {import('tailwindcss').Config} */
export default {
  content: scannedFiles,
  theme: {
    extend: {
      colors: brandColors,
    },
  },
  plugins: [],
}
|
||||
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2020",
|
||||
"useDefineForClassFields": true,
|
||||
"lib": ["ES2020", "DOM", "DOM.Iterable"],
|
||||
"module": "ESNext",
|
||||
"skipLibCheck": true,
|
||||
"moduleResolution": "bundler",
|
||||
"allowImportingTsExtensions": true,
|
||||
"isolatedModules": true,
|
||||
"moduleDetection": "force",
|
||||
"noEmit": true,
|
||||
"jsx": "react-jsx",
|
||||
"strict": true,
|
||||
"noUnusedLocals": true,
|
||||
"noUnusedParameters": true,
|
||||
"noFallthroughCasesInSwitch": true,
|
||||
"baseUrl": ".",
|
||||
"paths": {
|
||||
"@/*": ["src/*"]
|
||||
}
|
||||
},
|
||||
"include": ["src"]
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
import { defineConfig } from 'vite'
|
||||
import react from '@vitejs/plugin-react'
|
||||
|
||||
// Dev-server settings: the UI is served on port 3000 and `/api/*` requests are
// forwarded to the backend with the `/api` prefix removed.
const DEV_SERVER_PORT = 3000
const BACKEND_TARGET = 'http://localhost:8000'

/** Remove the leading `/api` segment before the request reaches the backend. */
function stripApiPrefix(path: string): string {
  return path.replace(/^\/api/, '')
}

export default defineConfig({
  plugins: [react()],
  server: {
    port: DEV_SERVER_PORT,
    proxy: {
      '/api': {
        target: BACKEND_TARGET,
        changeOrigin: true,
        rewrite: stripApiPrefix,
      },
    },
  },
})
|
||||
@@ -1,10 +1,43 @@
|
||||
from SPARC.serp_api import SERP
|
||||
"""SPARC - Semiconductor Patent & Analytics Report Core
|
||||
|
||||
patents = SERP.query("nvidia")
|
||||
Example usage of the company performance analyzer.
|
||||
|
||||
for patent in patents.patents:
|
||||
patent = SERP.save_patents(patent)
|
||||
patent.summary = SERP.parse_patent_pdf(patent.pdf_path)
|
||||
print(patent.summary)
|
||||
Before running:
|
||||
1. Create a .env file with:
|
||||
API_KEY=your_serpapi_key
|
||||
OPENROUTER_API_KEY=your_openrouter_key
|
||||
|
||||
print(patents)
|
||||
2. Run: python main.py
|
||||
"""
|
||||
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
|
||||
|
||||
def main(company_name: str = "nvidia") -> None:
    """Analyze a company's performance based on their patent portfolio.

    Builds a ``CompanyAnalyzer``, runs ``analyze_company`` for
    ``company_name`` and prints the result between banner lines.

    Args:
        company_name: Company to analyze. Defaults to ``"nvidia"``, the value
            that used to be hard-coded, so calling ``main()`` is unchanged.
    """
    banner = "=" * 70

    # Initialize the analyzer (loads API keys from .env)
    analyzer = CompanyAnalyzer()

    # Analyzing a company will:
    # 1. Retrieve patents from SERP API
    # 2. Download and parse patent PDFs
    # 3. Minimize content (remove bloat)
    # 4. Analyze with Claude to estimate performance
    print(f"\n{banner}")
    print(f"SPARC Patent Analysis - {company_name.upper()}")
    print(f"{banner}\n")

    analysis = analyzer.analyze_company(company_name)

    print(f"\n{banner}")
    print("ANALYSIS RESULTS")
    print(f"{banner}\n")
    print(analysis)
    print(f"\n{banner}\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
+10
-1
@@ -4,4 +4,13 @@ pdfplumber
|
||||
requests
|
||||
pytest
|
||||
pytest-mock
|
||||
anthropic
|
||||
openai
|
||||
psycopg2-binary
|
||||
fastapi
|
||||
uvicorn[standard]
|
||||
pydantic[email]
|
||||
httpx
|
||||
numpy
|
||||
pandas
|
||||
bcrypt
|
||||
PyJWT
|
||||
|
||||
@@ -0,0 +1,227 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Estimate token usage per company portfolio for SPARC analysis."""
|
||||
|
||||
import tiktoken
|
||||
from typing import Dict, List, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class TokenEstimate:
    """Estimated token usage and cost for analyzing one company's patents."""

    # Company the estimate was computed for.
    company_name: str
    # Number of patents included in the estimate.
    patent_count: int
    # Tokens counted across all prompts.
    prompt_tokens: int
    # Output-token budget assumed for the responses.
    estimated_completion_tokens: int
    # prompt_tokens + estimated_completion_tokens.
    total_tokens: int
    # Estimated cost in US dollars.
    cost_estimate_usd: float
|
||||
|
||||
|
||||
class TokenEstimator:
|
||||
"""Estimate token usage for SPARC patent analysis."""
|
||||
|
||||
# Claude 3.5 Sonnet pricing via OpenRouter (per 1M tokens)
|
||||
INPUT_COST_PER_1M = 3.00 # $3.00 per 1M input tokens
|
||||
OUTPUT_COST_PER_1M = 15.00 # $15.00 per 1M output tokens
|
||||
|
||||
# Estimated output tokens based on max_tokens settings
|
||||
SINGLE_PATENT_MAX_OUTPUT = 1024
|
||||
PORTFOLIO_MAX_OUTPUT = 2048
|
||||
|
||||
def __init__(self):
    """Load the tiktoken encoding used for all token counts."""
    # cl100k_base is used as an approximation of Claude's tokenizer.
    encoding_name = "cl100k_base"
    self.encoder = tiktoken.get_encoding(encoding_name)
|
||||
|
||||
def count_tokens(self, text: str) -> int:
|
||||
"""Count tokens in a text string."""
|
||||
return len(self.encoder.encode(text))
|
||||
|
||||
def build_single_patent_prompt(self, patent_content: str, company_name: str) -> str:
    """Build the prompt text for analyzing a single patent.

    Per the original note this is meant to match the prompt in llm.py; that
    file is not visible here, so keep the two in sync manually.

    Args:
        patent_content: Patent text inserted under "Patent Content:".
        company_name: Company named in the opening sentence.

    Returns:
        The full prompt as one newline-joined string.
    """
    prompt_lines = (
        f"You are a patent analyst evaluating {company_name}'s innovation strategy.",
        "",
        "Analyze the following patent content and provide insights on:",
        "1. Innovation quality and novelty",
        "2. Technical complexity and defensibility",
        "3. Market potential and commercial viability",
        "4. Strategic positioning relative to industry trends",
        "",
        "Patent Content:",
        f"{patent_content}",
        "",
        "Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage.",
    )
    return "\n".join(prompt_lines)
|
||||
|
||||
def build_portfolio_prompt(self, patents_data: List[Dict[str, str]], company_name: str) -> str:
    """Build the prompt text for a whole-portfolio analysis.

    Per the original note this is meant to match the prompt in llm.py; that
    file is not visible here, so keep the two in sync manually.

    Args:
        patents_data: Dicts that each provide 'patent_id' and 'content'.
        company_name: Company named in the opening sentence.

    Returns:
        The full prompt, with the patents numbered from 1 and separated by
        "---" rules.
    """
    entries = [
        f"Patent {number} ({item['patent_id']}):\n{item['content']}"
        for number, item in enumerate(patents_data, 1)
    ]
    combined_content = "\n\n---\n\n".join(entries)

    prompt_lines = (
        f"You are analyzing {company_name}'s patent portfolio to estimate their future performance and innovation trajectory.",
        "",
        f"You have {len(patents_data)} recent patents to analyze. Evaluate the portfolio holistically:",
        "",
        "1. Innovation Trends: What technology areas are they focusing on?",
        "2. Strategic Direction: What does this reveal about their business strategy?",
        "3. Competitive Position: How defensible are these innovations?",
        "4. Market Outlook: What market opportunities do these patents target?",
        "5. Performance Forecast: Based on this innovation activity, what's your assessment of their likely performance?",
        "",
        "Patent Portfolio:",
        f"{combined_content}",
        "",
        "Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook.",
    )
    return "\n".join(prompt_lines)
|
||||
|
||||
def estimate_portfolio(
    self,
    company_name: str,
    patents_data: List[Dict[str, str]],
    include_individual_patents: bool = False
) -> TokenEstimate:
    """Estimate token usage and cost for analyzing one company's portfolio.

    Args:
        company_name: Name of the company.
        patents_data: List of dicts with 'patent_id' and 'content' keys.
        include_individual_patents: If True, one single-patent analysis call
            per patent is counted on top of the portfolio call.

    Returns:
        A TokenEstimate with prompt tokens, assumed completion tokens,
        their total, and the resulting cost in USD.
    """
    # The portfolio call: counted prompt, plus the maximum output budget.
    input_tokens = self.count_tokens(
        self.build_portfolio_prompt(patents_data, company_name)
    )
    output_tokens = self.PORTFOLIO_MAX_OUTPUT

    # Optional per-patent calls, each with its own prompt and output budget.
    if include_individual_patents:
        input_tokens += sum(
            self.count_tokens(
                self.build_single_patent_prompt(entry['content'], company_name)
            )
            for entry in patents_data
        )
        output_tokens += self.SINGLE_PATENT_MAX_OUTPUT * len(patents_data)

    # Prices are quoted per one million tokens.
    cost_in = (input_tokens / 1_000_000) * self.INPUT_COST_PER_1M
    cost_out = (output_tokens / 1_000_000) * self.OUTPUT_COST_PER_1M

    return TokenEstimate(
        company_name=company_name,
        patent_count=len(patents_data),
        prompt_tokens=input_tokens,
        estimated_completion_tokens=output_tokens,
        total_tokens=input_tokens + output_tokens,
        cost_estimate_usd=cost_in + cost_out
    )
|
||||
|
||||
def estimate_from_sample(
    self,
    company_name: str,
    patent_count: int = 10,
    avg_patent_chars: int = 5000
) -> TokenEstimate:
    """Estimate tokens for a synthetic portfolio of uniformly sized patents.

    Args:
        company_name: Name of the company.
        patent_count: Number of placeholder patents to generate (default 10).
        avg_patent_chars: Length in characters of each placeholder content.

    Returns:
        The TokenEstimate produced by estimate_portfolio for the synthetic
        data.
    """
    # Every synthetic patent shares the same filler text of the chosen size.
    filler = "A" * avg_patent_chars

    synthetic_patents = []
    for offset in range(patent_count):
        synthetic_patents.append(
            {"patent_id": f"US{10000000 + offset}", "content": filler}
        )

    return self.estimate_portfolio(company_name, synthetic_patents)
|
||||
|
||||
|
||||
def main():
    """Print three worked token-estimation examples to stdout."""
    token_estimator = TokenEstimator()
    heavy_rule = "=" * 70

    print(heavy_rule)
    print("SPARC Token Usage Estimator")
    print(heavy_rule)

    # Example 1: uniform synthetic portfolios for a handful of companies
    print("\n📊 Sample Estimates (10 patents, ~5000 chars each):\n")

    sample_companies = ["Apple Inc.", "Microsoft Corporation", "Tesla Motors", "Google LLC"]

    grand_tokens = 0
    grand_cost = 0.0

    for name in sample_companies:
        est = token_estimator.estimate_from_sample(name, patent_count=10, avg_patent_chars=5000)
        print(f" {name}:")
        print(f" Patents: {est.patent_count}")
        print(f" Prompt tokens: {est.prompt_tokens:,}")
        print(f" Est. completion tokens: {est.estimated_completion_tokens:,}")
        print(f" Total tokens: {est.total_tokens:,}")
        print(f" Est. cost: ${est.cost_estimate_usd:.4f}")
        print()

        grand_tokens += est.total_tokens
        grand_cost += est.cost_estimate_usd

    print("-" * 70)
    print(f" TOTAL for {len(sample_companies)} companies:")
    print(f" Total tokens: {grand_tokens:,}")
    print(f" Total est. cost: ${grand_cost:.4f}")

    # Example 2: how the estimate grows with the number of patents
    print("\n" + heavy_rule)
    print("📈 Token Scaling by Portfolio Size:")
    print(heavy_rule + "\n")

    for size in (5, 10, 15, 20):
        est = token_estimator.estimate_from_sample("Sample Corp", patent_count=size)
        print(f" {size} patents: {est.prompt_tokens:,} prompt tokens, ${est.cost_estimate_usd:.4f}")

    # Example 3: hand-written patents shaped like real minimized content
    print("\n" + heavy_rule)
    print("📝 Example with Real Patent Structure:")
    print(heavy_rule + "\n")

    ml_patent = {
        "patent_id": "US11234567",
        "content": """ABSTRACT: A method for machine learning optimization using gradient descent.

CLAIMS:
1. A computer-implemented method comprising:
receiving input data;
processing the input data through a neural network;
optimizing weights using backpropagation.

SUMMARY: This invention relates to improvements in neural network training efficiency."""
    }
    cloud_patent = {
        "patent_id": "US11234568",
        "content": """ABSTRACT: System for distributed computing in cloud environments.

CLAIMS:
1. A distributed system comprising:
a plurality of compute nodes;
a load balancer;
a message queue for task distribution.

SUMMARY: The present disclosure improves cloud computing resource allocation."""
    }

    est = token_estimator.estimate_portfolio("Tech Corp", [ml_patent, cloud_patent])
    print(f" Company: {est.company_name}")
    print(f" Patents analyzed: {est.patent_count}")
    print(f" Prompt tokens: {est.prompt_tokens:,}")
    print(f" Est. completion: {est.estimated_completion_tokens:,}")
    print(f" Total: {est.total_tokens:,}")
    print(f" Est. cost: ${est.cost_estimate_usd:.4f}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Initialize the SPARC database schema.
|
||||
|
||||
This script creates the necessary tables and indexes for storing
|
||||
LLM messages for testing and analytics.
|
||||
|
||||
Usage:
|
||||
python scripts/init_database.py
|
||||
"""
|
||||
|
||||
import secrets
|
||||
import string
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.database import DatabaseClient
|
||||
|
||||
DEFAULT_ADMIN_EMAIL = "admin@sparc.dev"
|
||||
|
||||
|
||||
def generate_password(length: int = 16) -> str:
    """Return a random alphanumeric password of *length* characters.

    Characters are drawn with the `secrets` module from ASCII letters and
    digits.
    """
    pool = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(secrets.choice(pool))
    return "".join(picked)
|
||||
|
||||
|
||||
def _redact_database_url(url):
    """Return *url* as text with any embedded password replaced by ``***``.

    Connection URLs commonly take the form ``scheme://user:password@host/db``;
    echoing them verbatim would leak the password to the terminal and to any
    captured logs.
    """
    from urllib.parse import urlsplit, urlunsplit

    text = str(url)
    try:
        parts = urlsplit(text)
    except ValueError:
        # Malformed URL: do not risk echoing a credential embedded in it.
        return "<unparseable database URL>"
    if parts.password is None:
        return text
    # Keep the host[:port] portion exactly as written; mask only the password.
    userinfo, _, host_part = parts.netloc.rpartition("@")
    username = userinfo.split(":", 1)[0]
    return urlunsplit(parts._replace(netloc=f"{username}:***@{host_part}"))


def main():
    """Initialize the database schema and ensure a default admin user exists.

    Prints progress to stdout. When the default admin account has to be
    created, a password is generated and shown once. Any exception is
    reported and the process exits with status 1.
    """
    print("Initializing SPARC database...")
    # The URL may carry credentials, so only a redacted form is displayed.
    print(f"Database URL: {_redact_database_url(config.database_url)}")

    try:
        db_client = DatabaseClient(config.database_url)
        db_client.initialize_schema()
        print("Database schema initialized successfully!")
        print("\nTables created:")
        print(" - llm_messages: Stores all LLM prompts and responses")
        print(" - users: Stores user accounts")
        print("\nIndexes created:")
        print(" - idx_messages_timestamp: For time-based queries")
        print(" - idx_messages_company: For company-specific queries")
        print(" - idx_users_email: For user lookups")

        # Create default admin user if not exists
        existing_admin = db_client.get_user_by_email(DEFAULT_ADMIN_EMAIL)
        if existing_admin:
            print(f"\nDefault admin user already exists: {DEFAULT_ADMIN_EMAIL}")
        else:
            password = generate_password()
            admin_user = db_client.create_user(
                email=DEFAULT_ADMIN_EMAIL,
                password=password,
                role="admin",
            )
            if admin_user:
                # The generated password is only ever shown here.
                print("\n" + "=" * 50)
                print("DEFAULT ADMIN CREDENTIALS")
                print("=" * 50)
                print(f"Email: {DEFAULT_ADMIN_EMAIL}")
                print(f"Password: {password}")
                print("=" * 50)
                print("Please save these credentials securely!")
                print("=" * 50)

    except Exception as e:
        print(f"Error initializing database: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python3
|
||||
"""View analytics from the message database.
|
||||
|
||||
This script displays statistics about stored LLM messages including
|
||||
usage by company, analysis type, and time periods.
|
||||
|
||||
Usage:
|
||||
python scripts/view_analytics.py [--days DAYS]
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import json
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.database import DatabaseClient
|
||||
|
||||
|
||||
def main():
    """Print message counts overall, per company, and per analysis type.

    Reads ``--days`` (default 30) from the command line and passes it to the
    database client's analytics query. Any exception is reported and the
    process exits with status 1.
    """
    cli = argparse.ArgumentParser(description="View SPARC message analytics")
    cli.add_argument(
        "--days",
        type=int,
        default=30,
        help="Number of days to analyze (default: 30)"
    )
    options = cli.parse_args()

    rule = "=" * 70
    print(f"SPARC Analytics - Last {options.days} days")
    print(rule)

    def show_breakdown(stats, heading, section, label_field):
        """Print one heading followed by its per-label counts."""
        print(heading)
        rows = stats[section]
        if not rows:
            print(" No data")
            return
        for row in rows:
            label = row[label_field] or '(unknown)'
            print(f" {label}: {row['count']}")

    try:
        client = DatabaseClient(config.database_url)
        stats = client.get_analytics(days=options.days)

        print(f"\nTotal Messages: {stats['total_messages']}")
        show_breakdown(stats, "\nMessages by Company:", 'by_company', 'company_name')
        show_breakdown(stats, "\nMessages by Analysis Type:", 'by_type', 'analysis_type')

        print("\n" + rule)

    except Exception as exc:
        print(f"Error retrieving analytics: {exc}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python3
|
||||
"""View stored messages from the database.
|
||||
|
||||
This script displays stored LLM messages with filtering options.
|
||||
|
||||
Usage:
|
||||
python scripts/view_messages.py [--company COMPANY] [--type TYPE] [--limit LIMIT]
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
|
||||
# Add parent directory to path
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from SPARC import config
|
||||
from SPARC.database import DatabaseClient
|
||||
|
||||
|
||||
def _preview(text, limit=200):
    """Return *text* cut to *limit* characters, adding '...' only if it was cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def main():
    """Print stored LLM messages, optionally filtered by company and type.

    Command-line options:
        --company: only show messages for this company name.
        --type: only show 'single_patent' or 'portfolio' analyses.
        --limit: maximum number of messages to display (default 10).

    Prompt and response are shown as previews of at most 200 characters; an
    ellipsis is appended only when the text was actually truncated, and a
    missing prompt or response is shown as a placeholder. Any exception is
    reported and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="View stored SPARC messages")
    parser.add_argument(
        "--company",
        type=str,
        help="Filter by company name"
    )
    parser.add_argument(
        "--type",
        type=str,
        choices=["single_patent", "portfolio"],
        help="Filter by analysis type"
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=10,
        help="Maximum number of messages to display (default: 10)"
    )
    args = parser.parse_args()

    print("SPARC Stored Messages")
    print("=" * 70)

    try:
        db_client = DatabaseClient(config.database_url)
        messages = db_client.get_messages(
            company_name=args.company,
            analysis_type=args.type,
            limit=args.limit
        )

        if not messages:
            print("\nNo messages found.")
            return

        print(f"\nShowing {len(messages)} message(s):\n")

        for msg in messages:
            print(f"Message #{msg['id']} - {msg['timestamp']}")
            print(f"Company: {msg['company_name'] or '(unknown)'}")
            print(f"Type: {msg['analysis_type'] or '(unknown)'}")
            print(f"Model: {msg['model'] or '(unknown)'}")

            # A stored row may lack a prompt or a response; show a
            # placeholder instead of slicing None.
            prompt = msg['prompt']
            print("\nPrompt (first 200 chars):")
            print(f" {_preview(prompt) if prompt else '(no prompt)'}")

            response = msg['response']
            print("\nResponse (first 200 chars):")
            print(f" {_preview(response) if response else '(no response)'}")
            print("\n" + "-" * 70 + "\n")

    except Exception as e:
        print(f"Error retrieving messages: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Test script to verify database caching functionality.
|
||||
|
||||
This script tests the LLMAnalyzer with database caching without requiring
|
||||
actual API keys or patent downloads.
|
||||
"""
|
||||
|
||||
from SPARC.llm import LLMAnalyzer
|
||||
from SPARC.database import DatabaseClient
|
||||
from SPARC import config
|
||||
|
||||
def test_database_storage():
    """Run both analyses for "TestCorp" and list what the database holds.

    Builds an LLMAnalyzer with caching enabled, runs a single-patent and a
    portfolio analysis, then reads the stored messages and the one-day
    analytics back through a fresh DatabaseClient and prints them.
    """
    rule = "=" * 70
    print("Testing Database Storage & Caching")
    print(rule)

    # Initialize analyzer (database is always used)
    print("\n1. Initializing LLMAnalyzer...")
    llm = LLMAnalyzer(use_cache=True)

    print(f" - use_cache: {llm.use_cache}")
    print(f" - db_client: {llm.db_client is not None}")
    print(f" - client (API): {llm.client is not None}")

    # Single patent analysis (without API key, stores placeholder)
    print("\n2. Testing single patent analysis (no API key)...")
    single_result = llm.analyze_patent_content(
        patent_content="Test patent content about semiconductor innovation",
        company_name="TestCorp"
    )
    print(f" Result: {single_result[:80]}...")

    # Portfolio analysis over two tiny patents
    print("\n3. Testing portfolio analysis (no API key)...")
    portfolio_result = llm.analyze_patent_portfolio(
        patents_data=[
            {"patent_id": "US001", "content": "First test patent"},
            {"patent_id": "US002", "content": "Second test patent"},
        ],
        company_name="TestCorp"
    )
    print(f" Result: {portfolio_result[:80]}...")

    # Read back what was stored for this company
    print("\n4. Verifying messages were stored...")
    store = DatabaseClient(config.database_url)
    stored = store.get_messages(company_name="TestCorp", limit=10)
    print(f" Found {len(stored)} stored messages")

    for row in stored:
        if row.get('is_cached'):
            status = "CACHED"
        else:
            status = "NEW"
        print(f" - ID: {row['id']}, Type: {row['analysis_type']}, Status: {status}")

    # Aggregate view over the last day
    print("\n5. Getting analytics...")
    summary = store.get_analytics(days=1)
    print(f" Total messages: {summary['total_messages']}")
    print(f" By company: {summary['by_company']}")
    print(f" By type: {summary['by_type']}")

    print("\n" + rule)
    print("Database storage test completed successfully!")
|
||||
|
||||
def test_caching():
    """Check that a stored response is returned for its prompt, and only for it.

    Stores one message, then asserts that looking up the same prompt is a
    cache hit carrying the stored response, and that a different prompt for
    the same company and analysis type is a miss. Failed expectations now
    raise instead of being printed and followed by a success message.
    """
    print("\nTesting Cache Functionality")
    print("=" * 70)

    db_client = DatabaseClient(config.database_url)
    db_client.initialize_schema()

    # Store a fake cached response
    print("\n1. Storing a test response in database...")
    test_prompt = "Test prompt for caching"
    test_response = "This is a cached response from previous API call"

    db_client.store_message(
        prompt=test_prompt,
        response=test_response,
        company_name="CacheTest",
        analysis_type="test",
        model="test-model"
    )

    # Try to retrieve from cache
    print("\n2. Testing cache retrieval...")
    cached = db_client.get_cached_response(
        prompt=test_prompt,
        company_name="CacheTest",
        analysis_type="test"
    )

    assert cached, "expected a cache hit for the prompt that was just stored"
    assert cached['response'] == test_response
    print(f" Cache hit! Response: {cached['response']}")

    # Test cache miss
    print("\n3. Testing cache miss...")
    cached = db_client.get_cached_response(
        prompt="Different prompt",
        company_name="CacheTest",
        analysis_type="test"
    )

    assert not cached, "unexpected cache hit for a prompt that was never stored"
    print(" Cache miss as expected")

    print("\n" + "=" * 70)
    print("Cache test completed successfully!")
|
||||
|
||||
def test_test_mode():
    """Run a single-patent analysis with the analyzer built in test mode.

    Prints the analyzer's test_mode flag, whether it has a database client,
    and the full analysis result for "TestCorp2".
    """
    rule = "=" * 70
    print("\nTesting Test Mode")
    print(rule)

    print("\n1. Initializing LLMAnalyzer in test mode...")
    llm = LLMAnalyzer(test_mode=True)

    print(f" - test_mode: {llm.test_mode}")
    print(f" - db_client: {llm.db_client is not None}")

    print("\n2. Testing single patent analysis (test mode)...")
    outcome = llm.analyze_patent_content(
        patent_content="Test patent content",
        company_name="TestCorp2"
    )
    print(f" Result: {outcome}")

    print("\n" + rule)
    print("Test mode test completed successfully!")
|
||||
|
||||
if __name__ == "__main__":
    # Run the three scenarios in order, separated by a blank gap.
    for position, scenario in enumerate(
        (test_database_storage, test_caching, test_test_mode)
    ):
        if position:
            print("\n")
        scenario()
|
||||
@@ -0,0 +1,540 @@
|
||||
"""Tests for the high-level company analyzer orchestration."""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import Mock, patch, call, MagicMock
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
from SPARC.types import Patent, Patents, CompanyAnalysisResult, BatchAnalysisResult
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def mock_db(mocker):
    """Replace SPARC.analyzer.DatabaseClient with a mock for every test.

    The returned instance reports a cache miss for both patent and SERP
    query lookups unless a test overrides those return values.
    """
    fake_db = MagicMock()
    fake_db.get_cached_patent.return_value = None
    fake_db.get_cached_serp_query.return_value = None
    mocker.patch("SPARC.analyzer.DatabaseClient", return_value=fake_db)
    return fake_db
|
||||
|
||||
|
||||
class TestCompanyAnalyzer:
    """Exercise CompanyAnalyzer's orchestration with its collaborators mocked."""

    def test_analyzer_initialization(self, mocker):
        """The OpenRouter key given to the analyzer is forwarded to LLMAnalyzer."""
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        CompanyAnalyzer(openrouter_api_key="test-key")

        llm_cls.assert_called_once_with(api_key="test-key")

    def test_analyze_company_full_pipeline(self, mocker, mock_db):
        """One patent flows through query, save, parse, minimize and the LLM."""
        # Stub every external step of the pipeline.
        query_stub = mocker.patch("SPARC.analyzer.SERP.query")
        save_stub = mocker.patch("SPARC.analyzer.SERP.save_patents")
        parse_stub = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize_stub = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        # Canned results for each step.
        found = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        query_stub.return_value = Patents(patents=[found])

        found.pdf_path = "patents/US123.pdf"
        save_stub.return_value = found

        parse_stub.return_value = {
            "abstract": "Test abstract",
            "claims": "Test claims",
        }
        minimize_stub.return_value = "Minimized content"

        llm = Mock()
        llm.analyze_patent_portfolio.return_value = "Strong innovation portfolio"
        llm_cls.return_value = llm

        outcome = CompanyAnalyzer().analyze_company("TestCorp")

        # Each stage ran exactly once, with the expected inputs.
        assert outcome == "Strong innovation portfolio"
        query_stub.assert_called_once_with("TestCorp")
        save_stub.assert_called_once()
        parse_stub.assert_called_once_with("patents/US123.pdf")
        minimize_stub.assert_called_once()
        llm.analyze_patent_portfolio.assert_called_once()

        # The LLM received the minimized content keyed by patent id.
        _, llm_kwargs = llm.analyze_patent_portfolio.call_args
        sent = llm_kwargs["patents_data"]
        assert len(sent) == 1
        assert sent[0]["patent_id"] == "US123"
        assert sent[0]["content"] == "Minimized content"

    def test_analyze_company_no_patents_found(self, mocker):
        """An empty search result yields the 'No patents found' message."""
        query_stub = mocker.patch("SPARC.analyzer.SERP.query")
        query_stub.return_value = Patents(patents=[])
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        outcome = CompanyAnalyzer().analyze_company("UnknownCorp")

        assert outcome == "No patents found for UnknownCorp"

    def test_analyze_company_handles_processing_errors(self, mocker):
        """A patent that fails to download is skipped; the rest are analyzed."""
        query_stub = mocker.patch("SPARC.analyzer.SERP.query")
        save_stub = mocker.patch("SPARC.analyzer.SERP.save_patents")
        parse_stub = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize_stub = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        # Two patents come back from the search.
        good = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
        bad = Patent(patent_id="US456", pdf_link="http://example.com/2.pdf")
        query_stub.return_value = Patents(patents=[good, bad])

        good.pdf_path = "patents/US123.pdf"

        def fake_save(candidate):
            # Only US123 downloads; anything else fails.
            if candidate.patent_id != "US123":
                raise Exception("Download failed")
            candidate.pdf_path = "patents/US123.pdf"
            return candidate

        save_stub.side_effect = fake_save

        parse_stub.return_value = {"abstract": "Test"}
        minimize_stub.return_value = "Content"

        llm = Mock()
        llm.analyze_patent_portfolio.return_value = "Analysis result"
        llm_cls.return_value = llm

        outcome = CompanyAnalyzer().analyze_company("TestCorp")

        # The run still succeeds using the one patent that worked.
        assert outcome == "Analysis result"

        _, llm_kwargs = llm.analyze_patent_portfolio.call_args
        sent = llm_kwargs["patents_data"]
        assert len(sent) == 1
        assert sent[0]["patent_id"] == "US123"

    def test_analyze_company_all_patents_fail(self, mocker):
        """When every patent fails, the failure message is returned."""
        query_stub = mocker.patch("SPARC.analyzer.SERP.query")
        save_stub = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        only_patent = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
        query_stub.return_value = Patents(patents=[only_patent])

        # Every download attempt blows up.
        save_stub.side_effect = Exception("Processing error")

        outcome = CompanyAnalyzer().analyze_company("TestCorp")

        assert outcome == "Failed to process any patents for TestCorp"

    def test_analyze_single_patent(self, mocker):
        """A single patent is parsed, minimized and handed to the LLM."""
        parse_stub = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize_stub = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        parse_stub.return_value = {"abstract": "Test abstract"}
        minimize_stub.return_value = "Minimized content"

        llm = Mock()
        llm.analyze_patent_content.return_value = "Innovative patent analysis"
        llm_cls.return_value = llm

        outcome = CompanyAnalyzer().analyze_single_patent("US123", "TestCorp")

        assert outcome == "Innovative patent analysis"
        parse_stub.assert_called_once_with("patents/US123.pdf")
        llm.analyze_patent_content.assert_called_once_with(
            patent_content="Minimized content", company_name="TestCorp"
        )

    def test_analyze_single_patent_error_handling(self, mocker):
        """A parsing error is reported in the returned message."""
        parse_stub = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        parse_stub.side_effect = FileNotFoundError("PDF not found")

        outcome = CompanyAnalyzer().analyze_single_patent("US999", "TestCorp")

        assert "Failed to analyze patent US999" in outcome
        assert "PDF not found" in outcome
|
||||
|
||||
|
||||
class TestSingleQueryBugFix:
    """Guard against SERP.query being issued more than once per analysis."""

    def test_analyze_company_safe_calls_query_once(self, mocker, mock_db):
        """_analyze_company_safe issues exactly one SERP.query call."""
        query_stub = mocker.patch("SPARC.analyzer.SERP.query")
        save_stub = mocker.patch("SPARC.analyzer.SERP.save_patents")
        parse_stub = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize_stub = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        found = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        query_stub.return_value = Patents(patents=[found])

        def fake_save(candidate):
            candidate.pdf_path = "patents/US123.pdf"
            return candidate

        save_stub.side_effect = fake_save
        parse_stub.return_value = {"abstract": "Test"}
        minimize_stub.return_value = "Content"

        llm = Mock()
        llm.analyze_patent_portfolio.return_value = "Analysis"
        llm_cls.return_value = llm

        CompanyAnalyzer()._analyze_company_safe("TestCorp")

        # Exactly one search call, not two.
        query_stub.assert_called_once_with("TestCorp")

    def test_analyze_company_with_prefetched_patents_skips_query(self, mocker):
        """Passing patents to analyze_company suppresses the SERP.query call."""
        query_stub = mocker.patch("SPARC.analyzer.SERP.query")
        save_stub = mocker.patch("SPARC.analyzer.SERP.save_patents")
        parse_stub = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize_stub = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        found = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        already_fetched = Patents(patents=[found])

        def fake_save(candidate):
            candidate.pdf_path = "patents/US123.pdf"
            return candidate

        save_stub.side_effect = fake_save
        parse_stub.return_value = {"abstract": "Test"}
        minimize_stub.return_value = "Content"

        llm = Mock()
        llm.analyze_patent_portfolio.return_value = "Analysis"
        llm_cls.return_value = llm

        CompanyAnalyzer().analyze_company("TestCorp", patents=already_fetched)

        # The search API must not be touched at all.
        query_stub.assert_not_called()
|
||||
|
||||
|
||||
class TestPatentCaching:
    """Cover the database-backed caching of patents and SERP queries."""

    def test_process_single_patent_uses_db_cache(self, mocker, mock_db):
        """A cached patent is returned without downloading anything."""
        save_stub = mocker.patch("SPARC.analyzer.SERP.save_patents")

        mock_db.get_cached_patent.return_value = {
            "patent_id": "US123",
            "minimized_content": "Cached minimized content",
        }

        candidate = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        processed = CompanyAnalyzer._process_single_patent(candidate, "TestCorp", mock_db)

        assert processed == {"patent_id": "US123", "content": "Cached minimized content"}
        # A cache hit means no download.
        save_stub.assert_not_called()

    def test_process_single_patent_stores_to_db_cache(self, mocker, mock_db):
        """A freshly processed patent is written to the database."""
        save_stub = mocker.patch("SPARC.analyzer.SERP.save_patents")
        parse_stub = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize_stub = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")

        # Force a cache miss.
        mock_db.get_cached_patent.return_value = None

        candidate = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")

        def fake_save(target):
            target.pdf_path = "patents/US123.pdf"
            return target

        save_stub.side_effect = fake_save
        parse_stub.return_value = {"abstract": "Test abstract"}
        minimize_stub.return_value = "Minimized content"

        processed = CompanyAnalyzer._process_single_patent(candidate, "TestCorp", mock_db)

        assert processed == {"patent_id": "US123", "content": "Minimized content"}
        mock_db.store_patent.assert_called_once_with(
            patent_id="US123",
            company_name="TestCorp",
            pdf_link="http://example.com/test.pdf",
            raw_sections={"abstract": "Test abstract"},
            minimized_content="Minimized content",
        )

    def test_serp_query_cache_hit_skips_api(self, mocker, mock_db):
        """A cached SERP query means neither the search API nor a download runs."""
        query_stub = mocker.patch("SPARC.analyzer.SERP.query")
        save_stub = mocker.patch("SPARC.analyzer.SERP.save_patents")
        parse_stub = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize_stub = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        # Both the query and the patent itself are already cached.
        mock_db.get_cached_serp_query.return_value = ["US123"]
        mock_db.get_cached_patent.return_value = {
            "patent_id": "US123",
            "minimized_content": "Cached content",
        }

        llm = Mock()
        llm.analyze_patent_portfolio.return_value = "Analysis"
        llm_cls.return_value = llm

        outcome = CompanyAnalyzer().analyze_company("TestCorp")

        assert outcome == "Analysis"
        # No search call and no download should have happened.
        query_stub.assert_not_called()
        save_stub.assert_not_called()

    def test_serp_query_cache_miss_stores_result(self, mocker, mock_db):
        """After a SERP cache miss, the query result is stored."""
        query_stub = mocker.patch("SPARC.analyzer.SERP.query")
        save_stub = mocker.patch("SPARC.analyzer.SERP.save_patents")
        parse_stub = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize_stub = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        mock_db.get_cached_serp_query.return_value = None

        found = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        query_stub.return_value = Patents(patents=[found])

        def fake_save(target):
            target.pdf_path = "patents/US123.pdf"
            return target

        save_stub.side_effect = fake_save
        parse_stub.return_value = {"abstract": "Test"}
        minimize_stub.return_value = "Content"

        llm = Mock()
        llm.analyze_patent_portfolio.return_value = "Analysis"
        llm_cls.return_value = llm

        CompanyAnalyzer().analyze_company("TestCorp")

        mock_db.store_serp_query.assert_called_once()
        _, stored_kwargs = mock_db.store_serp_query.call_args
        assert stored_kwargs["company_name"] == "TestCorp"
        assert stored_kwargs["patent_ids"] == ["US123"]
|
||||
|
||||
|
||||
class TestBatchProcessing:
    """Test multi-company batch processing functionality."""

    @staticmethod
    def _stub_pipeline(mocker, analysis):
        """Patch the SERP steps and LLMAnalyzer; return the (query, save) mocks.

        ``parse_patent_pdf`` and ``minimize_patent_for_llm`` get fixed return
        values, and the patched ``LLMAnalyzer`` class produces an instance
        whose ``analyze_patent_portfolio`` returns ``analysis``.
        """
        query = mocker.patch("SPARC.analyzer.SERP.query")
        save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        parse.return_value = {"abstract": "Test"}
        minimize.return_value = "Content"

        llm_stub = Mock()
        llm_stub.analyze_patent_portfolio.return_value = analysis
        llm_cls.return_value = llm_stub
        return query, save

    @staticmethod
    def _single_patent_for(company):
        """Return a one-patent search result derived from the company name."""
        return Patents(
            patents=[
                Patent(
                    patent_id=f"US-{company}",
                    pdf_link=f"http://example.com/{company}.pdf",
                )
            ]
        )

    @staticmethod
    def _record_pdf_path(patent):
        """Stand-in for the downloader: set ``pdf_path`` and return the patent."""
        patent.pdf_path = f"patents/{patent.patent_id}.pdf"
        return patent

    def test_analyze_companies_success(self, mocker):
        """Batch analysis of two companies reports two successes."""
        query, save = self._stub_pipeline(mocker, "Analysis result")
        query.side_effect = self._single_patent_for
        save.side_effect = self._record_pdf_path

        batch = CompanyAnalyzer().analyze_companies(
            ["CompanyA", "CompanyB"], max_workers=2
        )

        assert isinstance(batch, BatchAnalysisResult)
        assert batch.total_companies == 2
        assert batch.successful == 2
        assert batch.failed == 0
        assert len(batch.results) == 2

    def test_analyze_companies_with_failures(self, mocker):
        """Batch analysis reports a failure for a company with no patents."""
        # Only the search and the LLM class are patched in this test.
        query = mocker.patch("SPARC.analyzer.SERP.query")
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        def search(company):
            # "FailCorp" gets an empty result set; everyone else one patent.
            if company == "FailCorp":
                return Patents(patents=[])
            return Patents(
                patents=[
                    Patent(
                        patent_id=f"US-{company}",
                        pdf_link=f"http://example.com/{company}.pdf",
                    )
                ]
            )

        query.side_effect = search

        batch = CompanyAnalyzer().analyze_companies(
            ["GoodCorp", "FailCorp"], max_workers=1
        )

        assert batch.total_companies == 2
        assert batch.failed >= 1  # At least FailCorp should fail

        # Locate FailCorp's entry and check it is flagged as failed.
        failed_entry = next(
            entry for entry in batch.results if entry.company_name == "FailCorp"
        )
        assert failed_entry.success is False

    def test_analyze_companies_sequential(self, mocker):
        """Sequential batch analysis returns one result per company."""
        query, save = self._stub_pipeline(mocker, "Analysis")
        query.side_effect = self._single_patent_for
        save.side_effect = self._record_pdf_path

        batch = CompanyAnalyzer().analyze_companies_sequential(
            ["Corp1", "Corp2", "Corp3"]
        )

        assert batch.total_companies == 3
        assert len(batch.results) == 3

    def test_analyze_companies_progress_callback(self, mocker):
        """The progress callback is called once per company."""
        query, save = self._stub_pipeline(mocker, "Analysis")
        query.side_effect = self._single_patent_for
        save.side_effect = self._record_pdf_path

        on_progress = Mock()
        CompanyAnalyzer().analyze_companies(
            ["A", "B"], max_workers=1, progress_callback=on_progress
        )

        assert on_progress.call_count == 2

    def test_company_analysis_result_structure(self, mocker, mock_db):
        """Each CompanyAnalysisResult in a batch carries the expected fields."""
        query, save = self._stub_pipeline(mocker, "Strong innovation")
        query.return_value = Patents(
            patents=[Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")]
        )

        # Simulate DB caching: after store, subsequent get returns the IDs
        mock_db.get_cached_serp_query.side_effect = [None, ["US123"]]

        def fake_download(item):
            item.pdf_path = "patents/US123.pdf"
            return item

        save.side_effect = fake_download

        batch = CompanyAnalyzer().analyze_companies(["TestCorp"], max_workers=1)

        assert len(batch.results) == 1
        entry = batch.results[0]
        assert entry.company_name == "TestCorp"
        assert entry.analysis == "Strong innovation"
        assert entry.patent_count == 1
        assert entry.success is True
        assert entry.error is None
        assert entry.timestamp is not None
|
||||
@@ -0,0 +1,183 @@
|
||||
"""Tests for FastAPI web service endpoints."""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
from unittest.mock import Mock, patch
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from SPARC.api import app, _analyzer, _jobs
|
||||
from SPARC.types import CompanyAnalysisResult, BatchAnalysisResult
|
||||
|
||||
|
||||
@pytest.fixture
def client():
    """Provide a ``TestClient`` bound to the FastAPI ``app`` under test."""
    test_client = TestClient(app)
    return test_client
|
||||
|
||||
|
||||
@pytest.fixture
def mock_analyzer(mocker):
    """Replace ``SPARC.api._analyzer`` with a ``Mock`` and return that mock."""
    analyzer_stub = Mock()
    mocker.patch("SPARC.api._analyzer", analyzer_stub)
    return analyzer_stub
|
||||
|
||||
|
||||
class TestHealthEndpoint:
    """Test health check endpoint."""

    def test_health_returns_ok(self, client):
        """GET /health answers 200 with status, version and a timestamp."""
        reply = client.get("/health")
        assert reply.status_code == 200

        payload = reply.json()
        assert payload["status"] == "healthy"
        assert payload["version"] == "1.0.0"
        assert "timestamp" in payload
|
||||
|
||||
|
||||
class TestAnalyzeCompanyEndpoint:
    """Test single company analysis endpoint."""

    def test_analyze_company_success(self, client, mock_analyzer):
        """A successful analysis is returned as JSON by GET /analyze/{name}."""
        mock_analyzer._analyze_company_safe.return_value = CompanyAnalysisResult(
            company_name="nvidia",
            analysis="Strong AI patent portfolio",
            patent_count=5,
            success=True,
            timestamp=datetime.now(),
        )

        reply = client.get("/analyze/nvidia")
        assert reply.status_code == 200

        payload = reply.json()
        assert payload["company_name"] == "nvidia"
        assert payload["analysis"] == "Strong AI patent portfolio"
        assert payload["patent_count"] == 5
        assert payload["success"] is True

    def test_analyze_company_failure(self, client, mock_analyzer):
        """A failed analysis still answers 200 and carries the error text."""
        mock_analyzer._analyze_company_safe.return_value = CompanyAnalysisResult(
            company_name="unknown",
            analysis="",
            patent_count=0,
            success=False,
            error="No patents found",
            timestamp=datetime.now(),
        )

        reply = client.get("/analyze/unknown")
        assert reply.status_code == 200

        payload = reply.json()
        assert payload["success"] is False
        assert payload["error"] == "No patents found"
|
||||
|
||||
|
||||
class TestBatchAnalysisEndpoint:
    """Test batch analysis endpoint."""

    def test_batch_analysis_success(self, client, mock_analyzer):
        """POST /analyze/batch returns the batch counters and per-company results."""
        nvidia = CompanyAnalysisResult(
            company_name="nvidia",
            analysis="Strong portfolio",
            patent_count=5,
            success=True,
            timestamp=datetime.now(),
        )
        amd = CompanyAnalysisResult(
            company_name="amd",
            analysis="Growing portfolio",
            patent_count=3,
            success=True,
            timestamp=datetime.now(),
        )
        mock_analyzer.analyze_companies.return_value = BatchAnalysisResult(
            results=[nvidia, amd],
            total_companies=2,
            successful=2,
            failed=0,
            timestamp=datetime.now(),
        )

        request_body = {"companies": ["nvidia", "amd"], "max_workers": 2}
        reply = client.post("/analyze/batch", json=request_body)
        assert reply.status_code == 200

        payload = reply.json()
        assert payload["total_companies"] == 2
        assert payload["successful"] == 2
        assert payload["failed"] == 0
        assert len(payload["results"]) == 2

    def test_batch_analysis_validation(self, client):
        """Invalid batch requests are rejected with HTTP 422."""
        # Empty companies list
        empty_list = client.post("/analyze/batch", json={"companies": []})
        assert empty_list.status_code == 422

        # Too many companies
        oversized = {"companies": [f"company{i}" for i in range(25)]}
        too_many = client.post("/analyze/batch", json=oversized)
        assert too_many.status_code == 422

        # Invalid max_workers
        bad_workers = client.post(
            "/analyze/batch",
            json={"companies": ["nvidia"], "max_workers": 10},
        )
        assert bad_workers.status_code == 422
|
||||
|
||||
|
||||
class TestAsyncBatchEndpoint:
    """Test async batch analysis endpoint."""

    def test_async_batch_creates_job(self, client, mock_analyzer):
        """POST /analyze/batch/async answers with a fresh pending job."""
        request_body = {"companies": ["nvidia", "amd"]}
        reply = client.post("/analyze/batch/async", json=request_body)
        assert reply.status_code == 200

        job = reply.json()
        assert "job_id" in job
        assert job["status"] == "pending"
        assert job["total_companies"] == 2
        assert job["progress"] == 0
|
||||
|
||||
|
||||
class TestJobEndpoints:
    """Test job management endpoints."""

    def test_get_job_not_found(self, client):
        """Requesting an unknown job id yields HTTP 404."""
        reply = client.get("/jobs/nonexistent")
        assert reply.status_code == 404

    def test_list_jobs(self, client, mocker):
        """GET /jobs returns a JSON list."""
        # Start from an empty job registry for the duration of the test.
        mocker.patch.dict("SPARC.api._jobs", {}, clear=True)

        reply = client.get("/jobs")
        assert reply.status_code == 200
        assert isinstance(reply.json(), list)

    def test_list_jobs_with_filter(self, client, mocker):
        """GET /jobs accepts a ``status`` query parameter."""
        reply = client.get("/jobs?status=completed")
        assert reply.status_code == 200
|
||||
+103
-39
@@ -1,72 +1,116 @@
|
||||
"""Tests for LLM analysis functionality."""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import Mock, MagicMock
|
||||
from unittest.mock import Mock, MagicMock, patch
|
||||
from SPARC.llm import LLMAnalyzer
|
||||
|
||||
|
||||
class TestLLMAnalyzer:
|
||||
"""Test LLM analyzer initialization and API interaction."""
|
||||
|
||||
@pytest.fixture(autouse=True)
def mock_database(self, mocker):
    """Patch ``SPARC.llm.DatabaseClient`` with a stub for every test.

    The stub reports no cached response and returns ``1`` from
    ``store_message``; it is returned so tests can adjust or inspect it.
    """
    db_stub = Mock()
    db_stub.store_message.return_value = 1
    db_stub.get_cached_response.return_value = None  # No cache hit by default
    mocker.patch("SPARC.llm.DatabaseClient", return_value=db_stub)
    return db_stub
|
||||
|
||||
def test_analyzer_initialization_with_api_key(self, mocker):
    """Test that analyzer initializes with provided API key.

    The explicit key must be passed to the ``OpenAI`` client together with
    the OpenRouter base URL, and the analyzer must select the OpenRouter
    model identifier.
    """
    mock_openai = mocker.patch("SPARC.llm.OpenAI")

    analyzer = LLMAnalyzer(api_key="test-key-123")

    mock_openai.assert_called_once_with(
        api_key="test-key-123",
        base_url="https://openrouter.ai/api/v1",
    )
    assert analyzer.model == "anthropic/claude-3.5-sonnet"
|
||||
|
||||
def test_analyzer_initialization_from_config(self, mocker):
    """Test that analyzer loads API key from config when not provided.

    ``SPARC.llm.config`` is replaced by a mock whose ``openrouter_api_key``
    is expected to be forwarded to the ``OpenAI`` client.
    """
    mock_openai = mocker.patch("SPARC.llm.OpenAI")
    mock_config = mocker.patch("SPARC.llm.config")
    mock_config.openrouter_api_key = "config-key-456"
    mock_config.use_cache = True
    mock_config.database_url = "postgresql://localhost/test"

    # Only construction matters here; the instance itself is not inspected.
    LLMAnalyzer()

    mock_openai.assert_called_once_with(
        api_key="config-key-456",
        base_url="https://openrouter.ai/api/v1",
    )
|
||||
|
||||
def test_analyze_patent_content(self, mocker, mock_database):
    """Test single patent content analysis.

    With caching disabled, the analyzer must call the chat-completions API
    once, return the message content, build a prompt that contains the
    company name and the patent text, and store the exchange in the database.
    """
    mock_openai = mocker.patch("SPARC.llm.OpenAI")
    mock_client = Mock()
    mock_openai.return_value = mock_client

    # Mock the API response
    mock_response = Mock()
    mock_response.choices = [Mock(message=Mock(content="Innovative GPU architecture."))]
    mock_response.usage = Mock(prompt_tokens=100, completion_tokens=50, total_tokens=150)
    mock_client.chat.completions.create.return_value = mock_response

    analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
    result = analyzer.analyze_patent_content(
        patent_content="ABSTRACT: GPU with new cache design...",
        company_name="NVIDIA",
    )

    assert result == "Innovative GPU architecture."
    mock_client.chat.completions.create.assert_called_once()

    # Verify the prompt includes company name and content
    call_args = mock_client.chat.completions.create.call_args
    prompt_text = call_args[1]["messages"][0]["content"]
    assert "NVIDIA" in prompt_text
    assert "GPU with new cache design" in prompt_text

    # Verify message was stored in database
    mock_database.store_message.assert_called_once()
|
||||
|
||||
def test_analyze_patent_content_cache_hit(self, mocker, mock_database):
    """Test that cached responses are returned without API call.

    ``get_cached_response`` is primed with a stored answer; with
    ``use_cache=True`` the analyzer must return it and leave the
    chat-completions endpoint untouched.
    """
    mock_openai = mocker.patch("SPARC.llm.OpenAI")
    mock_client = Mock()
    mock_openai.return_value = mock_client

    # Set up cache hit
    mock_database.get_cached_response.return_value = {
        "id": 1,
        "response": "Cached analysis result",
    }

    analyzer = LLMAnalyzer(api_key="test-key", use_cache=True)
    result = analyzer.analyze_patent_content(
        patent_content="ABSTRACT: GPU with new cache design...",
        company_name="NVIDIA",
    )

    assert result == "Cached analysis result"
    # API should NOT be called on cache hit
    mock_client.chat.completions.create.assert_not_called()
|
||||
|
||||
def test_analyze_patent_portfolio(self, mocker, mock_database):
|
||||
"""Test portfolio analysis with multiple patents."""
|
||||
mock_openai = mocker.patch("SPARC.llm.OpenAI")
|
||||
mock_client = Mock()
|
||||
mock_openai.return_value = mock_client
|
||||
|
||||
# Mock the API response
|
||||
mock_response = Mock()
|
||||
mock_response.content = [
|
||||
Mock(text="Strong portfolio in AI and graphics.")
|
||||
mock_response.choices = [
|
||||
Mock(message=Mock(content="Strong portfolio in AI and graphics."))
|
||||
]
|
||||
mock_client.messages.create.return_value = mock_response
|
||||
mock_response.usage = Mock(prompt_tokens=200, completion_tokens=100, total_tokens=300)
|
||||
mock_client.chat.completions.create.return_value = mock_response
|
||||
|
||||
analyzer = LLMAnalyzer(api_key="test-key")
|
||||
analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
|
||||
patents_data = [
|
||||
{"patent_id": "US123", "content": "AI acceleration patent"},
|
||||
{"patent_id": "US456", "content": "Graphics rendering patent"},
|
||||
@@ -77,48 +121,68 @@ class TestLLMAnalyzer:
|
||||
)
|
||||
|
||||
assert result == "Strong portfolio in AI and graphics."
|
||||
mock_client.messages.create.assert_called_once()
|
||||
mock_client.chat.completions.create.assert_called_once()
|
||||
|
||||
# Verify the prompt includes all patents
|
||||
call_args = mock_client.messages.create.call_args
|
||||
call_args = mock_client.chat.completions.create.call_args
|
||||
prompt_text = call_args[1]["messages"][0]["content"]
|
||||
assert "US123" in prompt_text
|
||||
assert "US456" in prompt_text
|
||||
assert "AI acceleration patent" in prompt_text
|
||||
assert "Graphics rendering patent" in prompt_text
|
||||
|
||||
def test_analyze_patent_portfolio_with_correct_token_limit(self, mocker, mock_database):
    """Test that portfolio analysis uses higher token limit.

    Inspects the keyword arguments of the chat-completions call made by
    ``analyze_patent_portfolio`` and expects ``max_tokens`` to be 2048.
    """
    mock_openai = mocker.patch("SPARC.llm.OpenAI")
    mock_client = Mock()
    mock_openai.return_value = mock_client

    mock_response = Mock()
    mock_response.choices = [Mock(message=Mock(content="Analysis result."))]
    mock_response.usage = Mock(prompt_tokens=100, completion_tokens=50, total_tokens=150)
    mock_client.chat.completions.create.return_value = mock_response

    analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
    patents_data = [{"patent_id": "US123", "content": "Test content"}]

    analyzer.analyze_patent_portfolio(patents_data, "TestCo")

    call_args = mock_client.chat.completions.create.call_args
    # Portfolio analysis should use 2048 tokens
    assert call_args[1]["max_tokens"] == 2048
|
||||
|
||||
def test_analyze_single_patent_with_correct_token_limit(self, mocker, mock_database):
    """Test that single patent analysis uses lower token limit.

    Inspects the keyword arguments of the chat-completions call made by
    ``analyze_patent_content`` and expects ``max_tokens`` to be 1024.
    """
    mock_openai = mocker.patch("SPARC.llm.OpenAI")
    mock_client = Mock()
    mock_openai.return_value = mock_client

    mock_response = Mock()
    mock_response.choices = [Mock(message=Mock(content="Analysis result."))]
    mock_response.usage = Mock(prompt_tokens=100, completion_tokens=50, total_tokens=150)
    mock_client.chat.completions.create.return_value = mock_response

    analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
    analyzer.analyze_patent_content("Test content", "TestCo")

    call_args = mock_client.chat.completions.create.call_args
    # Single patent should use 1024 tokens
    assert call_args[1]["max_tokens"] == 1024
|
||||
|
||||
def test_database_always_initialized(self, mocker, mock_database):
    """An analyzer built with an API key exposes a non-None ``db_client``."""
    mocker.patch("SPARC.llm.OpenAI")

    instance = LLMAnalyzer(api_key="test-key")

    assert instance.db_client is not None
|
||||
|
||||
def test_no_api_key_stores_placeholder(self, mocker, mock_database):
    """With no API key passed, the result carries "[NO API]" and is stored once."""
    mocker.patch("SPARC.llm.config")

    instance = LLMAnalyzer(use_cache=False)
    placeholder = instance.analyze_patent_content("Test content", "TestCo")

    assert "[NO API]" in placeholder
    mock_database.store_message.assert_called_once()
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
"""Tests for SERP API patent retrieval and parsing functionality."""
|
||||
|
||||
import os
|
||||
import pytest
|
||||
from unittest.mock import patch, Mock
|
||||
from datetime import datetime, timedelta
|
||||
from SPARC.serp_api import SERP
|
||||
from SPARC.types import Patent
|
||||
|
||||
|
||||
class TestTextCleaning:
|
||||
@@ -176,3 +180,89 @@ class TestPatentMinimization:
|
||||
|
||||
# Sections should be separated by double newlines
|
||||
assert "\n\n" in result
|
||||
|
||||
|
||||
class TestDynamicDateRange:
    """Test dynamic date range computation in SERP.query."""

    def test_query_uses_rolling_date_window(self, mocker):
        """Verify the date filter uses a rolling window, not hardcoded dates."""
        mock_search = mocker.patch("SPARC.serp_api.serpapi.search")
        mock_search.return_value = {"organic_results": []}
        mocker.patch("SPARC.serp_api.config.api_key", "fake-key")
        mocker.patch("SPARC.serp_api.config.patent_search_days", 90)

        SERP.query("TestCorp")

        # The search parameters are passed as the first positional argument.
        call_params = mock_search.call_args[0][0]
        tbs = call_params["tbs"]
        # Should contain "cdr:1,cd_min:" with a date, not the old hardcoded one
        assert "cdr:1,cd_min:" in tbs
        assert "10/28/2025" not in tbs  # old hardcoded date gone

    def test_query_respects_days_back_param(self, mocker):
        """Verify days_back parameter controls the date window."""
        mock_search = mocker.patch("SPARC.serp_api.serpapi.search")
        mock_search.return_value = {"organic_results": []}
        mocker.patch("SPARC.serp_api.config.api_key", "fake-key")
        mocker.patch("SPARC.serp_api.config.patent_search_days", 90)

        # Sample the clock on both sides of the call so the test cannot fail
        # when the date rolls over between taking "now" and running the query.
        before = datetime.now()
        SERP.query("TestCorp", days_back=30)
        after = datetime.now()

        call_params = mock_search.call_args[0][0]
        tbs = call_params["tbs"]

        # Build the unpadded M/D/YYYY text by hand: the "%-m" / "%-d" strftime
        # flags are a platform extension and are not available everywhere.
        expected_starts = {
            f"{start.month}/{start.day}/{start.year}"
            for start in (before - timedelta(days=30), after - timedelta(days=30))
        }
        assert any(expected in tbs for expected in expected_starts)
|
||||
|
||||
|
||||
class TestFilesystemPDFCaching:
    """Test that save_patents skips download for existing files."""

    def test_save_patents_skips_download_when_cached(self, mocker, tmp_path):
        """A PDF reported as present and non-empty must not be fetched again."""
        http_get = mocker.patch("SPARC.serp_api.requests.get")
        mocker.patch("SPARC.serp_api.os.makedirs")

        cached_file = tmp_path / "US123.pdf"
        cached_file.write_bytes(b"%PDF-1.4 fake content")

        # The filesystem probes are stubbed to report an existing 100-byte file.
        mocker.patch("SPARC.serp_api.os.path.exists", return_value=True)
        mocker.patch("SPARC.serp_api.os.path.getsize", return_value=100)

        saved = SERP.save_patents(
            Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        )

        http_get.assert_not_called()
        assert saved.pdf_path == "patents/US123.pdf"

    def test_save_patents_downloads_when_not_cached(self, mocker):
        """A PDF reported as missing is fetched from its link."""
        fake_reply = Mock()
        fake_reply.content = b"%PDF-1.4 content"
        http_get = mocker.patch("SPARC.serp_api.requests.get", return_value=fake_reply)
        mocker.patch("SPARC.serp_api.os.makedirs")
        mocker.patch("SPARC.serp_api.os.path.exists", return_value=False)
        # Writing goes to a mocked ``open`` instead of the real filesystem.
        mocker.patch("builtins.open", mocker.mock_open())

        saved = SERP.save_patents(
            Patent(patent_id="US456", pdf_link="http://example.com/test.pdf")
        )

        http_get.assert_called_once_with("http://example.com/test.pdf")
        assert saved.pdf_path == "patents/US456.pdf"

    def test_save_patents_redownloads_empty_files(self, mocker):
        """A PDF reported as present but 0 bytes long is fetched again."""
        fake_reply = Mock()
        fake_reply.content = b"%PDF-1.4 content"
        http_get = mocker.patch("SPARC.serp_api.requests.get", return_value=fake_reply)
        mocker.patch("SPARC.serp_api.os.makedirs")
        mocker.patch("SPARC.serp_api.os.path.exists", return_value=True)
        mocker.patch("SPARC.serp_api.os.path.getsize", return_value=0)
        mocker.patch("builtins.open", mocker.mock_open())

        saved = SERP.save_patents(
            Patent(patent_id="US789", pdf_link="http://example.com/test.pdf")
        )

        http_get.assert_called_once()
        assert saved.pdf_path == "patents/US789.pdf"
|
||||
|
||||
Reference in New Issue
Block a user