feat: add FastAPI web service wrapper
- Create REST API with endpoints for single and batch analysis - Add async job support for long-running batch operations - Implement job status tracking and listing endpoints - Add 9 tests for API endpoints - Update requirements.txt with fastapi, uvicorn, httpx - Document API usage in README 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
+286
@@ -0,0 +1,286 @@
|
||||
"""FastAPI web service wrapper for SPARC patent analysis.
|
||||
|
||||
Provides REST API endpoints for analyzing company patent portfolios.
|
||||
"""
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import BackgroundTasks, FastAPI, HTTPException, Query
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult
|
||||
|
||||
|
||||
# Pydantic models for API
|
||||
class CompanyAnalysisResponse(BaseModel):
|
||||
"""Response model for single company analysis."""
|
||||
|
||||
company_name: str
|
||||
analysis: str
|
||||
patent_count: int
|
||||
success: bool
|
||||
error: str | None = None
|
||||
timestamp: datetime
|
||||
|
||||
|
||||
class BatchAnalysisResponse(BaseModel):
|
||||
"""Response model for batch company analysis."""
|
||||
|
||||
results: list[CompanyAnalysisResponse]
|
||||
total_companies: int
|
||||
successful: int
|
||||
failed: int
|
||||
timestamp: datetime
|
||||
|
||||
|
||||
class BatchAnalysisRequest(BaseModel):
|
||||
"""Request model for batch company analysis."""
|
||||
|
||||
companies: list[str] = Field(
|
||||
..., min_length=1, max_length=20, description="List of company names to analyze"
|
||||
)
|
||||
max_workers: int = Field(
|
||||
default=3, ge=1, le=5, description="Max concurrent analyses"
|
||||
)
|
||||
|
||||
|
||||
class JobStatus(BaseModel):
|
||||
"""Status of a background analysis job."""
|
||||
|
||||
job_id: str
|
||||
status: str # "pending", "running", "completed", "failed"
|
||||
progress: int # 0-100
|
||||
total_companies: int
|
||||
completed_companies: int
|
||||
result: BatchAnalysisResponse | None = None
|
||||
error: str | None = None
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
|
||||
"""Health check response."""
|
||||
|
||||
status: str
|
||||
version: str
|
||||
timestamp: datetime
|
||||
|
||||
|
||||
# In-memory job storage (for demo; production would use Redis/DB)
|
||||
_jobs: dict[str, JobStatus] = {}
|
||||
_job_counter = 0
|
||||
|
||||
|
||||
def _convert_result(result: CompanyAnalysisResult) -> CompanyAnalysisResponse:
|
||||
"""Convert internal result to API response model."""
|
||||
return CompanyAnalysisResponse(
|
||||
company_name=result.company_name,
|
||||
analysis=result.analysis,
|
||||
patent_count=result.patent_count,
|
||||
success=result.success,
|
||||
error=result.error,
|
||||
timestamp=result.timestamp,
|
||||
)
|
||||
|
||||
|
||||
def _convert_batch_result(result: BatchAnalysisResult) -> BatchAnalysisResponse:
|
||||
"""Convert internal batch result to API response model."""
|
||||
return BatchAnalysisResponse(
|
||||
results=[_convert_result(r) for r in result.results],
|
||||
total_companies=result.total_companies,
|
||||
successful=result.successful,
|
||||
failed=result.failed,
|
||||
timestamp=result.timestamp,
|
||||
)
|
||||
|
||||
|
||||
# Global analyzer instance
|
||||
_analyzer: CompanyAnalyzer | None = None
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Initialize resources on startup."""
|
||||
global _analyzer
|
||||
_analyzer = CompanyAnalyzer()
|
||||
yield
|
||||
# Cleanup if needed
|
||||
_analyzer = None
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="SPARC API",
|
||||
description="Semiconductor Patent & Analytics Report Core - Patent portfolio analysis using AI",
|
||||
version="1.0.0",
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
|
||||
@app.get("/health", response_model=HealthResponse, tags=["System"])
|
||||
async def health_check():
|
||||
"""Check API health status."""
|
||||
return HealthResponse(
|
||||
status="healthy",
|
||||
version="1.0.0",
|
||||
timestamp=datetime.now(),
|
||||
)
|
||||
|
||||
|
||||
@app.get(
|
||||
"/analyze/{company_name}",
|
||||
response_model=CompanyAnalysisResponse,
|
||||
tags=["Analysis"],
|
||||
)
|
||||
async def analyze_company(company_name: str):
|
||||
"""Analyze a single company's patent portfolio.
|
||||
|
||||
This endpoint retrieves recent patents for the specified company,
|
||||
parses them, and uses AI to generate a comprehensive analysis.
|
||||
|
||||
Args:
|
||||
company_name: Name of the company to analyze (e.g., "nvidia", "intel")
|
||||
|
||||
Returns:
|
||||
Analysis results including patent count, AI insights, and success status
|
||||
"""
|
||||
if not _analyzer:
|
||||
raise HTTPException(status_code=503, detail="Analyzer not initialized")
|
||||
|
||||
result = _analyzer._analyze_company_safe(company_name)
|
||||
return _convert_result(result)
|
||||
|
||||
|
||||
@app.post(
|
||||
"/analyze/batch",
|
||||
response_model=BatchAnalysisResponse,
|
||||
tags=["Analysis"],
|
||||
)
|
||||
async def analyze_companies_batch(request: BatchAnalysisRequest):
|
||||
"""Analyze multiple companies' patent portfolios.
|
||||
|
||||
Processes companies concurrently for improved performance.
|
||||
Limited to 20 companies per request.
|
||||
|
||||
Args:
|
||||
request: List of company names and optional worker count
|
||||
|
||||
Returns:
|
||||
Batch results with individual company analyses and summary statistics
|
||||
"""
|
||||
if not _analyzer:
|
||||
raise HTTPException(status_code=503, detail="Analyzer not initialized")
|
||||
|
||||
result = _analyzer.analyze_companies(
|
||||
companies=request.companies,
|
||||
max_workers=request.max_workers,
|
||||
)
|
||||
return _convert_batch_result(result)
|
||||
|
||||
|
||||
def _run_batch_job(job_id: str, companies: list[str], max_workers: int):
|
||||
"""Background task for batch analysis."""
|
||||
global _jobs, _analyzer
|
||||
|
||||
if not _analyzer:
|
||||
_jobs[job_id].status = "failed"
|
||||
_jobs[job_id].error = "Analyzer not initialized"
|
||||
return
|
||||
|
||||
_jobs[job_id].status = "running"
|
||||
|
||||
def progress_callback(company: str, completed: int, total: int):
|
||||
_jobs[job_id].completed_companies = completed
|
||||
_jobs[job_id].progress = int((completed / total) * 100)
|
||||
|
||||
try:
|
||||
result = _analyzer.analyze_companies(
|
||||
companies=companies,
|
||||
max_workers=max_workers,
|
||||
progress_callback=progress_callback,
|
||||
)
|
||||
_jobs[job_id].status = "completed"
|
||||
_jobs[job_id].progress = 100
|
||||
_jobs[job_id].result = _convert_batch_result(result)
|
||||
except Exception as e:
|
||||
_jobs[job_id].status = "failed"
|
||||
_jobs[job_id].error = str(e)
|
||||
|
||||
|
||||
@app.post("/analyze/batch/async", response_model=JobStatus, tags=["Analysis"])
|
||||
async def analyze_companies_async(
|
||||
request: BatchAnalysisRequest, background_tasks: BackgroundTasks
|
||||
):
|
||||
"""Start an asynchronous batch analysis job.
|
||||
|
||||
Returns immediately with a job ID that can be used to poll for status.
|
||||
Useful for large batch analyses that may take a long time.
|
||||
|
||||
Args:
|
||||
request: List of company names and optional worker count
|
||||
|
||||
Returns:
|
||||
Job status with job_id for polling
|
||||
"""
|
||||
global _job_counter
|
||||
|
||||
_job_counter += 1
|
||||
job_id = f"job_{_job_counter}_{datetime.now().strftime('%Y%m%d%H%M%S')}"
|
||||
|
||||
_jobs[job_id] = JobStatus(
|
||||
job_id=job_id,
|
||||
status="pending",
|
||||
progress=0,
|
||||
total_companies=len(request.companies),
|
||||
completed_companies=0,
|
||||
)
|
||||
|
||||
background_tasks.add_task(
|
||||
_run_batch_job, job_id, request.companies, request.max_workers
|
||||
)
|
||||
|
||||
return _jobs[job_id]
|
||||
|
||||
|
||||
@app.get("/jobs/{job_id}", response_model=JobStatus, tags=["Jobs"])
|
||||
async def get_job_status(job_id: str):
|
||||
"""Get the status of a background analysis job.
|
||||
|
||||
Args:
|
||||
job_id: The job ID returned from the async batch endpoint
|
||||
|
||||
Returns:
|
||||
Current job status including progress and results when complete
|
||||
"""
|
||||
if job_id not in _jobs:
|
||||
raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
|
||||
|
||||
return _jobs[job_id]
|
||||
|
||||
|
||||
@app.get("/jobs", response_model=list[JobStatus], tags=["Jobs"])
|
||||
async def list_jobs(
|
||||
status: Annotated[
|
||||
str | None,
|
||||
Query(description="Filter by status: pending, running, completed, failed"),
|
||||
] = None,
|
||||
limit: Annotated[int, Query(ge=1, le=100)] = 10,
|
||||
):
|
||||
"""List all analysis jobs.
|
||||
|
||||
Args:
|
||||
status: Optional filter by job status
|
||||
limit: Maximum number of jobs to return (default 10, max 100)
|
||||
|
||||
Returns:
|
||||
List of job statuses
|
||||
"""
|
||||
jobs = list(_jobs.values())
|
||||
|
||||
if status:
|
||||
jobs = [j for j in jobs if j.status == status]
|
||||
|
||||
# Return most recent first
|
||||
jobs.sort(key=lambda j: j.job_id, reverse=True)
|
||||
|
||||
return jobs[:limit]
|
||||
Reference in New Issue
Block a user