feat: add FastAPI web service wrapper

- Create REST API with endpoints for single and batch analysis - Add async job support for long-running batch operations - Implement job status tracking and listing endpoints - Add 9 tests for API endpoints - Update requirements.txt with fastapi, uvicorn, httpx - Document API usage in README 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2026-03-12 23:26:26 -04:00
parent 1c6d903301
commit 3479ba8a46
4 changed files with 514 additions and 2 deletions
@@ -0,0 +1,286 @@
+"""FastAPI web service wrapper for SPARC patent analysis.
+
+Provides REST API endpoints for analyzing company patent portfolios.
+"""
+
+from contextlib import asynccontextmanager
+from datetime import datetime
+from typing import Annotated
+
+from fastapi import BackgroundTasks, FastAPI, HTTPException, Query
+from pydantic import BaseModel, Field
+
+from SPARC.analyzer import CompanyAnalyzer
+from SPARC.types import BatchAnalysisResult, CompanyAnalysisResult
+
+
+# Pydantic models for API
+class CompanyAnalysisResponse(BaseModel):
+    """Response model for single company analysis."""
+
+    company_name: str
+    analysis: str
+    patent_count: int
+    success: bool
+    error: str | None = None
+    timestamp: datetime
+
+
+class BatchAnalysisResponse(BaseModel):
+    """Response model for batch company analysis."""
+
+    results: list[CompanyAnalysisResponse]
+    total_companies: int
+    successful: int
+    failed: int
+    timestamp: datetime
+
+
+class BatchAnalysisRequest(BaseModel):
+    """Request model for batch company analysis."""
+
+    companies: list[str] = Field(
+        ..., min_length=1, max_length=20, description="List of company names to analyze"
+    )
+    max_workers: int = Field(
+        default=3, ge=1, le=5, description="Max concurrent analyses"
+    )
+
+
+class JobStatus(BaseModel):
+    """Status of a background analysis job."""
+
+    job_id: str
+    status: str  # "pending", "running", "completed", "failed"
+    progress: int  # 0-100
+    total_companies: int
+    completed_companies: int
+    result: BatchAnalysisResponse | None = None
+    error: str | None = None
+
+
+class HealthResponse(BaseModel):
+    """Health check response."""
+
+    status: str
+    version: str
+    timestamp: datetime
+
+
+# In-memory job storage (for demo; production would use Redis/DB)
+_jobs: dict[str, JobStatus] = {}
+_job_counter = 0
+
+
+def _convert_result(result: CompanyAnalysisResult) -> CompanyAnalysisResponse:
+    """Convert internal result to API response model."""
+    return CompanyAnalysisResponse(
+        company_name=result.company_name,
+        analysis=result.analysis,
+        patent_count=result.patent_count,
+        success=result.success,
+        error=result.error,
+        timestamp=result.timestamp,
+    )
+
+
+def _convert_batch_result(result: BatchAnalysisResult) -> BatchAnalysisResponse:
+    """Convert internal batch result to API response model."""
+    return BatchAnalysisResponse(
+        results=[_convert_result(r) for r in result.results],
+        total_companies=result.total_companies,
+        successful=result.successful,
+        failed=result.failed,
+        timestamp=result.timestamp,
+    )
+
+
+# Global analyzer instance
+_analyzer: CompanyAnalyzer | None = None
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Initialize resources on startup."""
+    global _analyzer
+    _analyzer = CompanyAnalyzer()
+    yield
+    # Cleanup if needed
+    _analyzer = None
+
+
+app = FastAPI(
+    title="SPARC API",
+    description="Semiconductor Patent & Analytics Report Core - Patent portfolio analysis using AI",
+    version="1.0.0",
+    lifespan=lifespan,
+)
+
+
+@app.get("/health", response_model=HealthResponse, tags=["System"])
+async def health_check():
+    """Check API health status."""
+    return HealthResponse(
+        status="healthy",
+        version="1.0.0",
+        timestamp=datetime.now(),
+    )
+
+
+@app.get(
+    "/analyze/{company_name}",
+    response_model=CompanyAnalysisResponse,
+    tags=["Analysis"],
+)
+async def analyze_company(company_name: str):
+    """Analyze a single company's patent portfolio.
+
+    This endpoint retrieves recent patents for the specified company,
+    parses them, and uses AI to generate a comprehensive analysis.
+
+    Args:
+        company_name: Name of the company to analyze (e.g., "nvidia", "intel")
+
+    Returns:
+        Analysis results including patent count, AI insights, and success status
+    """
+    if not _analyzer:
+        raise HTTPException(status_code=503, detail="Analyzer not initialized")
+
+    result = _analyzer._analyze_company_safe(company_name)
+    return _convert_result(result)
+
+
+@app.post(
+    "/analyze/batch",
+    response_model=BatchAnalysisResponse,
+    tags=["Analysis"],
+)
+async def analyze_companies_batch(request: BatchAnalysisRequest):
+    """Analyze multiple companies' patent portfolios.
+
+    Processes companies concurrently for improved performance.
+    Limited to 20 companies per request.
+
+    Args:
+        request: List of company names and optional worker count
+
+    Returns:
+        Batch results with individual company analyses and summary statistics
+    """
+    if not _analyzer:
+        raise HTTPException(status_code=503, detail="Analyzer not initialized")
+
+    result = _analyzer.analyze_companies(
+        companies=request.companies,
+        max_workers=request.max_workers,
+    )
+    return _convert_batch_result(result)
+
+
+def _run_batch_job(job_id: str, companies: list[str], max_workers: int):
+    """Background task for batch analysis."""
+    global _jobs, _analyzer
+
+    if not _analyzer:
+        _jobs[job_id].status = "failed"
+        _jobs[job_id].error = "Analyzer not initialized"
+        return
+
+    _jobs[job_id].status = "running"
+
+    def progress_callback(company: str, completed: int, total: int):
+        _jobs[job_id].completed_companies = completed
+        _jobs[job_id].progress = int((completed / total) * 100)
+
+    try:
+        result = _analyzer.analyze_companies(
+            companies=companies,
+            max_workers=max_workers,
+            progress_callback=progress_callback,
+        )
+        _jobs[job_id].status = "completed"
+        _jobs[job_id].progress = 100
+        _jobs[job_id].result = _convert_batch_result(result)
+    except Exception as e:
+        _jobs[job_id].status = "failed"
+        _jobs[job_id].error = str(e)
+
+
+@app.post("/analyze/batch/async", response_model=JobStatus, tags=["Analysis"])
+async def analyze_companies_async(
+    request: BatchAnalysisRequest, background_tasks: BackgroundTasks
+):
+    """Start an asynchronous batch analysis job.
+
+    Returns immediately with a job ID that can be used to poll for status.
+    Useful for large batch analyses that may take a long time.
+
+    Args:
+        request: List of company names and optional worker count
+
+    Returns:
+        Job status with job_id for polling
+    """
+    global _job_counter
+
+    _job_counter += 1
+    job_id = f"job_{_job_counter}_{datetime.now().strftime('%Y%m%d%H%M%S')}"
+
+    _jobs[job_id] = JobStatus(
+        job_id=job_id,
+        status="pending",
+        progress=0,
+        total_companies=len(request.companies),
+        completed_companies=0,
+    )
+
+    background_tasks.add_task(
+        _run_batch_job, job_id, request.companies, request.max_workers
+    )
+
+    return _jobs[job_id]
+
+
+@app.get("/jobs/{job_id}", response_model=JobStatus, tags=["Jobs"])
+async def get_job_status(job_id: str):
+    """Get the status of a background analysis job.
+
+    Args:
+        job_id: The job ID returned from the async batch endpoint
+
+    Returns:
+        Current job status including progress and results when complete
+    """
+    if job_id not in _jobs:
+        raise HTTPException(status_code=404, detail=f"Job {job_id} not found")
+
+    return _jobs[job_id]
+
+
+@app.get("/jobs", response_model=list[JobStatus], tags=["Jobs"])
+async def list_jobs(
+    status: Annotated[
+        str | None,
+        Query(description="Filter by status: pending, running, completed, failed"),
+    ] = None,
+    limit: Annotated[int, Query(ge=1, le=100)] = 10,
+):
+    """List all analysis jobs.
+
+    Args:
+        status: Optional filter by job status
+        limit: Maximum number of jobs to return (default 10, max 100)
+
+    Returns:
+        List of job statuses
+    """
+    jobs = list(_jobs.values())
+
+    if status:
+        jobs = [j for j in jobs if j.status == status]
+
+    # Return most recent first
+    jobs.sort(key=lambda j: j.job_id, reverse=True)
+
+    return jobs[:limit]