deploy: security hardening, multi-model support, S3 storage, analytics, CI improvements (70 commits) #4

Merged
0xWheatyz merged 70 commits from leeworks-agents/SPARC:main into main 2026-03-31 11:53:47 +00:00
2 changed files with 53 additions and 11 deletions
Showing only changes of commit ab74904845 - Show all commits
+19 -9
View File
@@ -108,12 +108,10 @@ class CompanyAnalyzer:
def analyze_single_patent(self, patent_id: str, company_name: str) -> str:
"""Analyze a single patent by ID.
Prerequisite:
The patent PDF must already exist at ``patents/{patent_id}.pdf``
before calling this method. PDFs are downloaded automatically when
using the batch analysis pipeline (``analyze_company`` or the
``/analyze/batch`` API endpoint). For standalone usage, download
the PDF manually or call ``SERP.save_patents()`` first.
If the patent PDF is not already on disk, this method attempts to
download it automatically by looking up the PDF link in the database
cache. If the link is not cached either, a ``FileNotFoundError`` is
raised with instructions on how to obtain the PDF.
Args:
patent_id: Publication ID of the patent (e.g. "US-11234567-B2")
@@ -123,7 +121,7 @@ class CompanyAnalyzer:
Analysis of the specific patent's innovation quality
Raises:
FileNotFoundError: If the patent PDF is not found at the expected path.
FileNotFoundError: If the patent PDF cannot be found or downloaded.
"""
import os
logger.info("Analyzing patent %s for %s...", patent_id, company_name)
@@ -131,9 +129,21 @@ class CompanyAnalyzer:
patent_path = f"patents/{patent_id}.pdf"
if not os.path.exists(patent_path):
# Attempt to download the PDF automatically from cached metadata
cached = self.db.get_cached_patent(patent_id)
pdf_link = cached.get("pdf_link") if cached else None
if pdf_link:
logger.info("PDF not on disk; downloading %s from cached link", patent_id)
patent = SERP.save_patents(
Patent(patent_id=patent_id, pdf_link=pdf_link)
)
patent_path = patent.pdf_path
else:
raise FileNotFoundError(
f"Patent PDF not found at '{patent_path}'. "
f"Download the PDF first using SERP.save_patents() or the batch analysis pipeline."
f"Patent PDF not found at '{patent_path}' and no download link is "
f"cached for '{patent_id}'. Run a company analysis first to populate "
f"the cache, or call SERP.save_patents() with the patent's PDF link."
)
try:
+32
View File
@@ -429,6 +429,38 @@ async def analyze_company(
return _convert_result(result)
@app.get(
"/analyze/patent/{patent_id}",
tags=["Analysis"],
)
async def analyze_single_patent(
patent_id: str,
company_name: str = Query(description="Company name for analysis context"),
_: UserResponse = Depends(get_current_user),
):
"""Analyze a single patent by its publication ID.
If the patent PDF is not already cached locally, the system will attempt
to download it automatically from a previously cached link. If no link
is available, a 404 error is returned.
Args:
patent_id: Patent publication ID (e.g. "US-11234567-B2")
company_name: Company name for analysis context
Returns:
Analysis text for the patent
"""
if not _analyzer:
raise HTTPException(status_code=503, detail="Analyzer not initialized")
try:
analysis = _analyzer.analyze_single_patent(patent_id, company_name)
return {"patent_id": patent_id, "company_name": company_name, "analysis": analysis}
except FileNotFoundError as e:
raise HTTPException(status_code=404, detail=str(e))
@app.post(
"/analyze/batch",
response_model=BatchAnalysisResponse,