Compare commits

..

1 Commits

Author SHA1 Message Date
agent-company ecc2c37bcd fix: auto-download patent PDF in analyze_single_patent before reading
When the PDF is not on disk, analyze_single_patent now looks up the
cached PDF link from the database and downloads it automatically.
If no link is cached, a clear FileNotFoundError is raised. Also adds
a GET /analyze/patent/{patent_id} API endpoint that exposes this
functionality and returns 404 when the PDF cannot be obtained.

Closes leeworks-agents/SPARC#36

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 10:08:34 +00:00
5 changed files with 58 additions and 28 deletions
-11
View File
@@ -34,17 +34,6 @@ jobs:
run: |
ruff check SPARC/ tests/
- name: Install Node.js and frontend dependencies
shell: sh
run: |
apk add --no-cache nodejs npm
cd frontend && npm ci
- name: Run TypeScript type check
shell: sh
run: |
cd frontend && npx tsc --noEmit
- name: Run pytest
shell: sh
env:
+21 -11
View File
@@ -108,12 +108,10 @@ class CompanyAnalyzer:
def analyze_single_patent(self, patent_id: str, company_name: str) -> str:
"""Analyze a single patent by ID.
Prerequisite:
The patent PDF must already exist at ``patents/{patent_id}.pdf``
before calling this method. PDFs are downloaded automatically when
using the batch analysis pipeline (``analyze_company`` or the
``/analyze/batch`` API endpoint). For standalone usage, download
the PDF manually or call ``SERP.save_patents()`` first.
If the patent PDF is not already on disk, this method attempts to
download it automatically by looking up the PDF link in the database
cache. If the link is not cached either, a ``FileNotFoundError`` is
raised with instructions on how to obtain the PDF.
Args:
patent_id: Publication ID of the patent (e.g. "US-11234567-B2")
@@ -123,7 +121,7 @@ class CompanyAnalyzer:
Analysis of the specific patent's innovation quality
Raises:
FileNotFoundError: If the patent PDF is not found at the expected path.
FileNotFoundError: If the patent PDF cannot be found or downloaded.
"""
import os
logger.info("Analyzing patent %s for %s...", patent_id, company_name)
@@ -131,10 +129,22 @@ class CompanyAnalyzer:
patent_path = f"patents/{patent_id}.pdf"
if not os.path.exists(patent_path):
raise FileNotFoundError(
f"Patent PDF not found at '{patent_path}'. "
f"Download the PDF first using SERP.save_patents() or the batch analysis pipeline."
)
# Attempt to download the PDF automatically from cached metadata
cached = self.db.get_cached_patent(patent_id)
pdf_link = cached.get("pdf_link") if cached else None
if pdf_link:
logger.info("PDF not on disk; downloading %s from cached link", patent_id)
patent = SERP.save_patents(
Patent(patent_id=patent_id, pdf_link=pdf_link)
)
patent_path = patent.pdf_path
else:
raise FileNotFoundError(
f"Patent PDF not found at '{patent_path}' and no download link is "
f"cached for '{patent_id}'. Run a company analysis first to populate "
f"the cache, or call SERP.save_patents() with the patent's PDF link."
)
try:
sections = SERP.parse_patent_pdf(patent_path)
+32
View File
@@ -429,6 +429,38 @@ async def analyze_company(
return _convert_result(result)
@app.get(
    "/analyze/patent/{patent_id}",
    tags=["Analysis"],
)
async def analyze_single_patent(
    patent_id: str,
    company_name: str = Query(description="Company name for analysis context"),
    _: UserResponse = Depends(get_current_user),
):
    """Analyze a single patent by its publication ID.

    If the patent PDF is not already cached locally, the system will attempt
    to download it automatically from a previously cached link. If no link
    is available, a 404 error is returned.

    Args:
        patent_id: Patent publication ID (e.g. "US-11234567-B2")
        company_name: Company name for analysis context

    Returns:
        A mapping with the keys ``patent_id``, ``company_name`` and
        ``analysis`` (the analysis text for the patent).

    Raises:
        HTTPException: 503 when the module-level analyzer has not been
            initialized; 404 when the analyzer raises ``FileNotFoundError``
            because the PDF could not be found or downloaded.
    """
    if not _analyzer:
        raise HTTPException(status_code=503, detail="Analyzer not initialized")

    # Keep the try body limited to the one call that can raise
    # FileNotFoundError so unrelated failures are never mapped to a 404.
    try:
        analysis = _analyzer.analyze_single_patent(patent_id, company_name)
    except FileNotFoundError as e:
        # Chain the original error so the traceback keeps the root cause
        # (and so ruff's B904 "raise without from" check passes).
        raise HTTPException(status_code=404, detail=str(e)) from e

    return {"patent_id": patent_id, "company_name": company_name, "analysis": analysis}
@app.post(
"/analyze/batch",
response_model=BatchAnalysisResponse,
+4 -4
View File
@@ -10,7 +10,7 @@
"dependencies": {
"@tanstack/react-query": "^5.51.0",
"axios": "^1.7.2",
"lucide-react": "^1.7.0",
"lucide-react": "^0.400.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-router-dom": "^6.24.0",
@@ -3452,9 +3452,9 @@
}
},
"node_modules/lucide-react": {
"version": "1.7.0",
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-1.7.0.tgz",
"integrity": "sha512-yI7BeItCLZJTXikmK4KNUGCKoGzSvbKlfCvw44bU4fXAL6v3gYS4uHD1jzsLkfwODYwI6Drw5Tu9Z5ulDe0TSg==",
"version": "0.400.0",
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.400.0.tgz",
"integrity": "sha512-rpp7pFHh3Xd93KHixNgB0SqThMHpYNzsGUu69UaQbSZ75Q/J3m5t6EhKyMT3m4w2WOxmJ2mY0tD3vebnXqQryQ==",
"license": "ISC",
"peerDependencies": {
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
+1 -2
View File
@@ -7,13 +7,12 @@
"dev": "vite",
"build": "tsc -b && vite build",
"lint": "eslint .",
"typecheck": "tsc --noEmit",
"preview": "vite preview"
},
"dependencies": {
"@tanstack/react-query": "^5.51.0",
"axios": "^1.7.2",
"lucide-react": "^1.7.0",
"lucide-react": "^0.400.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-router-dom": "^6.24.0",