Compare commits

..

1 Commit

Author SHA1 Message Date
agent-company a2f81b0396 Add test coverage for analyze_single_patent auto-download path
7 test cases covering:
- PDF on disk analyzed directly (no download)
- Auto-download from cached metadata link when PDF missing
- FileNotFoundError when no cached link available
- Cached patent without pdf_link raises FileNotFoundError
- Analysis pipeline failure returns error string gracefully
- Model override parameter forwarded to LLM
- FileNotFoundError during parsing re-raised (not swallowed)

Closes leeworks-agents/SPARC#1661

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 19:21:53 +00:00
2 changed files with 211 additions and 224 deletions
+211
View File
@@ -0,0 +1,211 @@
"""Tests for analyze_single_patent auto-download path.

Covers issue #1661:
- PDF exists on disk: direct analysis (happy path)
- PDF not on disk, cached link exists: auto-download and analyze
- PDF not on disk, no cached link: FileNotFoundError
- PDF not on disk, cached patent has no pdf_link: FileNotFoundError
- Analysis failure after PDF found: graceful error message
- Model override parameter passthrough
- FileNotFoundError raised during parsing: re-raised, not swallowed
"""
import os
from unittest.mock import MagicMock, patch
import pytest
from SPARC.analyzer import CompanyAnalyzer
from SPARC.types import Patent
@pytest.fixture(autouse=True)
def mock_db(mocker):
    """Swap ``SPARC.analyzer.DatabaseClient`` for a mock in every test.

    The patched class hands back one shared ``MagicMock`` instance whose
    ``get_cached_patent`` and ``get_cached_serp_query`` lookups both return
    ``None`` by default; individual tests override those return values.

    Returns:
        The ``MagicMock`` that stands in for the database client instance.
    """
    fake_db = MagicMock()
    # Default every cache lookup to a miss.
    for lookup_name in ("get_cached_patent", "get_cached_serp_query"):
        getattr(fake_db, lookup_name).return_value = None
    patched_cls = mocker.patch("SPARC.analyzer.DatabaseClient")
    patched_cls.return_value = fake_db
    return fake_db
@pytest.fixture
def analyzer(mocker, mock_db):
    """Build a ``CompanyAnalyzer`` with its LLM and database mocked out.

    ``SPARC.analyzer.LLMAnalyzer`` is patched before construction, and the
    ``mock_db`` fixture is requested so the ``DatabaseClient`` patch is
    already active when the constructor runs.

    Returns:
        A ``CompanyAnalyzer`` created with a dummy OpenRouter API key.
    """
    mocker.patch("SPARC.analyzer.LLMAnalyzer")
    company_analyzer = CompanyAnalyzer(openrouter_api_key="test-key")
    return company_analyzer
class TestAnalyzeSinglePatentAutoDownload:
    """Test the auto-download logic in analyze_single_patent.

    Each test moves the working directory to ``tmp_path`` through
    ``monkeypatch.chdir`` so that the relative ``patents/<patent_id>.pdf``
    path resolves inside an isolated directory. pytest restores the original
    working directory at teardown, so no manual ``try``/``finally`` is needed.
    """

    def test_pdf_on_disk_analyzed_directly(
        self, analyzer, mocker, tmp_path, monkeypatch
    ):
        """When PDF exists on disk, it is analyzed directly without download."""
        patent_id = "US-11234567-B2"

        # Create the patents dir and PDF file
        patents_dir = tmp_path / "patents"
        patents_dir.mkdir()
        pdf_path = patents_dir / f"{patent_id}.pdf"
        pdf_path.write_bytes(b"fake PDF content")

        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_parse.return_value = {"abstract": "test", "claims": "test claims"}
        mock_minimize.return_value = "minimized content"
        analyzer.llm_analyzer.analyze_patent_content.return_value = "Good patent."

        # Change cwd so patents/{patent_id}.pdf resolves to our tmp_path
        monkeypatch.chdir(tmp_path)
        result = analyzer.analyze_single_patent(patent_id, "TestCo")

        assert result == "Good patent."
        # DB cache should not have been queried since file existed
        analyzer.db.get_cached_patent.assert_not_called()

    def test_auto_download_from_cached_link(
        self, analyzer, mocker, tmp_path, monkeypatch
    ):
        """When PDF is not on disk but link is cached, auto-download occurs."""
        patent_id = "US-99887766-A1"

        # No patents dir exists (PDF not on disk)
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        downloaded_patent = Patent(
            patent_id=patent_id, pdf_link="https://example.com/patent.pdf"
        )
        downloaded_patent.pdf_path = f"patents/{patent_id}.pdf"
        mock_save.return_value = downloaded_patent

        # Cached patent has a PDF link
        analyzer.db.get_cached_patent.return_value = {
            "patent_id": patent_id,
            "pdf_link": "https://example.com/patent.pdf",
        }

        # Mock the rest of the analysis pipeline
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_parse.return_value = {"abstract": "test abstract"}
        mock_minimize.return_value = "minimized content"
        analyzer.llm_analyzer.analyze_patent_content.return_value = (
            "Strong innovation."
        )

        # Change cwd so patents/{patent_id}.pdf does NOT exist
        monkeypatch.chdir(tmp_path)
        result = analyzer.analyze_single_patent(patent_id, "DownloadCo")

        assert result == "Strong innovation."
        analyzer.db.get_cached_patent.assert_called_once_with(patent_id)
        mock_save.assert_called_once()

        # Verify the Patent passed to save_patents has the correct ID and link
        saved_patent = mock_save.call_args[0][0]
        assert saved_patent.patent_id == patent_id
        assert saved_patent.pdf_link == "https://example.com/patent.pdf"

    def test_no_cached_link_raises_file_not_found(
        self, analyzer, tmp_path, monkeypatch
    ):
        """When PDF is not on disk and no cached link, FileNotFoundError raised."""
        patent_id = "US-00000000-X1"
        analyzer.db.get_cached_patent.return_value = None

        monkeypatch.chdir(tmp_path)
        with pytest.raises(FileNotFoundError, match="no download link is cached"):
            analyzer.analyze_single_patent(patent_id, "MissingCo")

    def test_cached_patent_without_pdf_link_raises(
        self, analyzer, tmp_path, monkeypatch
    ):
        """When cached patent exists but has no pdf_link, FileNotFoundError raised."""
        patent_id = "US-11111111-B1"
        analyzer.db.get_cached_patent.return_value = {
            "patent_id": patent_id,
            "pdf_link": None,
        }

        monkeypatch.chdir(tmp_path)
        with pytest.raises(FileNotFoundError, match="no download link is cached"):
            analyzer.analyze_single_patent(patent_id, "NoPDFCo")

    def test_analysis_exception_returns_error_message(
        self, analyzer, mocker, tmp_path, monkeypatch
    ):
        """When analysis pipeline fails, returns error string instead of raising."""
        patent_id = "US-22222222-A2"

        # Create the PDF on disk so it skips download
        patents_dir = tmp_path / "patents"
        patents_dir.mkdir()
        (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")

        # Parse fails
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            side_effect=ValueError("Corrupt PDF"),
        )

        monkeypatch.chdir(tmp_path)
        result = analyzer.analyze_single_patent(patent_id, "ErrorCo")

        assert "Failed to analyze patent" in result
        assert "Corrupt PDF" in result

    def test_model_override_passed_to_llm(
        self, analyzer, mocker, tmp_path, monkeypatch
    ):
        """The model parameter is forwarded to the LLM analyzer."""
        patent_id = "US-33333333-B2"

        patents_dir = tmp_path / "patents"
        patents_dir.mkdir()
        (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")

        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            return_value={"abstract": "test"},
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm",
            return_value="content",
        )
        analyzer.llm_analyzer.analyze_patent_content.return_value = (
            "Analysis result."
        )

        monkeypatch.chdir(tmp_path)
        result = analyzer.analyze_single_patent(
            patent_id, "ModelCo", model="openai/gpt-4o"
        )

        assert result == "Analysis result."
        analyzer.llm_analyzer.analyze_patent_content.assert_called_once_with(
            patent_content="content",
            company_name="ModelCo",
            model="openai/gpt-4o",
        )

    def test_file_not_found_during_parse_re_raised(
        self, analyzer, mocker, tmp_path, monkeypatch
    ):
        """FileNotFoundError during parsing is re-raised, not caught."""
        patent_id = "US-44444444-C1"

        patents_dir = tmp_path / "patents"
        patents_dir.mkdir()
        (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")

        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            side_effect=FileNotFoundError("PDF file vanished"),
        )

        monkeypatch.chdir(tmp_path)
        with pytest.raises(FileNotFoundError, match="PDF file vanished"):
            analyzer.analyze_single_patent(patent_id, "VanishCo")
-224
View File
@@ -1,224 +0,0 @@
"""Tests for export endpoints: CSV and PDF export of analysis results.
Covers issue #1655:
- GET /export/{company_name} (CSV export)
- GET /export/{company_name}/pdf (PDF export)
All tests mock the database layer and authenticate with JWT access tokens, following the patterns used in test_auth.
"""
from datetime import datetime, timezone
from unittest.mock import MagicMock, patch
import pytest
from fastapi.testclient import TestClient
from SPARC.api import app
from SPARC.auth import create_access_token
@pytest.fixture
def client():
    """Provide a ``TestClient`` bound to the SPARC FastAPI ``app``."""
    test_client = TestClient(app)
    return test_client
@pytest.fixture(autouse=True)
def mock_db():
    """Patch ``get_db_client`` in ``SPARC.api`` and ``SPARC.auth`` with a mock.

    The mock database:
    - returns a fixed user record from ``get_user_by_id`` so auth succeeds;
    - supports ``with db.get_conn() as conn`` and ``with conn.cursor() as cur``,
      yielding a mock connection and a mock cursor respectively;
    - exposes that cursor as ``db._mock_cursor`` so tests can set
      ``fetchall`` results.

    Yields:
        The ``MagicMock`` database client, while both patches are active.
    """

    def _as_context_manager(manager, entered):
        # ``with manager as x`` binds ``x`` to ``entered``; ``__exit__``
        # returns False so exceptions raised inside the block propagate.
        manager.__enter__ = MagicMock(return_value=entered)
        manager.__exit__ = MagicMock(return_value=False)

    cursor = MagicMock()
    connection = MagicMock()
    db = MagicMock()

    # Default: user exists for auth
    db.get_user_by_id.return_value = {
        "id": 1,
        "email": "user@test.com",
        "role": "user",
        "created_at": datetime(2025, 1, 1, tzinfo=timezone.utc),
    }

    # Mock get_conn / cursor for export queries
    _as_context_manager(connection.cursor.return_value, cursor)
    _as_context_manager(db.get_conn.return_value, connection)
    db._mock_cursor = cursor

    with patch("SPARC.api.get_db_client", return_value=db):
        with patch("SPARC.auth.get_db_client", return_value=db):
            yield db
def _auth_header():
    """Return request headers carrying a bearer token for the test user.

    The token is issued by ``create_access_token`` for user id ``1``,
    email ``user@test.com`` and role ``user``.
    """
    access_token = create_access_token(1, "user@test.com", "user")
    bearer_value = f"Bearer {access_token}"
    return {"Authorization": bearer_value}
def _sample_rows():
"""Return sample llm_messages rows as tuples (matching cursor.fetchall format)."""
return [
(
"NVIDIA",
"company_analysis",
"anthropic/claude-3.5-sonnet",
"Strong AI patent portfolio with focus on GPU architectures.",
datetime(2025, 6, 15, 10, 30, 0),
),
(
"NVIDIA",
"patent_analysis",
"openai/gpt-4o",
"Patent US-12345678-B2 covers novel tensor core design.",
datetime(2025, 6, 14, 9, 0, 0),
),
]
class TestCSVExport:
    """GET /export/{company_name} -- CSV export."""

    def test_csv_export_success(self, client, mock_db):
        """A company with stored results is exported as a CSV attachment."""
        mock_db._mock_cursor.fetchall.return_value = _sample_rows()

        resp = client.get("/export/NVIDIA", headers=_auth_header())

        assert resp.status_code == 200
        assert resp.headers["content-type"].startswith("text/csv")
        assert "attachment" in resp.headers.get("content-disposition", "")
        assert "sparc_nvidia_export.csv" in resp.headers["content-disposition"]

        # Verify CSV content (CSV uses \r\n line endings)
        csv_lines = resp.text.strip().split("\n")
        assert len(csv_lines) == 3  # header + 2 data rows
        header_line, first_row = csv_lines[0], csv_lines[1]
        assert header_line.strip() == "company_name,analysis_type,model,analysis,timestamp"
        assert "NVIDIA" in first_row
        assert "company_analysis" in first_row

    def test_csv_export_no_results_returns_404(self, client, mock_db):
        """A company with no stored rows yields 404."""
        mock_db._mock_cursor.fetchall.return_value = []

        resp = client.get("/export/nonexistent", headers=_auth_header())

        assert resp.status_code == 404
        detail = resp.json()["detail"]
        assert "No analysis results found" in detail

    def test_csv_export_unauthenticated_returns_401(self, client):
        """A request without a token is rejected with 401."""
        resp = client.get("/export/NVIDIA")
        assert resp.status_code == 401

    def test_csv_export_invalid_token_returns_401(self, client):
        """A request with a malformed token is rejected with 401."""
        bad_headers = {"Authorization": "Bearer invalid.token.here"}
        resp = client.get("/export/NVIDIA", headers=bad_headers)
        assert resp.status_code == 401

    def test_csv_export_filename_sanitization(self, client, mock_db):
        """Spaces in the company name are sanitized in the download filename."""
        tesla_row = (
            "Tesla Motors",
            "company_analysis",
            "anthropic/claude-3.5-sonnet",
            "EV patent portfolio analysis.",
            datetime(2025, 6, 15, 10, 0, 0),
        )
        mock_db._mock_cursor.fetchall.return_value = [tesla_row]

        resp = client.get("/export/Tesla Motors", headers=_auth_header())

        assert resp.status_code == 200
        assert "tesla_motors" in resp.headers["content-disposition"]

    def test_csv_export_single_row(self, client, mock_db):
        """One analysis result produces a header line plus one data row."""
        only_row = _sample_rows()[0]
        mock_db._mock_cursor.fetchall.return_value = [only_row]

        resp = client.get("/export/NVIDIA", headers=_auth_header())

        assert resp.status_code == 200
        csv_lines = resp.text.strip().split("\n")
        assert len(csv_lines) == 2  # header + 1 data row
class TestPDFExport:
    """GET /export/{company_name}/pdf -- PDF report export."""

    def test_pdf_export_success(self, client, mock_db):
        """A company with stored results is exported as a PDF attachment."""
        mock_db._mock_cursor.fetchall.return_value = _sample_rows()

        resp = client.get("/export/NVIDIA/pdf", headers=_auth_header())

        assert resp.status_code == 200
        assert resp.headers["content-type"] == "application/pdf"
        assert "attachment" in resp.headers.get("content-disposition", "")
        # PDF files start with %PDF
        magic = resp.content[:4]
        assert magic == b"%PDF"

    def test_pdf_export_no_results_returns_404(self, client, mock_db):
        """A company with no stored rows yields 404."""
        mock_db._mock_cursor.fetchall.return_value = []

        resp = client.get("/export/nonexistent/pdf", headers=_auth_header())

        assert resp.status_code == 404
        detail = resp.json()["detail"]
        assert "No analysis results found" in detail

    def test_pdf_export_unauthenticated_returns_401(self, client):
        """A request without a token is rejected with 401."""
        resp = client.get("/export/NVIDIA/pdf")
        assert resp.status_code == 401

    def test_pdf_export_invalid_token_returns_401(self, client):
        """A request with a malformed token is rejected with 401."""
        bad_headers = {"Authorization": "Bearer invalid.token.here"}
        resp = client.get("/export/NVIDIA/pdf", headers=bad_headers)
        assert resp.status_code == 401

    def test_pdf_export_filename_contains_date(self, client, mock_db):
        """The PDF filename carries the ``<company>-analysis-`` prefix and ``.pdf``."""
        mock_db._mock_cursor.fetchall.return_value = _sample_rows()

        resp = client.get("/export/NVIDIA/pdf", headers=_auth_header())

        assert resp.status_code == 200
        content_disposition = resp.headers["content-disposition"]
        assert "nvidia-analysis-" in content_disposition
        assert ".pdf" in content_disposition

    def test_pdf_export_special_chars_in_response(self, client, mock_db):
        """Analysis text with XML-special chars (<, >, &) still yields a PDF."""
        special_row = (
            "TestCo",
            "company_analysis",
            "anthropic/claude-3.5-sonnet",
            "Revenue > $1B & growth <20% for Q4. Test <html> escaping.",
            datetime(2025, 6, 15, 10, 0, 0),
        )
        mock_db._mock_cursor.fetchall.return_value = [special_row]

        resp = client.get("/export/TestCo/pdf", headers=_auth_header())

        assert resp.status_code == 200
        assert resp.content[:4] == b"%PDF"

    def test_pdf_export_multiple_analyses(self, client, mock_db):
        """Several analysis records produce a PDF of non-trivial size."""
        mock_db._mock_cursor.fetchall.return_value = _sample_rows()

        resp = client.get("/export/NVIDIA/pdf", headers=_auth_header())

        assert resp.status_code == 200
        # PDF should have reasonable size (more than just headers)
        pdf_size = len(resp.content)
        assert pdf_size > 500