Compare commits

..

1 Commits

Author SHA1 Message Date
agent-company a2f81b0396 Add test coverage for analyze_single_patent auto-download path
7 test cases covering:
- PDF on disk analyzed directly (no download)
- Auto-download from cached metadata link when PDF missing
- FileNotFoundError when no cached link available
- Cached patent without pdf_link raises FileNotFoundError
- Analysis pipeline failure returns error string gracefully
- Model override parameter forwarded to LLM
- FileNotFoundError during parsing re-raised (not swallowed)

Closes leeworks-agents/SPARC#1661

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 19:21:53 +00:00
2 changed files with 211 additions and 263 deletions
+211
View File
@@ -0,0 +1,211 @@
"""Tests for analyze_single_patent auto-download path.
Covers issue #1661:
- PDF exists on disk: direct analysis (happy path)
- PDF not on disk, cached link exists: auto-download and analyze
- PDF not on disk, no cached link: FileNotFoundError
- Analysis failure after PDF found: graceful error message
- Model override parameter passthrough
"""
import os
from unittest.mock import MagicMock, patch
import pytest
from SPARC.analyzer import CompanyAnalyzer
from SPARC.types import Patent
@pytest.fixture(autouse=True)
def mock_db(mocker):
"""Mock DatabaseClient so no real DB is needed."""
mock_db_cls = mocker.patch("SPARC.analyzer.DatabaseClient")
mock_db_instance = MagicMock()
mock_db_instance.get_cached_patent.return_value = None
mock_db_instance.get_cached_serp_query.return_value = None
mock_db_cls.return_value = mock_db_instance
return mock_db_instance
@pytest.fixture
def analyzer(mocker, mock_db):
"""Create a CompanyAnalyzer with mocked LLM and DB."""
mocker.patch("SPARC.analyzer.LLMAnalyzer")
return CompanyAnalyzer(openrouter_api_key="test-key")
class TestAnalyzeSinglePatentAutoDownload:
"""Test the auto-download logic in analyze_single_patent."""
def test_pdf_on_disk_analyzed_directly(self, analyzer, mocker, tmp_path):
"""When PDF exists on disk, it is analyzed directly without download."""
patent_id = "US-11234567-B2"
# Create the patents dir and PDF file
patents_dir = tmp_path / "patents"
patents_dir.mkdir()
pdf_path = patents_dir / f"{patent_id}.pdf"
pdf_path.write_bytes(b"fake PDF content")
mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
mock_parse.return_value = {"abstract": "test", "claims": "test claims"}
mock_minimize.return_value = "minimized content"
analyzer.llm_analyzer.analyze_patent_content.return_value = "Good patent."
# Change cwd so patents/{patent_id}.pdf resolves to our tmp_path
original_cwd = os.getcwd()
os.chdir(tmp_path)
try:
result = analyzer.analyze_single_patent(patent_id, "TestCo")
finally:
os.chdir(original_cwd)
assert result == "Good patent."
# DB cache should not have been queried since file existed
analyzer.db.get_cached_patent.assert_not_called()
def test_auto_download_from_cached_link(self, analyzer, mocker, tmp_path):
"""When PDF is not on disk but link is cached, auto-download occurs."""
patent_id = "US-99887766-A1"
# No patents dir exists (PDF not on disk)
mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
downloaded_patent = Patent(patent_id=patent_id, pdf_link="https://example.com/patent.pdf")
downloaded_patent.pdf_path = f"patents/{patent_id}.pdf"
mock_save.return_value = downloaded_patent
# Cached patent has a PDF link
analyzer.db.get_cached_patent.return_value = {
"patent_id": patent_id,
"pdf_link": "https://example.com/patent.pdf",
}
# Mock the rest of the analysis pipeline
mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
mock_parse.return_value = {"abstract": "test abstract"}
mock_minimize.return_value = "minimized content"
analyzer.llm_analyzer.analyze_patent_content.return_value = "Strong innovation."
# Change cwd so patents/{patent_id}.pdf does NOT exist
original_cwd = os.getcwd()
os.chdir(tmp_path)
try:
result = analyzer.analyze_single_patent(patent_id, "DownloadCo")
finally:
os.chdir(original_cwd)
assert result == "Strong innovation."
analyzer.db.get_cached_patent.assert_called_once_with(patent_id)
mock_save.assert_called_once()
# Verify the Patent passed to save_patents has the correct ID and link
saved_patent = mock_save.call_args[0][0]
assert saved_patent.patent_id == patent_id
assert saved_patent.pdf_link == "https://example.com/patent.pdf"
def test_no_cached_link_raises_file_not_found(self, analyzer, mocker, tmp_path):
"""When PDF is not on disk and no cached link, FileNotFoundError raised."""
patent_id = "US-00000000-X1"
analyzer.db.get_cached_patent.return_value = None
original_cwd = os.getcwd()
os.chdir(tmp_path)
try:
with pytest.raises(FileNotFoundError, match="no download link is cached"):
analyzer.analyze_single_patent(patent_id, "MissingCo")
finally:
os.chdir(original_cwd)
def test_cached_patent_without_pdf_link_raises(self, analyzer, mocker, tmp_path):
"""When cached patent exists but has no pdf_link, FileNotFoundError raised."""
patent_id = "US-11111111-B1"
analyzer.db.get_cached_patent.return_value = {
"patent_id": patent_id,
"pdf_link": None,
}
original_cwd = os.getcwd()
os.chdir(tmp_path)
try:
with pytest.raises(FileNotFoundError, match="no download link is cached"):
analyzer.analyze_single_patent(patent_id, "NoPDFCo")
finally:
os.chdir(original_cwd)
def test_analysis_exception_returns_error_message(self, analyzer, mocker, tmp_path):
"""When analysis pipeline fails, returns error string instead of raising."""
patent_id = "US-22222222-A2"
# Create the PDF on disk so it skips download
patents_dir = tmp_path / "patents"
patents_dir.mkdir()
(patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")
# Parse fails
mocker.patch(
"SPARC.analyzer.SERP.parse_patent_pdf",
side_effect=ValueError("Corrupt PDF"),
)
original_cwd = os.getcwd()
os.chdir(tmp_path)
try:
result = analyzer.analyze_single_patent(patent_id, "ErrorCo")
finally:
os.chdir(original_cwd)
assert "Failed to analyze patent" in result
assert "Corrupt PDF" in result
def test_model_override_passed_to_llm(self, analyzer, mocker, tmp_path):
"""The model parameter is forwarded to the LLM analyzer."""
patent_id = "US-33333333-B2"
patents_dir = tmp_path / "patents"
patents_dir.mkdir()
(patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")
mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf", return_value={"abstract": "test"})
mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm", return_value="content")
analyzer.llm_analyzer.analyze_patent_content.return_value = "Analysis result."
original_cwd = os.getcwd()
os.chdir(tmp_path)
try:
result = analyzer.analyze_single_patent(
patent_id, "ModelCo", model="openai/gpt-4o"
)
finally:
os.chdir(original_cwd)
assert result == "Analysis result."
analyzer.llm_analyzer.analyze_patent_content.assert_called_once_with(
patent_content="content",
company_name="ModelCo",
model="openai/gpt-4o",
)
def test_file_not_found_during_parse_re_raised(self, analyzer, mocker, tmp_path):
"""FileNotFoundError during parsing is re-raised, not caught."""
patent_id = "US-44444444-C1"
patents_dir = tmp_path / "patents"
patents_dir.mkdir()
(patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")
mocker.patch(
"SPARC.analyzer.SERP.parse_patent_pdf",
side_effect=FileNotFoundError("PDF file vanished"),
)
original_cwd = os.getcwd()
os.chdir(tmp_path)
try:
with pytest.raises(FileNotFoundError, match="PDF file vanished"):
analyzer.analyze_single_patent(patent_id, "VanishCo")
finally:
os.chdir(original_cwd)
-263
View File
@@ -1,263 +0,0 @@
"""Tests for S3/MinIO storage backend in storage.py.
Covers issue #1660:
- S3StorageBackend read, write, exists, path_for
- Error handling: NoSuchKey, generic S3 errors, bucket auto-creation
- get_storage_backend() factory function
- LocalStorageBackend (basic sanity checks)
"""
from unittest.mock import MagicMock, patch
import pytest
from SPARC.storage import LocalStorageBackend, S3StorageBackend, get_storage_backend
# ---------- S3StorageBackend ----------
class TestS3StorageBackend:
"""Tests for the S3-compatible storage backend."""
@pytest.fixture
def s3_backend(self):
"""Create an S3StorageBackend with a fully mocked boto3 client."""
with patch.dict("sys.modules", {"boto3": MagicMock()}):
import boto3 as mock_boto
mock_s3 = MagicMock()
mock_boto.client.return_value = mock_s3
mock_s3.head_bucket.return_value = {}
backend = S3StorageBackend(
bucket="test-bucket",
endpoint_url="http://minio:9000",
access_key="minioadmin",
secret_key="minioadmin",
)
# Expose mock for assertions
backend._mock_s3 = mock_s3
yield backend
def test_write_puts_object(self, s3_backend):
"""write() calls put_object with correct bucket, key, and body."""
s3_backend.write("US-12345678-B2.pdf", b"PDF content here")
s3_backend._mock_s3.put_object.assert_called_once_with(
Bucket="test-bucket",
Key="US-12345678-B2.pdf",
Body=b"PDF content here",
ContentType="application/pdf",
)
def test_read_returns_body(self, s3_backend):
"""read() returns the Body content from get_object."""
mock_body = MagicMock()
mock_body.read.return_value = b"PDF data"
s3_backend._mock_s3.get_object.return_value = {"Body": mock_body}
result = s3_backend.read("US-12345678-B2.pdf")
assert result == b"PDF data"
s3_backend._mock_s3.get_object.assert_called_once_with(
Bucket="test-bucket",
Key="US-12345678-B2.pdf",
)
def test_read_nosuchkey_raises_file_not_found(self, s3_backend):
"""read() raises FileNotFoundError when object does not exist."""
# Create a NoSuchKey exception class on the mock
nosuchkey = type("NoSuchKey", (Exception,), {})
s3_backend._mock_s3.exceptions.NoSuchKey = nosuchkey
s3_backend._mock_s3.get_object.side_effect = nosuchkey("not found")
# Reassign s3 to trigger the except branch
s3_backend.s3 = s3_backend._mock_s3
with pytest.raises(FileNotFoundError, match="S3 object not found"):
s3_backend.read("missing.pdf")
def test_read_generic_404_raises_file_not_found(self, s3_backend):
"""read() handles generic 404 errors from S3-compatible APIs."""
nosuchkey = type("NoSuchKey", (Exception,), {})
s3_backend._mock_s3.exceptions.NoSuchKey = nosuchkey
s3_backend.s3 = s3_backend._mock_s3
s3_backend.s3.get_object.side_effect = Exception("An error occurred (404)")
with pytest.raises(FileNotFoundError, match="S3 object not found"):
s3_backend.read("missing.pdf")
def test_read_other_error_re_raises(self, s3_backend):
"""read() re-raises non-404 errors."""
nosuchkey = type("NoSuchKey", (Exception,), {})
s3_backend._mock_s3.exceptions.NoSuchKey = nosuchkey
s3_backend.s3 = s3_backend._mock_s3
s3_backend.s3.get_object.side_effect = Exception("Internal server error")
with pytest.raises(Exception, match="Internal server error"):
s3_backend.read("some-file.pdf")
def test_exists_returns_true_for_existing_object(self, s3_backend):
"""exists() returns True when head_object succeeds with content."""
s3_backend._mock_s3.head_object.return_value = {"ContentLength": 1024}
assert s3_backend.exists("US-12345678-B2.pdf") is True
def test_exists_returns_false_for_missing_object(self, s3_backend):
"""exists() returns False when head_object raises an exception."""
s3_backend._mock_s3.head_object.side_effect = Exception("Not Found")
assert s3_backend.exists("missing.pdf") is False
def test_exists_returns_false_for_zero_length(self, s3_backend):
"""exists() returns False when object has zero content length."""
s3_backend._mock_s3.head_object.return_value = {"ContentLength": 0}
assert s3_backend.exists("empty.pdf") is False
def test_path_for_returns_s3_uri(self, s3_backend):
"""path_for() returns an s3:// URI."""
path = s3_backend.path_for("US-12345678-B2.pdf")
assert path == "s3://test-bucket/US-12345678-B2.pdf"
def test_constructor_creates_bucket_if_missing(self):
"""Constructor creates the bucket if head_bucket fails."""
with patch.dict("sys.modules", {"boto3": MagicMock()}):
import boto3 as mock_boto
mock_s3 = MagicMock()
mock_boto.client.return_value = mock_s3
mock_s3.head_bucket.side_effect = Exception("Bucket not found")
S3StorageBackend(
bucket="new-bucket",
endpoint_url="http://minio:9000",
access_key="admin",
secret_key="admin",
)
mock_s3.create_bucket.assert_called_once_with(Bucket="new-bucket")
def test_constructor_handles_bucket_creation_failure(self):
"""Constructor logs warning but does not crash if bucket creation fails."""
with patch.dict("sys.modules", {"boto3": MagicMock()}):
import boto3 as mock_boto
mock_s3 = MagicMock()
mock_boto.client.return_value = mock_s3
mock_s3.head_bucket.side_effect = Exception("Bucket not found")
mock_s3.create_bucket.side_effect = Exception("Permission denied")
# Should not raise
backend = S3StorageBackend(
bucket="locked-bucket",
endpoint_url="http://minio:9000",
access_key="admin",
secret_key="admin",
)
assert backend.bucket == "locked-bucket"
def test_constructor_passes_endpoint_and_credentials(self):
"""Constructor passes endpoint_url and credentials to boto3.client."""
with patch.dict("sys.modules", {"boto3": MagicMock()}):
import boto3 as mock_boto
mock_s3 = MagicMock()
mock_boto.client.return_value = mock_s3
S3StorageBackend(
bucket="test",
endpoint_url="http://minio:9000",
access_key="mykey",
secret_key="mysecret",
)
mock_boto.client.assert_called_with(
"s3",
endpoint_url="http://minio:9000",
aws_access_key_id="mykey",
aws_secret_access_key="mysecret",
)
# ---------- LocalStorageBackend ----------
class TestLocalStorageBackend:
"""Basic sanity checks for the local filesystem backend."""
def test_write_and_read(self, tmp_path):
"""Write and read round-trip produces identical content."""
backend = LocalStorageBackend(base_dir=str(tmp_path))
backend.write("test.pdf", b"hello world")
result = backend.read("test.pdf")
assert result == b"hello world"
def test_read_missing_file_raises(self, tmp_path):
"""Reading a non-existent file raises FileNotFoundError."""
backend = LocalStorageBackend(base_dir=str(tmp_path))
with pytest.raises(FileNotFoundError):
backend.read("nonexistent.pdf")
def test_exists_true_for_written_file(self, tmp_path):
"""exists() returns True after writing a file."""
backend = LocalStorageBackend(base_dir=str(tmp_path))
backend.write("test.pdf", b"data")
assert backend.exists("test.pdf") is True
def test_exists_false_for_missing_file(self, tmp_path):
"""exists() returns False for non-existent file."""
backend = LocalStorageBackend(base_dir=str(tmp_path))
assert backend.exists("missing.pdf") is False
def test_exists_false_for_empty_file(self, tmp_path):
"""exists() returns False for zero-length file."""
backend = LocalStorageBackend(base_dir=str(tmp_path))
backend.write("empty.pdf", b"")
assert backend.exists("empty.pdf") is False
def test_path_for_returns_full_path(self, tmp_path):
"""path_for() returns the full filesystem path."""
backend = LocalStorageBackend(base_dir=str(tmp_path))
path = backend.path_for("test.pdf")
assert path == str(tmp_path / "test.pdf")
# ---------- get_storage_backend() factory ----------
class TestGetStorageBackend:
"""Tests for the storage backend factory function."""
@patch("SPARC.storage.config")
def test_returns_local_backend_by_default(self, mock_config):
"""Default config returns LocalStorageBackend."""
mock_config.storage_backend = "local"
backend = get_storage_backend()
assert isinstance(backend, LocalStorageBackend)
@patch("SPARC.storage.config")
def test_returns_s3_backend_when_configured(self, mock_config):
"""Setting storage_backend=s3 returns S3StorageBackend."""
mock_config.storage_backend = "s3"
mock_config.s3_bucket = "test-bucket"
mock_config.s3_endpoint_url = "http://minio:9000"
mock_config.s3_access_key = "key"
mock_config.s3_secret_key = "secret"
with patch.dict("sys.modules", {"boto3": MagicMock()}):
backend = get_storage_backend()
assert isinstance(backend, S3StorageBackend)
@patch("SPARC.storage.config")
def test_case_insensitive_backend_selection(self, mock_config):
"""Backend selection is case-insensitive."""
mock_config.storage_backend = "LOCAL"
backend = get_storage_backend()
assert isinstance(backend, LocalStorageBackend)