Compare commits

..

1 Commits

Author SHA1 Message Date
agent-company a2f81b0396 Add test coverage for analyze_single_patent auto-download path
7 test cases covering:
- PDF on disk analyzed directly (no download)
- Auto-download from cached metadata link when PDF missing
- FileNotFoundError when no cached link available
- Cached patent without pdf_link raises FileNotFoundError
- Analysis pipeline failure returns error string gracefully
- Model override parameter forwarded to LLM
- FileNotFoundError during parsing re-raised (not swallowed)

Closes leeworks-agents/SPARC#1661

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-20 19:21:53 +00:00
2 changed files with 211 additions and 280 deletions
+211
View File
@@ -0,0 +1,211 @@
"""Tests for analyze_single_patent auto-download path.
Covers issue #1661:
- PDF exists on disk: direct analysis (happy path)
- PDF not on disk, cached link exists: auto-download and analyze
- PDF not on disk, no cached link: FileNotFoundError
- Analysis failure after PDF found: graceful error message
- Model override parameter passthrough
"""
import os
from unittest.mock import MagicMock, patch
import pytest
from SPARC.analyzer import CompanyAnalyzer
from SPARC.types import Patent
@pytest.fixture(autouse=True)
def mock_db(mocker):
    """Replace ``SPARC.analyzer.DatabaseClient`` with a mock for every test.

    The patched class returns a single ``MagicMock`` instance whose
    ``get_cached_patent`` and ``get_cached_serp_query`` lookups both
    return ``None`` by default. That instance is returned so tests can
    adjust or inspect it.
    """
    db_stub = MagicMock()
    # Default to "nothing cached"; individual tests override as needed.
    db_stub.get_cached_patent.return_value = None
    db_stub.get_cached_serp_query.return_value = None
    mocker.patch("SPARC.analyzer.DatabaseClient", return_value=db_stub)
    return db_stub
@pytest.fixture
def analyzer(mocker, mock_db):
    """Build a ``CompanyAnalyzer`` with the LLM layer and the DB mocked out.

    ``SPARC.analyzer.LLMAnalyzer`` is patched before the analyzer is
    constructed; the database mock comes from the ``mock_db`` fixture.
    """
    # Patch first: presumably the constructor instantiates LLMAnalyzer
    # (the tests configure ``analyzer.llm_analyzer`` as a mock) -- verify
    # against CompanyAnalyzer.__init__.
    mocker.patch("SPARC.analyzer.LLMAnalyzer")
    company_analyzer = CompanyAnalyzer(openrouter_api_key="test-key")
    return company_analyzer
class TestAnalyzeSinglePatentAutoDownload:
    """Test the auto-download logic in analyze_single_patent.

    Each test calls ``monkeypatch.chdir(tmp_path)`` right before invoking
    ``analyze_single_patent`` so that the relative ``patents/{patent_id}.pdf``
    path resolves inside the per-test temporary directory. pytest undoes the
    directory change at teardown, so no manual save/restore of the working
    directory is needed.
    """

    def test_pdf_on_disk_analyzed_directly(self, analyzer, mocker, monkeypatch, tmp_path):
        """When PDF exists on disk, it is analyzed directly without download."""
        patent_id = "US-11234567-B2"

        # Create the patents dir and PDF file
        patents_dir = tmp_path / "patents"
        patents_dir.mkdir()
        (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF content")

        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            return_value={"abstract": "test", "claims": "test claims"},
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm",
            return_value="minimized content",
        )
        analyzer.llm_analyzer.analyze_patent_content.return_value = "Good patent."

        # Change cwd so patents/{patent_id}.pdf resolves to our tmp_path
        monkeypatch.chdir(tmp_path)
        result = analyzer.analyze_single_patent(patent_id, "TestCo")

        assert result == "Good patent."
        # DB cache should not have been queried since file existed
        analyzer.db.get_cached_patent.assert_not_called()

    def test_auto_download_from_cached_link(self, analyzer, mocker, monkeypatch, tmp_path):
        """When PDF is not on disk but link is cached, auto-download occurs."""
        patent_id = "US-99887766-A1"

        # No patents dir exists (PDF not on disk); the download itself is mocked.
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        downloaded_patent = Patent(patent_id=patent_id, pdf_link="https://example.com/patent.pdf")
        downloaded_patent.pdf_path = f"patents/{patent_id}.pdf"
        mock_save.return_value = downloaded_patent

        # Cached patent has a PDF link
        analyzer.db.get_cached_patent.return_value = {
            "patent_id": patent_id,
            "pdf_link": "https://example.com/patent.pdf",
        }

        # Mock the rest of the analysis pipeline
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            return_value={"abstract": "test abstract"},
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm",
            return_value="minimized content",
        )
        analyzer.llm_analyzer.analyze_patent_content.return_value = "Strong innovation."

        # Change cwd so patents/{patent_id}.pdf does NOT exist
        monkeypatch.chdir(tmp_path)
        result = analyzer.analyze_single_patent(patent_id, "DownloadCo")

        assert result == "Strong innovation."
        analyzer.db.get_cached_patent.assert_called_once_with(patent_id)
        mock_save.assert_called_once()
        # Verify the Patent passed to save_patents has the correct ID and link
        saved_patent = mock_save.call_args[0][0]
        assert saved_patent.patent_id == patent_id
        assert saved_patent.pdf_link == "https://example.com/patent.pdf"

    def test_no_cached_link_raises_file_not_found(self, analyzer, monkeypatch, tmp_path):
        """When PDF is not on disk and no cached link, FileNotFoundError raised."""
        patent_id = "US-00000000-X1"
        analyzer.db.get_cached_patent.return_value = None

        # Empty tmp_path: there is no patents/ directory to find the PDF in.
        monkeypatch.chdir(tmp_path)
        with pytest.raises(FileNotFoundError, match="no download link is cached"):
            analyzer.analyze_single_patent(patent_id, "MissingCo")

    def test_cached_patent_without_pdf_link_raises(self, analyzer, monkeypatch, tmp_path):
        """When cached patent exists but has no pdf_link, FileNotFoundError raised."""
        patent_id = "US-11111111-B1"
        analyzer.db.get_cached_patent.return_value = {
            "patent_id": patent_id,
            "pdf_link": None,
        }

        monkeypatch.chdir(tmp_path)
        with pytest.raises(FileNotFoundError, match="no download link is cached"):
            analyzer.analyze_single_patent(patent_id, "NoPDFCo")

    def test_analysis_exception_returns_error_message(self, analyzer, mocker, monkeypatch, tmp_path):
        """When analysis pipeline fails, returns error string instead of raising."""
        patent_id = "US-22222222-A2"

        # Create the PDF on disk so it skips download
        patents_dir = tmp_path / "patents"
        patents_dir.mkdir()
        (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")

        # Parse fails
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            side_effect=ValueError("Corrupt PDF"),
        )

        monkeypatch.chdir(tmp_path)
        result = analyzer.analyze_single_patent(patent_id, "ErrorCo")

        assert "Failed to analyze patent" in result
        assert "Corrupt PDF" in result

    def test_model_override_passed_to_llm(self, analyzer, mocker, monkeypatch, tmp_path):
        """The model parameter is forwarded to the LLM analyzer."""
        patent_id = "US-33333333-B2"

        patents_dir = tmp_path / "patents"
        patents_dir.mkdir()
        (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")

        mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf", return_value={"abstract": "test"})
        mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm", return_value="content")
        analyzer.llm_analyzer.analyze_patent_content.return_value = "Analysis result."

        monkeypatch.chdir(tmp_path)
        result = analyzer.analyze_single_patent(
            patent_id, "ModelCo", model="openai/gpt-4o"
        )

        assert result == "Analysis result."
        analyzer.llm_analyzer.analyze_patent_content.assert_called_once_with(
            patent_content="content",
            company_name="ModelCo",
            model="openai/gpt-4o",
        )

    def test_file_not_found_during_parse_re_raised(self, analyzer, mocker, monkeypatch, tmp_path):
        """FileNotFoundError during parsing is re-raised, not caught."""
        patent_id = "US-44444444-C1"

        # The PDF exists up front; the failure is injected at the parse step.
        patents_dir = tmp_path / "patents"
        patents_dir.mkdir()
        (patents_dir / f"{patent_id}.pdf").write_bytes(b"fake PDF")

        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            side_effect=FileNotFoundError("PDF file vanished"),
        )

        monkeypatch.chdir(tmp_path)
        with pytest.raises(FileNotFoundError, match="PDF file vanished"):
            analyzer.analyze_single_patent(patent_id, "VanishCo")
-280
View File
@@ -1,280 +0,0 @@
"""Tests for webhook notification system: retry logic and Slack/Discord payload format.
Covers issue #1657:
- Retry logic with exponential backoff in _send_with_retry
- Slack/Discord payload formatting in _build_payload
- Generic HTTP POST payload formatting
- notify() dispatching to multiple URLs
- notify_job_completed() and notify_alert() convenience helpers
"""
from datetime import datetime
from unittest.mock import MagicMock, patch, call
import pytest
import requests
from SPARC.webhooks import (
MAX_RETRIES,
_build_payload,
_is_slack_url,
_send_with_retry,
notify,
notify_alert,
notify_job_completed,
)
class TestIsSlackUrl:
    """Check how ``_is_slack_url`` classifies different webhook URLs."""

    def test_slack_webhook_url(self):
        """A hooks.slack.com URL is detected."""
        slack_url = "https://hooks.slack.com/services/T00/B00/xxx"
        assert _is_slack_url(slack_url) is True

    def test_discord_webhook_url(self):
        """A discord.com webhook URL is detected as well."""
        discord_url = "https://discord.com/api/webhooks/123/abc"
        assert _is_slack_url(discord_url) is True

    def test_generic_url(self):
        """An unrelated webhook URL is not detected."""
        other_url = "https://example.com/webhook"
        assert _is_slack_url(other_url) is False

    def test_empty_url(self):
        """The empty string is not detected."""
        assert _is_slack_url("") is False
class TestBuildPayload:
    """Check the dictionaries produced by ``_build_payload``."""

    def test_generic_payload_structure(self):
        """Generic payload includes event type, timestamp, and data."""
        body = _build_payload("job_completed", {"job_id": "abc123"})

        assert body["event"] == "job_completed"
        assert body["job_id"] == "abc123"
        assert "timestamp" in body
        # Timestamp should be ISO format ending with Z
        stamp = body["timestamp"]
        assert stamp.endswith("Z")

    def test_slack_payload_wraps_in_text(self):
        """Slack payload wraps content in a 'text' field."""
        body = _build_payload("patent_alert", {"company_name": "NVIDIA"}, slack=True)

        assert "text" in body
        message = body["text"]
        assert "patent_alert" in message
        assert "NVIDIA" in message
        # Slack payload should NOT have the event/timestamp at top level
        assert "event" not in body
        assert "timestamp" not in body

    def test_generic_payload_does_not_have_text_field(self):
        """Non-Slack payload does not wrap in text."""
        body = _build_payload("job_completed", {"status": "done"})

        assert "text" not in body
        assert body["status"] == "done"

    def test_slack_payload_contains_bold_header(self):
        """Slack payload starts with bold event header using Slack markdown."""
        body = _build_payload("job_completed", {"count": 5}, slack=True)

        message = body["text"]
        assert message.startswith("*[SPARC] job_completed*")

    def test_payload_merges_all_data_keys(self):
        """All data keys are included in the generic payload."""
        fields = {"key1": "val1", "key2": 42, "key3": True}
        body = _build_payload("test_event", fields)

        assert body["key1"] == "val1"
        assert body["key2"] == 42
        assert body["key3"] is True
class TestSendWithRetry:
    """Exercise ``_send_with_retry`` with ``requests.post`` and ``time.sleep`` mocked.

    The ``@patch`` decorators are applied bottom-up, so ``mock_post`` (the
    lower decorator) is injected before ``mock_sleep``.
    """

    @patch("SPARC.webhooks.time.sleep")
    @patch("SPARC.webhooks.requests.post")
    def test_success_on_first_attempt(self, mock_post, mock_sleep):
        """Successful delivery on first attempt, no retries."""
        mock_post.return_value = MagicMock(status_code=200)

        delivered = _send_with_retry("https://example.com/hook", {"event": "test"})

        assert delivered is True
        mock_post.assert_called_once()
        mock_sleep.assert_not_called()

    @patch("SPARC.webhooks.time.sleep")
    @patch("SPARC.webhooks.requests.post")
    def test_success_on_second_attempt(self, mock_post, mock_sleep):
        """Fails first, succeeds on retry."""
        server_error = MagicMock(status_code=500)
        ok_response = MagicMock(status_code=200)
        mock_post.side_effect = [server_error, ok_response]

        delivered = _send_with_retry("https://example.com/hook", {"event": "test"})

        assert delivered is True
        assert mock_post.call_count == 2
        mock_sleep.assert_called_once()

    @patch("SPARC.webhooks.time.sleep")
    @patch("SPARC.webhooks.requests.post")
    def test_all_retries_exhausted(self, mock_post, mock_sleep):
        """Returns False after all retries fail."""
        mock_post.return_value = MagicMock(status_code=500)

        delivered = _send_with_retry("https://example.com/hook", {"event": "test"})

        assert delivered is False
        assert mock_post.call_count == MAX_RETRIES
        # One sleep between consecutive attempts, none after the last.
        assert mock_sleep.call_count == MAX_RETRIES - 1

    @patch("SPARC.webhooks.time.sleep")
    @patch("SPARC.webhooks.requests.post")
    def test_exponential_backoff_timing(self, mock_post, mock_sleep):
        """Backoff wait times follow exponential pattern (2^attempt)."""
        mock_post.return_value = MagicMock(status_code=500)

        _send_with_retry("https://example.com/hook", {"event": "test"})

        # With BACKOFF_BASE=2: attempt 1 -> sleep(2), attempt 2 -> sleep(4)
        expected_waits = []
        for attempt in range(1, MAX_RETRIES):
            expected_waits.append(call(2 ** attempt))
        assert mock_sleep.call_args_list == expected_waits

    @patch("SPARC.webhooks.time.sleep")
    @patch("SPARC.webhooks.requests.post")
    def test_network_error_triggers_retry(self, mock_post, mock_sleep):
        """Network exceptions trigger retry, not immediate failure."""
        connection_failure = requests.ConnectionError("Connection refused")
        ok_response = MagicMock(status_code=200)
        mock_post.side_effect = [connection_failure, ok_response]

        delivered = _send_with_retry("https://example.com/hook", {"event": "test"})

        assert delivered is True
        assert mock_post.call_count == 2

    @patch("SPARC.webhooks.time.sleep")
    @patch("SPARC.webhooks.requests.post")
    def test_timeout_error_triggers_retry(self, mock_post, mock_sleep):
        """Timeout exceptions trigger retry."""
        timeout_failure = requests.Timeout("Request timed out")
        ok_response = MagicMock(status_code=200)
        mock_post.side_effect = [timeout_failure, ok_response]

        delivered = _send_with_retry("https://example.com/hook", {"event": "test"})

        assert delivered is True
        assert mock_post.call_count == 2

    @patch("SPARC.webhooks.time.sleep")
    @patch("SPARC.webhooks.requests.post")
    def test_2xx_status_codes_accepted(self, mock_post, mock_sleep):
        """Any 2xx status code is treated as success."""
        mock_post.return_value = MagicMock(status_code=204)

        delivered = _send_with_retry("https://example.com/hook", {"event": "test"})

        assert delivered is True
        mock_post.assert_called_once()

    @patch("SPARC.webhooks.time.sleep")
    @patch("SPARC.webhooks.requests.post")
    def test_posts_json_payload(self, mock_post, mock_sleep):
        """Payload is sent as JSON with correct timeout."""
        mock_post.return_value = MagicMock(status_code=200)
        body = {"event": "test", "data": "value"}

        _send_with_retry("https://example.com/hook", body)

        mock_post.assert_called_once_with(
            "https://example.com/hook", json=body, timeout=10
        )
class TestNotify:
    """Exercise ``notify()`` with ``_send_with_retry`` and ``WEBHOOK_URLS`` patched.

    Only the ``_send_with_retry`` patch injects an argument; the
    ``WEBHOOK_URLS`` patches supply an explicit replacement list.
    """

    @patch("SPARC.webhooks._send_with_retry")
    @patch("SPARC.webhooks.WEBHOOK_URLS", ["https://example.com/hook1", "https://example.com/hook2"])
    def test_dispatches_to_all_urls(self, mock_send):
        """notify() sends to every configured webhook URL."""
        mock_send.return_value = True

        notify("job_completed", {"job_id": "test123"})

        assert mock_send.call_count == 2

    @patch("SPARC.webhooks._send_with_retry")
    @patch("SPARC.webhooks.WEBHOOK_URLS", [])
    def test_no_urls_configured_returns_immediately(self, mock_send):
        """No-op when no webhook URLs are configured."""
        notify("job_completed", {"job_id": "test123"})

        mock_send.assert_not_called()

    @patch("SPARC.webhooks._send_with_retry")
    @patch("SPARC.webhooks.WEBHOOK_URLS", [
        "https://hooks.slack.com/services/T00/B00/xxx",
        "https://example.com/generic",
    ])
    def test_slack_url_gets_slack_payload(self, mock_send):
        """Slack URLs receive Slack-formatted payloads, others get generic."""
        mock_send.return_value = True

        notify("test_event", {"key": "val"})

        recorded_calls = mock_send.call_args_list
        # First call (Slack URL) should have "text" key; the payload is the
        # second positional argument of each recorded call.
        first_payload = recorded_calls[0][0][1]
        assert "text" in first_payload
        # Second call (generic URL) should have "event" key
        second_payload = recorded_calls[1][0][1]
        assert "event" in second_payload
        assert second_payload["event"] == "test_event"
class TestNotifyJobCompleted:
    """Check what ``notify_job_completed()`` hands to ``notify``."""

    @patch("SPARC.webhooks.notify")
    def test_sends_correct_event_and_data(self, mock_notify):
        """Job completion sends proper event type and summary."""
        notify_job_completed(
            job_id="batch-001",
            status="completed",
            total_companies=10,
            successful=8,
            failed=2,
        )

        mock_notify.assert_called_once()
        # notify is expected to receive exactly (event, data) positionally.
        positional = mock_notify.call_args[0]
        event_name, details = positional
        assert event_name == "job_completed"
        assert details["job_id"] == "batch-001"
        assert details["successful"] == 8
        assert details["failed"] == 2
        assert "8/10" in details["summary"]
class TestNotifyAlert:
    """Check what ``notify_alert()`` hands to ``notify``."""

    @patch("SPARC.webhooks.notify")
    def test_sends_correct_event_and_data(self, mock_notify):
        """Alert notification sends patent_alert event type."""
        notify_alert(
            company_name="NVIDIA",
            alert_type="patent_count_change",
            message="Patent count increased by 30%",
        )

        mock_notify.assert_called_once()
        # notify is expected to receive exactly (event, data) positionally.
        positional = mock_notify.call_args[0]
        event_name, details = positional
        assert event_name == "patent_alert"
        assert details["company_name"] == "NVIDIA"
        assert details["alert_type"] == "patent_count_change"
        assert "30%" in details["message"]