"""Tests for analyze_single_patent auto-download path.

Covers issue #1661:
- PDF exists on disk: direct analysis (happy path)
- PDF not on disk, cached link exists: auto-download and analyze
- PDF not on disk, no cached link: FileNotFoundError
- PDF not on disk, cached record without a pdf_link: FileNotFoundError
- Analysis failure after PDF found: graceful error message
- FileNotFoundError raised while parsing: re-raised to the caller
- Model override parameter passthrough
"""

import os
from unittest.mock import MagicMock, patch

import pytest

from SPARC.analyzer import CompanyAnalyzer
from SPARC.types import Patent


@pytest.fixture(autouse=True)
def mock_db(mocker):
    """Mock DatabaseClient so no real DB is needed.

    Both cache lookups default to ``None``; individual tests override
    ``get_cached_patent`` when they need a cached record.

    Returns:
        The MagicMock instance handed out by the patched DatabaseClient class.
    """
    mock_db_instance = MagicMock()
    mock_db_instance.get_cached_patent.return_value = None
    mock_db_instance.get_cached_serp_query.return_value = None
    mocker.patch("SPARC.analyzer.DatabaseClient", return_value=mock_db_instance)
    return mock_db_instance


@pytest.fixture
def analyzer(mocker, mock_db):
    """Create a CompanyAnalyzer with mocked LLM and DB.

    ``mock_db`` is requested only to make the ordering explicit: the
    DatabaseClient patch must be active before the analyzer is built.
    """
    mocker.patch("SPARC.analyzer.LLMAnalyzer")
    return CompanyAnalyzer(openrouter_api_key="test-key")


@pytest.fixture
def in_tmp_cwd(tmp_path):
    """Run the test with ``tmp_path`` as the current working directory.

    The tests rely on ``patents/{patent_id}.pdf`` being resolved relative to
    the cwd, so switching into an empty temp dir controls whether the PDF
    "exists". The original cwd is restored on teardown, which pytest runs
    whether the test passes or fails.
    """
    original_cwd = os.getcwd()
    os.chdir(tmp_path)
    yield tmp_path
    os.chdir(original_cwd)


def _write_fake_pdf(root, patent_id, content=b"fake PDF"):
    """Create ``root/patents/{patent_id}.pdf`` with *content* and return its path."""
    patents_dir = root / "patents"
    patents_dir.mkdir()
    pdf_path = patents_dir / f"{patent_id}.pdf"
    pdf_path.write_bytes(content)
    return pdf_path


@pytest.mark.usefixtures("in_tmp_cwd")
class TestAnalyzeSinglePatentAutoDownload:
    """Test the auto-download logic in analyze_single_patent.

    Every test runs with ``tmp_path`` as the cwd (see ``in_tmp_cwd``), so a
    PDF is "on disk" only when the test explicitly creates it.
    """

    def test_pdf_on_disk_analyzed_directly(self, analyzer, mocker, tmp_path):
        """When PDF exists on disk, it is analyzed directly without download."""
        patent_id = "US-11234567-B2"
        _write_fake_pdf(tmp_path, patent_id, b"fake PDF content")

        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            return_value={"abstract": "test", "claims": "test claims"},
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm",
            return_value="minimized content",
        )
        analyzer.llm_analyzer.analyze_patent_content.return_value = "Good patent."

        result = analyzer.analyze_single_patent(patent_id, "TestCo")

        assert result == "Good patent."
        # DB cache should not have been queried since file existed
        analyzer.db.get_cached_patent.assert_not_called()

    def test_auto_download_from_cached_link(self, analyzer, mocker):
        """When PDF is not on disk but link is cached, auto-download occurs."""
        patent_id = "US-99887766-A1"
        pdf_link = "https://example.com/patent.pdf"

        # No patents dir exists in the cwd, so the PDF is not on disk.
        downloaded_patent = Patent(patent_id=patent_id, pdf_link=pdf_link)
        downloaded_patent.pdf_path = f"patents/{patent_id}.pdf"
        mock_save = mocker.patch(
            "SPARC.analyzer.SERP.save_patents",
            return_value=downloaded_patent,
        )

        # Cached patent has a PDF link
        analyzer.db.get_cached_patent.return_value = {
            "patent_id": patent_id,
            "pdf_link": pdf_link,
        }

        # Mock the rest of the analysis pipeline
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            return_value={"abstract": "test abstract"},
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm",
            return_value="minimized content",
        )
        analyzer.llm_analyzer.analyze_patent_content.return_value = "Strong innovation."

        result = analyzer.analyze_single_patent(patent_id, "DownloadCo")

        assert result == "Strong innovation."
        analyzer.db.get_cached_patent.assert_called_once_with(patent_id)
        mock_save.assert_called_once()

        # Verify the Patent passed to save_patents has the correct ID and link
        saved_patent = mock_save.call_args[0][0]
        assert saved_patent.patent_id == patent_id
        assert saved_patent.pdf_link == pdf_link

    def test_no_cached_link_raises_file_not_found(self, analyzer):
        """When PDF is not on disk and no cached link, FileNotFoundError raised."""
        patent_id = "US-00000000-X1"
        analyzer.db.get_cached_patent.return_value = None

        with pytest.raises(FileNotFoundError, match="no download link is cached"):
            analyzer.analyze_single_patent(patent_id, "MissingCo")

    def test_cached_patent_without_pdf_link_raises(self, analyzer):
        """When cached patent exists but has no pdf_link, FileNotFoundError raised."""
        patent_id = "US-11111111-B1"
        analyzer.db.get_cached_patent.return_value = {
            "patent_id": patent_id,
            "pdf_link": None,
        }

        with pytest.raises(FileNotFoundError, match="no download link is cached"):
            analyzer.analyze_single_patent(patent_id, "NoPDFCo")

    def test_analysis_exception_returns_error_message(self, analyzer, mocker, tmp_path):
        """When analysis pipeline fails, returns error string instead of raising."""
        patent_id = "US-22222222-A2"
        # Create the PDF on disk so it skips download
        _write_fake_pdf(tmp_path, patent_id)

        # Parse fails
        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            side_effect=ValueError("Corrupt PDF"),
        )

        result = analyzer.analyze_single_patent(patent_id, "ErrorCo")

        assert "Failed to analyze patent" in result
        assert "Corrupt PDF" in result

    def test_model_override_passed_to_llm(self, analyzer, mocker, tmp_path):
        """The model parameter is forwarded to the LLM analyzer."""
        patent_id = "US-33333333-B2"
        _write_fake_pdf(tmp_path, patent_id)

        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            return_value={"abstract": "test"},
        )
        mocker.patch(
            "SPARC.analyzer.SERP.minimize_patent_for_llm",
            return_value="content",
        )
        analyzer.llm_analyzer.analyze_patent_content.return_value = "Analysis result."

        result = analyzer.analyze_single_patent(
            patent_id, "ModelCo", model="openai/gpt-4o"
        )

        assert result == "Analysis result."
        analyzer.llm_analyzer.analyze_patent_content.assert_called_once_with(
            patent_content="content",
            company_name="ModelCo",
            model="openai/gpt-4o",
        )

    def test_file_not_found_during_parse_re_raised(self, analyzer, mocker, tmp_path):
        """FileNotFoundError during parsing is re-raised, not caught."""
        patent_id = "US-44444444-C1"
        _write_fake_pdf(tmp_path, patent_id)

        mocker.patch(
            "SPARC.analyzer.SERP.parse_patent_pdf",
            side_effect=FileNotFoundError("PDF file vanished"),
        )

        with pytest.raises(FileNotFoundError, match="PDF file vanished"):
            analyzer.analyze_single_patent(patent_id, "VanishCo")