"""Tests for the high-level company analyzer orchestration."""

from unittest.mock import Mock, patch, call, MagicMock

import pytest

from SPARC.analyzer import CompanyAnalyzer
from SPARC.types import Patent, Patents, CompanyAnalysisResult, BatchAnalysisResult


@pytest.fixture(autouse=True)
def mock_db(mocker):
    """Mock DatabaseClient for all tests so no real DB connection is needed.

    The fixture is autouse, so ``SPARC.analyzer.DatabaseClient`` is patched in
    every test. Both cache lookups default to a miss (``None``); tests that
    need a cache hit request this fixture and override the return values.

    Returns:
        The ``MagicMock`` handed out whenever ``DatabaseClient(...)`` is called.
    """
    mock_db_cls = mocker.patch("SPARC.analyzer.DatabaseClient")
    mock_db_instance = MagicMock()
    mock_db_instance.get_cached_patent.return_value = None
    mock_db_instance.get_cached_serp_query.return_value = None
    mock_db_cls.return_value = mock_db_instance
    return mock_db_instance


class TestCompanyAnalyzer:
    """Test the CompanyAnalyzer orchestration logic."""

    def test_analyzer_initialization(self, mocker):
        """Test analyzer initialization with API key."""
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        CompanyAnalyzer(openrouter_api_key="test-key")

        mock_llm.assert_called_once_with(api_key="test-key")

    def test_analyze_company_full_pipeline(self, mocker, mock_db):
        """Test complete company analysis pipeline."""
        # Mock all the dependencies
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        # Setup mock return values
        test_patent = Patent(
            patent_id="US123", pdf_link="http://example.com/test.pdf"
        )
        mock_query.return_value = Patents(patents=[test_patent])

        # The same object is returned by the save mock, so it must already
        # carry the path that the parse step is asserted to receive.
        test_patent.pdf_path = "patents/US123.pdf"
        mock_save.return_value = test_patent

        mock_parse.return_value = {
            "abstract": "Test abstract",
            "claims": "Test claims",
        }
        mock_minimize.return_value = "Minimized content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = (
            "Strong innovation portfolio"
        )
        mock_llm.return_value = mock_llm_instance

        # Run the analysis
        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_company("TestCorp")

        # Verify the pipeline executed correctly
        assert result == "Strong innovation portfolio"
        mock_query.assert_called_once_with("TestCorp")
        mock_save.assert_called_once()
        mock_parse.assert_called_once_with("patents/US123.pdf")
        mock_minimize.assert_called_once()
        mock_llm_instance.analyze_patent_portfolio.assert_called_once()

        # Verify the data passed to LLM
        llm_call_args = mock_llm_instance.analyze_patent_portfolio.call_args
        patents_data = llm_call_args[1]["patents_data"]
        assert len(patents_data) == 1
        assert patents_data[0]["patent_id"] == "US123"
        assert patents_data[0]["content"] == "Minimized content"

    def test_analyze_company_no_patents_found(self, mocker):
        """Test handling when no patents are found for a company."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_query.return_value = Patents(patents=[])
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_company("UnknownCorp")

        assert result == "No patents found for UnknownCorp"

    def test_analyze_company_handles_processing_errors(self, mocker):
        """Test that analysis continues even if some patents fail to process."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        # Create two test patents
        patent1 = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
        patent2 = Patent(patent_id="US456", pdf_link="http://example.com/2.pdf")
        mock_query.return_value = Patents(patents=[patent1, patent2])

        # First patent processes successfully
        patent1.pdf_path = "patents/US123.pdf"

        # Second patent raises an error
        def save_side_effect(p):
            if p.patent_id == "US123":
                p.pdf_path = "patents/US123.pdf"
                return p
            raise Exception("Download failed")

        mock_save.side_effect = save_side_effect
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis result"
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_company("TestCorp")

        # Should still succeed with the one patent that worked
        assert result == "Analysis result"

        # Verify only one patent was analyzed
        llm_call_args = mock_llm_instance.analyze_patent_portfolio.call_args
        patents_data = llm_call_args[1]["patents_data"]
        assert len(patents_data) == 1
        assert patents_data[0]["patent_id"] == "US123"

    def test_analyze_company_all_patents_fail(self, mocker):
        """Test handling when all patents fail to process."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        patent = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
        mock_query.return_value = Patents(patents=[patent])

        # Make processing fail
        mock_save.side_effect = Exception("Processing error")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_company("TestCorp")

        assert result == "Failed to process any patents for TestCorp"

    def test_analyze_single_patent(self, mocker):
        """Test single patent analysis."""
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        mock_parse.return_value = {"abstract": "Test abstract"}
        mock_minimize.return_value = "Minimized content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_content.return_value = (
            "Innovative patent analysis"
        )
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_single_patent("US123", "TestCorp")

        assert result == "Innovative patent analysis"
        mock_parse.assert_called_once_with("patents/US123.pdf")
        mock_llm_instance.analyze_patent_content.assert_called_once_with(
            patent_content="Minimized content", company_name="TestCorp"
        )

    def test_analyze_single_patent_error_handling(self, mocker):
        """Test single patent analysis with processing error."""
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        mock_parse.side_effect = FileNotFoundError("PDF not found")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_single_patent("US999", "TestCorp")

        assert "Failed to analyze patent US999" in result
        assert "PDF not found" in result


class TestSingleQueryBugFix:
    """Test that SERP.query is only called once per company analysis."""

    def test_analyze_company_safe_calls_query_once(self, mocker, mock_db):
        """_analyze_company_safe should call SERP.query exactly once."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        mock_query.return_value = Patents(patents=[patent])

        def save_side_effect(p):
            p.pdf_path = "patents/US123.pdf"
            return p

        mock_save.side_effect = save_side_effect
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis"
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        analyzer._analyze_company_safe("TestCorp")

        # The key assertion: SERP.query called exactly once, not twice
        mock_query.assert_called_once_with("TestCorp")

    def test_analyze_company_with_prefetched_patents_skips_query(self, mocker):
        """analyze_company should not call SERP.query when patents are provided."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        prefetched = Patents(patents=[patent])

        def save_side_effect(p):
            p.pdf_path = "patents/US123.pdf"
            return p

        mock_save.side_effect = save_side_effect
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis"
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        analyzer.analyze_company("TestCorp", patents=prefetched)

        # SERP.query should never be called
        mock_query.assert_not_called()


class TestPatentCaching:
    """Test patent-level DB caching in the pipeline."""

    def test_process_single_patent_uses_db_cache(self, mocker, mock_db):
        """_process_single_patent returns cached content when available."""
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")

        mock_db.get_cached_patent.return_value = {
            "patent_id": "US123",
            "minimized_content": "Cached minimized content",
        }

        patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        result = CompanyAnalyzer._process_single_patent(patent, "TestCorp", mock_db)

        assert result == {"patent_id": "US123", "content": "Cached minimized content"}
        # Should NOT download since cache hit
        mock_save.assert_not_called()

    def test_process_single_patent_stores_to_db_cache(self, mocker, mock_db):
        """_process_single_patent stores result in DB after processing."""
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")

        # No cache hit
        mock_db.get_cached_patent.return_value = None

        patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")

        def save_side_effect(p):
            p.pdf_path = "patents/US123.pdf"
            return p

        mock_save.side_effect = save_side_effect
        mock_parse.return_value = {"abstract": "Test abstract"}
        mock_minimize.return_value = "Minimized content"

        result = CompanyAnalyzer._process_single_patent(patent, "TestCorp", mock_db)

        assert result == {"patent_id": "US123", "content": "Minimized content"}
        mock_db.store_patent.assert_called_once_with(
            patent_id="US123",
            company_name="TestCorp",
            pdf_link="http://example.com/test.pdf",
            raw_sections={"abstract": "Test abstract"},
            minimized_content="Minimized content",
        )

    def test_serp_query_cache_hit_skips_api(self, mocker, mock_db):
        """When SERP query is cached, API call is skipped."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        # Patched only to keep the real implementations out of the test; this
        # test makes no assertions on them.
        mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        # Simulate SERP cache hit
        mock_db.get_cached_serp_query.return_value = ["US123"]
        # Simulate patent cache hit too
        mock_db.get_cached_patent.return_value = {
            "patent_id": "US123",
            "minimized_content": "Cached content",
        }

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis"
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_company("TestCorp")

        assert result == "Analysis"
        # SERP.query should NOT be called
        mock_query.assert_not_called()
        # No downloads should happen
        mock_save.assert_not_called()

    def test_serp_query_cache_miss_stores_result(self, mocker, mock_db):
        """When SERP query cache misses, result is stored after API call."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        mock_db.get_cached_serp_query.return_value = None

        patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        mock_query.return_value = Patents(patents=[patent])

        def save_side_effect(p):
            p.pdf_path = "patents/US123.pdf"
            return p

        mock_save.side_effect = save_side_effect
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis"
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        analyzer.analyze_company("TestCorp")

        mock_db.store_serp_query.assert_called_once()
        call_kwargs = mock_db.store_serp_query.call_args[1]
        assert call_kwargs["company_name"] == "TestCorp"
        assert call_kwargs["patent_ids"] == ["US123"]


class TestBatchProcessing:
    """Test multi-company batch processing functionality."""

    def test_analyze_companies_success(self, mocker):
        """Test batch analysis of multiple companies."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        # Setup mock returns
        def query_side_effect(company):
            patent = Patent(
                patent_id=f"US-{company}",
                pdf_link=f"http://example.com/{company}.pdf",
            )
            return Patents(patents=[patent])

        mock_query.side_effect = query_side_effect

        def save_side_effect(patent):
            patent.pdf_path = f"patents/{patent.patent_id}.pdf"
            return patent

        mock_save.side_effect = save_side_effect
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis result"
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["CompanyA", "CompanyB"], max_workers=2)

        assert isinstance(result, BatchAnalysisResult)
        assert result.total_companies == 2
        assert result.successful == 2
        assert result.failed == 0
        assert len(result.results) == 2

    def test_analyze_companies_with_failures(self, mocker):
        """Test batch analysis handles partial failures."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        # Patch the rest of the pipeline as well: GoodCorp returns a patent,
        # which would otherwise be handed to the real SERP download/parse
        # functions with an example.com link, making the test non-hermetic.
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        def query_side_effect(company):
            if company == "FailCorp":
                return Patents(patents=[])
            patent = Patent(
                patent_id=f"US-{company}",
                pdf_link=f"http://example.com/{company}.pdf",
            )
            return Patents(patents=[patent])

        mock_query.side_effect = query_side_effect

        def save_side_effect(patent):
            patent.pdf_path = f"patents/{patent.patent_id}.pdf"
            return patent

        mock_save.side_effect = save_side_effect
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis result"
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["GoodCorp", "FailCorp"], max_workers=1)

        assert result.total_companies == 2
        assert result.failed >= 1  # At least FailCorp should fail

        # Find the failed result
        fail_result = next(r for r in result.results if r.company_name == "FailCorp")
        assert fail_result.success is False

    def test_analyze_companies_sequential(self, mocker):
        """Test sequential batch analysis."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        def query_side_effect(company):
            patent = Patent(
                patent_id=f"US-{company}",
                pdf_link=f"http://example.com/{company}.pdf",
            )
            return Patents(patents=[patent])

        mock_query.side_effect = query_side_effect

        def save_side_effect(patent):
            patent.pdf_path = f"patents/{patent.patent_id}.pdf"
            return patent

        mock_save.side_effect = save_side_effect
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis"
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies_sequential(["Corp1", "Corp2", "Corp3"])

        assert result.total_companies == 3
        assert len(result.results) == 3

    def test_analyze_companies_progress_callback(self, mocker):
        """Test that progress callback is invoked correctly."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        def query_side_effect(company):
            patent = Patent(
                patent_id=f"US-{company}",
                pdf_link=f"http://example.com/{company}.pdf",
            )
            return Patents(patents=[patent])

        mock_query.side_effect = query_side_effect

        def save_side_effect(patent):
            patent.pdf_path = f"patents/{patent.patent_id}.pdf"
            return patent

        mock_save.side_effect = save_side_effect
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis"
        mock_llm.return_value = mock_llm_instance

        callback = Mock()

        analyzer = CompanyAnalyzer()
        analyzer.analyze_companies(
            ["A", "B"], max_workers=1, progress_callback=callback
        )

        # Two companies were submitted and the callback fired twice.
        assert callback.call_count == 2

    def test_company_analysis_result_structure(self, mocker, mock_db):
        """Test CompanyAnalysisResult has correct structure."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        mock_query.return_value = Patents(patents=[patent])

        # Simulate DB caching: after store, subsequent get returns the IDs
        mock_db.get_cached_serp_query.side_effect = [None, ["US123"]]

        def save_side_effect(p):
            p.pdf_path = "patents/US123.pdf"
            return p

        mock_save.side_effect = save_side_effect
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Strong innovation"
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["TestCorp"], max_workers=1)

        assert len(result.results) == 1
        company_result = result.results[0]
        assert company_result.company_name == "TestCorp"
        assert company_result.analysis == "Strong innovation"
        assert company_result.patent_count == 1
        assert company_result.success is True
        assert company_result.error is None
        assert company_result.timestamp is not None