"""Tests for the high-level company analyzer orchestration."""

from unittest.mock import Mock, patch, call

import pytest

from SPARC.analyzer import CompanyAnalyzer
from SPARC.types import Patent, Patents, CompanyAnalysisResult, BatchAnalysisResult


def _single_patent_for(company):
    """Return a ``Patents`` collection with one patent derived from *company*.

    Used as the ``side_effect`` of the patched ``SERP.query`` so that every
    queried company name yields exactly one, uniquely identified patent.
    """
    return Patents(
        patents=[
            Patent(
                patent_id=f"US-{company}",
                pdf_link=f"http://example.com/{company}.pdf",
            )
        ]
    )


def _with_pdf_path(patent):
    """Stand-in for ``SERP.save_patents``: set ``pdf_path`` and return the patent."""
    patent.pdf_path = f"patents/{patent.patent_id}.pdf"
    return patent


def _patch_batch_pipeline(mocker, analysis):
    """Patch every external dependency the batch tests exercise.

    Args:
        mocker: The pytest-mock ``mocker`` fixture.
        analysis: Value the fake LLM returns from ``analyze_patent_portfolio``.

    Returns:
        The mock replacing ``SERP.query``, so a test can override its
        ``side_effect`` when it needs company-specific behaviour.
    """
    mock_query = mocker.patch(
        "SPARC.analyzer.SERP.query", side_effect=_single_patent_for
    )
    mocker.patch("SPARC.analyzer.SERP.save_patents", side_effect=_with_pdf_path)
    mocker.patch(
        "SPARC.analyzer.SERP.parse_patent_pdf", return_value={"abstract": "Test"}
    )
    mocker.patch(
        "SPARC.analyzer.SERP.minimize_patent_for_llm", return_value="Content"
    )

    llm_instance = Mock()
    llm_instance.analyze_patent_portfolio.return_value = analysis
    mocker.patch("SPARC.analyzer.LLMAnalyzer", return_value=llm_instance)

    return mock_query


class TestCompanyAnalyzer:
    """Test the CompanyAnalyzer orchestration logic."""

    def test_analyzer_initialization(self, mocker):
        """Test analyzer initialization with API key."""
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        CompanyAnalyzer(openrouter_api_key="test-key")

        mock_llm.assert_called_once_with(api_key="test-key")

    def test_analyze_company_full_pipeline(self, mocker):
        """Test complete company analysis pipeline."""
        # Mock all the dependencies
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        # Setup mock return values
        test_patent = Patent(
            patent_id="US123", pdf_link="http://example.com/test.pdf"
        )
        mock_query.return_value = Patents(patents=[test_patent])

        test_patent.pdf_path = "patents/US123.pdf"
        mock_save.return_value = test_patent

        mock_parse.return_value = {
            "abstract": "Test abstract",
            "claims": "Test claims",
        }
        mock_minimize.return_value = "Minimized content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = (
            "Strong innovation portfolio"
        )
        mock_llm.return_value = mock_llm_instance

        # Run the analysis
        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_company("TestCorp")

        # Verify the pipeline executed correctly
        assert result == "Strong innovation portfolio"
        mock_query.assert_called_once_with("TestCorp")
        mock_save.assert_called_once()
        mock_parse.assert_called_once_with("patents/US123.pdf")
        mock_minimize.assert_called_once()
        mock_llm_instance.analyze_patent_portfolio.assert_called_once()

        # Verify the data passed to LLM (index 1 of call_args is the kwargs dict)
        llm_call_args = mock_llm_instance.analyze_patent_portfolio.call_args
        patents_data = llm_call_args[1]["patents_data"]
        assert len(patents_data) == 1
        assert patents_data[0]["patent_id"] == "US123"
        assert patents_data[0]["content"] == "Minimized content"

    def test_analyze_company_no_patents_found(self, mocker):
        """Test handling when no patents are found for a company."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_query.return_value = Patents(patents=[])
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_company("UnknownCorp")

        assert result == "No patents found for UnknownCorp"

    def test_analyze_company_handles_processing_errors(self, mocker):
        """Test that analysis continues even if some patents fail to process."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        # Create two test patents
        patent1 = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
        patent2 = Patent(patent_id="US456", pdf_link="http://example.com/2.pdf")
        mock_query.return_value = Patents(patents=[patent1, patent2])

        # First patent processes successfully
        patent1.pdf_path = "patents/US123.pdf"

        # Second patent raises an error
        def save_side_effect(p):
            if p.patent_id != "US123":
                raise Exception("Download failed")
            p.pdf_path = "patents/US123.pdf"
            return p

        mock_save.side_effect = save_side_effect
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis result"
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_company("TestCorp")

        # Should still succeed with the one patent that worked
        assert result == "Analysis result"

        # Verify only one patent was analyzed
        llm_call_args = mock_llm_instance.analyze_patent_portfolio.call_args
        patents_data = llm_call_args[1]["patents_data"]
        assert len(patents_data) == 1
        assert patents_data[0]["patent_id"] == "US123"

    def test_analyze_company_all_patents_fail(self, mocker):
        """Test handling when all patents fail to process."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        patent = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
        mock_query.return_value = Patents(patents=[patent])

        # Make processing fail
        mock_save.side_effect = Exception("Processing error")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_company("TestCorp")

        assert result == "Failed to process any patents for TestCorp"

    def test_analyze_single_patent(self, mocker):
        """Test single patent analysis."""
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        mock_parse.return_value = {"abstract": "Test abstract"}
        mock_minimize.return_value = "Minimized content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_content.return_value = (
            "Innovative patent analysis"
        )
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_single_patent("US123", "TestCorp")

        assert result == "Innovative patent analysis"
        mock_parse.assert_called_once_with("patents/US123.pdf")
        mock_llm_instance.analyze_patent_content.assert_called_once_with(
            patent_content="Minimized content", company_name="TestCorp"
        )

    def test_analyze_single_patent_error_handling(self, mocker):
        """Test single patent analysis with processing error."""
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        mock_parse.side_effect = FileNotFoundError("PDF not found")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_single_patent("US999", "TestCorp")

        assert "Failed to analyze patent US999" in result
        assert "PDF not found" in result


class TestBatchProcessing:
    """Test multi-company batch processing functionality."""

    def test_analyze_companies_success(self, mocker):
        """Test batch analysis of multiple companies."""
        _patch_batch_pipeline(mocker, analysis="Analysis result")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["CompanyA", "CompanyB"], max_workers=2)

        assert isinstance(result, BatchAnalysisResult)
        assert result.total_companies == 2
        assert result.successful == 2
        assert result.failed == 0
        assert len(result.results) == 2

    def test_analyze_companies_with_failures(self, mocker):
        """Test batch analysis handles partial failures."""
        # Patch the whole pipeline, not just the query: otherwise GoodCorp's
        # patent would be handed to the real SERP save/parse functions and the
        # outcome would depend on the environment the tests run in.
        mock_query = _patch_batch_pipeline(mocker, analysis="Analysis result")

        def query_side_effect(company):
            # FailCorp has no patents at all; every other company gets one.
            if company == "FailCorp":
                return Patents(patents=[])
            return _single_patent_for(company)

        mock_query.side_effect = query_side_effect

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["GoodCorp", "FailCorp"], max_workers=1)

        assert result.total_companies == 2
        assert result.successful == 1
        assert result.failed == 1

        # Exactly one company of each outcome, identified by name
        outcome_by_company = {r.company_name: r.success for r in result.results}
        assert outcome_by_company == {"GoodCorp": True, "FailCorp": False}

    def test_analyze_companies_sequential(self, mocker):
        """Test sequential batch analysis."""
        _patch_batch_pipeline(mocker, analysis="Analysis")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies_sequential(["Corp1", "Corp2", "Corp3"])

        assert result.total_companies == 3
        assert len(result.results) == 3

    def test_analyze_companies_progress_callback(self, mocker):
        """Test that progress callback is invoked correctly."""
        _patch_batch_pipeline(mocker, analysis="Analysis")

        callback = Mock()
        analyzer = CompanyAnalyzer()
        analyzer.analyze_companies(
            ["A", "B"], max_workers=1, progress_callback=callback
        )

        # One invocation per company in the batch
        assert callback.call_count == 2

    def test_company_analysis_result_structure(self, mocker):
        """Test CompanyAnalysisResult has correct structure."""
        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        mock_query.return_value = Patents(patents=[patent])

        # Yields "patents/US123.pdf" for this patent id
        mock_save.side_effect = _with_pdf_path
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = "Strong innovation"
        mock_llm.return_value = mock_llm_instance

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["TestCorp"], max_workers=1)

        assert len(result.results) == 1
        company_result = result.results[0]

        assert company_result.company_name == "TestCorp"
        assert company_result.analysis == "Strong innovation"
        assert company_result.patent_count == 1
        assert company_result.success is True
        assert company_result.error is None
        assert company_result.timestamp is not None