Files
SPARC/tests/test_analyzer.py
T
0xWheatyz 1c6d903301 feat: add multi-company batch processing
- Add CompanyAnalysisResult and BatchAnalysisResult dataclasses
- Implement analyze_companies() for concurrent batch analysis
- Implement analyze_companies_sequential() for rate-limited scenarios
- Add progress callback support for monitoring batch jobs
- Include 5 new tests for batch processing functionality
- Fix pre-existing test mock issue in test_llm.py

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-03-12 23:23:07 -04:00

353 lines
14 KiB
Python

"""Tests for the high-level company analyzer orchestration."""
import pytest
from unittest.mock import Mock, patch, call
from SPARC.analyzer import CompanyAnalyzer
from SPARC.types import Patent, Patents, CompanyAnalysisResult, BatchAnalysisResult
class TestCompanyAnalyzer:
    """Unit tests for CompanyAnalyzer's single-company and single-patent paths.

    Every collaborator is replaced where ``SPARC.analyzer`` looks it up
    (``SERP.*`` and ``LLMAnalyzer``), so each test only checks how the
    analyzer wires those pieces together.
    """

    def test_analyzer_initialization(self, mocker):
        """The constructor hands the OpenRouter key to LLMAnalyzer as ``api_key``."""
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        CompanyAnalyzer(openrouter_api_key="test-key")

        llm_cls.assert_called_once_with(api_key="test-key")

    def test_analyze_company_full_pipeline(self, mocker):
        """One patent flows through query, save, parse, minimize and the LLM."""
        # Arrange: stub every stage of the pipeline.
        query = mocker.patch("SPARC.analyzer.SERP.query")
        save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
        query.return_value = Patents(patents=[patent])
        # The stubbed save step hands back the patent with its local path set.
        patent.pdf_path = "patents/US123.pdf"
        save.return_value = patent
        parse.return_value = {"abstract": "Test abstract", "claims": "Test claims"}
        minimize.return_value = "Minimized content"

        llm = Mock()
        llm.analyze_patent_portfolio.return_value = "Strong innovation portfolio"
        llm_cls.return_value = llm

        # Act
        outcome = CompanyAnalyzer().analyze_company("TestCorp")

        # Assert: the LLM's answer is returned and each stage ran exactly once.
        assert outcome == "Strong innovation portfolio"
        query.assert_called_once_with("TestCorp")
        save.assert_called_once()
        parse.assert_called_once_with("patents/US123.pdf")
        minimize.assert_called_once()
        llm.analyze_patent_portfolio.assert_called_once()

        # Assert: the payload handed to the LLM describes the single patent.
        _, llm_kwargs = llm.analyze_patent_portfolio.call_args
        submitted = llm_kwargs["patents_data"]
        assert len(submitted) == 1
        assert submitted[0]["patent_id"] == "US123"
        assert submitted[0]["content"] == "Minimized content"

    def test_analyze_company_no_patents_found(self, mocker):
        """An empty search result produces the 'No patents found' message."""
        query = mocker.patch("SPARC.analyzer.SERP.query")
        query.return_value = Patents(patents=[])
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        outcome = CompanyAnalyzer().analyze_company("UnknownCorp")

        assert outcome == "No patents found for UnknownCorp"

    def test_analyze_company_handles_processing_errors(self, mocker):
        """A patent whose save step raises is skipped; the rest are analyzed."""
        query = mocker.patch("SPARC.analyzer.SERP.query")
        save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        good = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
        bad = Patent(patent_id="US456", pdf_link="http://example.com/2.pdf")
        query.return_value = Patents(patents=[good, bad])
        good.pdf_path = "patents/US123.pdf"

        def save_or_fail(p):
            # Only US123 can be saved; any other patent simulates a failure.
            if p.patent_id != "US123":
                raise Exception("Download failed")
            p.pdf_path = "patents/US123.pdf"
            return p

        save.side_effect = save_or_fail
        parse.return_value = {"abstract": "Test"}
        minimize.return_value = "Content"

        llm = Mock()
        llm.analyze_patent_portfolio.return_value = "Analysis result"
        llm_cls.return_value = llm

        outcome = CompanyAnalyzer().analyze_company("TestCorp")

        # The surviving patent is enough for the analysis to go ahead.
        assert outcome == "Analysis result"

        # Only the patent that was saved successfully reaches the LLM.
        _, llm_kwargs = llm.analyze_patent_portfolio.call_args
        submitted = llm_kwargs["patents_data"]
        assert len(submitted) == 1
        assert submitted[0]["patent_id"] == "US123"

    def test_analyze_company_all_patents_fail(self, mocker):
        """When every patent fails to process, a failure message is returned."""
        query = mocker.patch("SPARC.analyzer.SERP.query")
        save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mocker.patch("SPARC.analyzer.LLMAnalyzer")

        doomed = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
        query.return_value = Patents(patents=[doomed])
        # Every save attempt raises, so nothing can be processed.
        save.side_effect = Exception("Processing error")

        outcome = CompanyAnalyzer().analyze_company("TestCorp")

        assert outcome == "Failed to process any patents for TestCorp"

    def test_analyze_single_patent(self, mocker):
        """A single patent is parsed, minimized and passed to the LLM."""
        parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        llm_cls = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        parse.return_value = {"abstract": "Test abstract"}
        minimize.return_value = "Minimized content"

        llm = Mock()
        llm.analyze_patent_content.return_value = "Innovative patent analysis"
        llm_cls.return_value = llm

        outcome = CompanyAnalyzer().analyze_single_patent("US123", "TestCorp")

        assert outcome == "Innovative patent analysis"
        parse.assert_called_once_with("patents/US123.pdf")
        llm.analyze_patent_content.assert_called_once_with(
            patent_content="Minimized content", company_name="TestCorp"
        )

    def test_analyze_single_patent_error_handling(self, mocker):
        """A parse error is reported in the returned text, not raised."""
        parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mocker.patch("SPARC.analyzer.LLMAnalyzer")
        parse.side_effect = FileNotFoundError("PDF not found")

        outcome = CompanyAnalyzer().analyze_single_patent("US999", "TestCorp")

        # The message names the patent and carries the underlying error text.
        assert "Failed to analyze patent US999" in outcome
        assert "PDF not found" in outcome
class TestBatchProcessing:
    """Tests for CompanyAnalyzer's multi-company batch entry points.

    All tests share ``_patch_pipeline`` so that every collaborator looked up
    through ``SPARC.analyzer`` is stubbed. None of them should reach the
    unpatched SERP or LLM implementations.
    """

    @staticmethod
    def _patch_pipeline(mocker, analysis="Analysis result"):
        """Stub the whole analysis pipeline and return the ``SERP.query`` mock.

        The stubs behave as follows:

        * ``SERP.query(company)`` returns one patent with id ``US-<company>``.
        * ``SERP.save_patents(patent)`` sets ``pdf_path`` to
          ``patents/<patent_id>.pdf`` and returns the same patent.
        * ``SERP.parse_patent_pdf`` returns ``{"abstract": "Test"}``.
        * ``SERP.minimize_patent_for_llm`` returns ``"Content"``.
        * ``LLMAnalyzer()`` returns a ``Mock`` whose ``analyze_patent_portfolio``
          returns ``analysis``.

        Args:
            mocker: The pytest-mock fixture of the calling test.
            analysis: Value returned by the stubbed ``analyze_patent_portfolio``.

        Returns:
            The mock installed for ``SERP.query``, so a test can replace its
            ``side_effect`` to simulate companies without patents.
        """

        def fake_query(company):
            patent = Patent(
                patent_id=f"US-{company}",
                pdf_link=f"http://example.com/{company}.pdf",
            )
            return Patents(patents=[patent])

        def fake_save(patent):
            patent.pdf_path = f"patents/{patent.patent_id}.pdf"
            return patent

        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")

        mock_query.side_effect = fake_query
        mock_save.side_effect = fake_save
        mock_parse.return_value = {"abstract": "Test"}
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = analysis
        mock_llm.return_value = mock_llm_instance

        return mock_query

    def test_analyze_companies_success(self, mocker):
        """Test batch analysis of multiple companies."""
        self._patch_pipeline(mocker, analysis="Analysis result")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["CompanyA", "CompanyB"], max_workers=2)

        assert isinstance(result, BatchAnalysisResult)
        assert result.total_companies == 2
        assert result.successful == 2
        assert result.failed == 0
        assert len(result.results) == 2

    def test_analyze_companies_with_failures(self, mocker):
        """Test batch analysis handles partial failures."""
        # Stub the full pipeline so GoodCorp succeeds without touching the
        # unpatched save/parse/minimize steps. Only FailCorp is expected to fail.
        mock_query = self._patch_pipeline(mocker)
        default_query = mock_query.side_effect

        def query_side_effect(company):
            # FailCorp has no patents; every other company gets the default.
            if company == "FailCorp":
                return Patents(patents=[])
            return default_query(company)

        mock_query.side_effect = query_side_effect

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["GoodCorp", "FailCorp"], max_workers=1)

        assert result.total_companies == 2
        # Exactly one company succeeds and one fails: a genuine partial failure.
        assert result.successful == 1
        assert result.failed == 1

        fail_result = next(r for r in result.results if r.company_name == "FailCorp")
        assert fail_result.success is False
        good_result = next(r for r in result.results if r.company_name == "GoodCorp")
        assert good_result.success is True

    def test_analyze_companies_sequential(self, mocker):
        """Test sequential batch analysis."""
        self._patch_pipeline(mocker, analysis="Analysis")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies_sequential(["Corp1", "Corp2", "Corp3"])

        assert result.total_companies == 3
        assert len(result.results) == 3

    def test_analyze_companies_progress_callback(self, mocker):
        """Test that progress callback is invoked correctly."""
        self._patch_pipeline(mocker, analysis="Analysis")
        callback = Mock()

        analyzer = CompanyAnalyzer()
        analyzer.analyze_companies(
            ["A", "B"], max_workers=1, progress_callback=callback
        )

        # Two companies were submitted and the callback fired twice.
        assert callback.call_count == 2

    def test_company_analysis_result_structure(self, mocker):
        """Test CompanyAnalysisResult has correct structure."""
        self._patch_pipeline(mocker, analysis="Strong innovation")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["TestCorp"], max_workers=1)

        assert len(result.results) == 1
        company_result = result.results[0]
        assert company_result.company_name == "TestCorp"
        assert company_result.analysis == "Strong innovation"
        assert company_result.patent_count == 1
        assert company_result.success is True
        assert company_result.error is None
        assert company_result.timestamp is not None