feat: add multi-company batch processing

- Add CompanyAnalysisResult and BatchAnalysisResult dataclasses
- Implement analyze_companies() for concurrent batch analysis
- Implement analyze_companies_sequential() for rate-limited scenarios
- Add progress callback support for monitoring batch jobs
- Include 5 new tests for batch processing functionality
- Fix pre-existing test mock issue in test_llm.py

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-03-12 23:23:07 -04:00
parent 84fd0bef32
commit 1c6d903301
5 changed files with 388 additions and 7 deletions
+176 -2
View File
@@ -1,9 +1,9 @@
"""Tests for the high-level company analyzer orchestration."""
import pytest
from unittest.mock import Mock, patch
from unittest.mock import Mock, patch, call
from SPARC.analyzer import CompanyAnalyzer
from SPARC.types import Patent, Patents
from SPARC.types import Patent, Patents, CompanyAnalysisResult, BatchAnalysisResult
class TestCompanyAnalyzer:
@@ -176,3 +176,177 @@ class TestCompanyAnalyzer:
assert "Failed to analyze patent US999" in result
assert "PDF not found" in result
class TestBatchProcessing:
"""Test multi-company batch processing functionality."""
def test_analyze_companies_success(self, mocker):
"""Test batch analysis of multiple companies."""
mock_query = mocker.patch("SPARC.analyzer.SERP.query")
mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
# Setup mock returns
def query_side_effect(company):
patent = Patent(
patent_id=f"US-{company}",
pdf_link=f"http://example.com/{company}.pdf",
)
return Patents(patents=[patent])
mock_query.side_effect = query_side_effect
def save_side_effect(patent):
patent.pdf_path = f"patents/{patent.patent_id}.pdf"
return patent
mock_save.side_effect = save_side_effect
mock_parse.return_value = {"abstract": "Test"}
mock_minimize.return_value = "Content"
mock_llm_instance = Mock()
mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis result"
mock_llm.return_value = mock_llm_instance
analyzer = CompanyAnalyzer()
result = analyzer.analyze_companies(["CompanyA", "CompanyB"], max_workers=2)
assert isinstance(result, BatchAnalysisResult)
assert result.total_companies == 2
assert result.successful == 2
assert result.failed == 0
assert len(result.results) == 2
def test_analyze_companies_with_failures(self, mocker):
"""Test batch analysis handles partial failures."""
mock_query = mocker.patch("SPARC.analyzer.SERP.query")
mocker.patch("SPARC.analyzer.LLMAnalyzer")
def query_side_effect(company):
if company == "FailCorp":
return Patents(patents=[])
patent = Patent(
patent_id=f"US-{company}",
pdf_link=f"http://example.com/{company}.pdf",
)
return Patents(patents=[patent])
mock_query.side_effect = query_side_effect
analyzer = CompanyAnalyzer()
result = analyzer.analyze_companies(["GoodCorp", "FailCorp"], max_workers=1)
assert result.total_companies == 2
assert result.failed >= 1 # At least FailCorp should fail
# Find the failed result
fail_result = next(r for r in result.results if r.company_name == "FailCorp")
assert fail_result.success is False
def test_analyze_companies_sequential(self, mocker):
"""Test sequential batch analysis."""
mock_query = mocker.patch("SPARC.analyzer.SERP.query")
mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
def query_side_effect(company):
patent = Patent(
patent_id=f"US-{company}",
pdf_link=f"http://example.com/{company}.pdf",
)
return Patents(patents=[patent])
mock_query.side_effect = query_side_effect
def save_side_effect(patent):
patent.pdf_path = f"patents/{patent.patent_id}.pdf"
return patent
mock_save.side_effect = save_side_effect
mock_parse.return_value = {"abstract": "Test"}
mock_minimize.return_value = "Content"
mock_llm_instance = Mock()
mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis"
mock_llm.return_value = mock_llm_instance
analyzer = CompanyAnalyzer()
result = analyzer.analyze_companies_sequential(["Corp1", "Corp2", "Corp3"])
assert result.total_companies == 3
assert len(result.results) == 3
def test_analyze_companies_progress_callback(self, mocker):
"""Test that progress callback is invoked correctly."""
mock_query = mocker.patch("SPARC.analyzer.SERP.query")
mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
def query_side_effect(company):
patent = Patent(
patent_id=f"US-{company}",
pdf_link=f"http://example.com/{company}.pdf",
)
return Patents(patents=[patent])
mock_query.side_effect = query_side_effect
def save_side_effect(patent):
patent.pdf_path = f"patents/{patent.patent_id}.pdf"
return patent
mock_save.side_effect = save_side_effect
mock_parse.return_value = {"abstract": "Test"}
mock_minimize.return_value = "Content"
mock_llm_instance = Mock()
mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis"
mock_llm.return_value = mock_llm_instance
callback = Mock()
analyzer = CompanyAnalyzer()
analyzer.analyze_companies(["A", "B"], max_workers=1, progress_callback=callback)
assert callback.call_count == 2
def test_company_analysis_result_structure(self, mocker):
"""Test CompanyAnalysisResult has correct structure."""
mock_query = mocker.patch("SPARC.analyzer.SERP.query")
mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
mock_query.return_value = Patents(patents=[patent])
def save_side_effect(p):
p.pdf_path = "patents/US123.pdf"
return p
mock_save.side_effect = save_side_effect
mock_parse.return_value = {"abstract": "Test"}
mock_minimize.return_value = "Content"
mock_llm_instance = Mock()
mock_llm_instance.analyze_patent_portfolio.return_value = "Strong innovation"
mock_llm.return_value = mock_llm_instance
analyzer = CompanyAnalyzer()
result = analyzer.analyze_companies(["TestCorp"], max_workers=1)
assert len(result.results) == 1
company_result = result.results[0]
assert company_result.company_name == "TestCorp"
assert company_result.analysis == "Strong innovation"
assert company_result.patent_count == 1
assert company_result.success is True
assert company_result.error is None
assert company_result.timestamp is not None