forked from 0xWheatyz/SPARC
feat: add multi-company batch processing
- Add CompanyAnalysisResult and BatchAnalysisResult dataclasses
- Implement analyze_companies() for concurrent batch analysis
- Implement analyze_companies_sequential() for rate-limited scenarios
- Add progress callback support for monitoring batch jobs
- Include 5 new tests for batch processing functionality
- Fix pre-existing test mock issue in test_llm.py

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
+176
-2
@@ -1,9 +1,9 @@
|
||||
"""Tests for the high-level company analyzer orchestration."""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import Mock, patch
|
||||
from unittest.mock import Mock, patch, call
|
||||
from SPARC.analyzer import CompanyAnalyzer
|
||||
from SPARC.types import Patent, Patents
|
||||
from SPARC.types import Patent, Patents, CompanyAnalysisResult, BatchAnalysisResult
|
||||
|
||||
|
||||
class TestCompanyAnalyzer:
|
||||
@@ -176,3 +176,177 @@ class TestCompanyAnalyzer:
|
||||
|
||||
assert "Failed to analyze patent US999" in result
|
||||
assert "PDF not found" in result
|
||||
|
||||
|
||||
class TestBatchProcessing:
    """Test multi-company batch processing functionality."""

    @staticmethod
    def _patch_pipeline(mocker, analysis="Analysis", query_side_effect=None):
        """Patch every collaborator that ``SPARC.analyzer`` is mocked with here.

        Patches ``SERP.query``, ``SERP.save_patents``,
        ``SERP.parse_patent_pdf``, ``SERP.minimize_patent_for_llm`` and
        ``LLMAnalyzer`` inside ``SPARC.analyzer`` so that a test never
        reaches the real implementations.

        Args:
            mocker: The pytest-mock ``mocker`` fixture of the calling test.
            analysis: Value returned by the mocked
                ``analyze_patent_portfolio`` call.
            query_side_effect: Optional replacement for the default
                ``SERP.query`` behaviour, which returns a single patent
                named after the queried company.

        Returns:
            The ``Mock`` that stands in for the ``LLMAnalyzer`` instance.
        """

        def default_query(company):
            patent = Patent(
                patent_id=f"US-{company}",
                pdf_link=f"http://example.com/{company}.pdf",
            )
            return Patents(patents=[patent])

        def save_side_effect(patent):
            # Mimic a completed save by recording a local PDF path.
            patent.pdf_path = f"patents/{patent.patent_id}.pdf"
            return patent

        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
        mock_query.side_effect = query_side_effect or default_query

        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
        mock_save.side_effect = save_side_effect

        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
        mock_parse.return_value = {"abstract": "Test"}

        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
        mock_minimize.return_value = "Content"

        mock_llm_instance = Mock()
        mock_llm_instance.analyze_patent_portfolio.return_value = analysis
        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
        mock_llm.return_value = mock_llm_instance
        return mock_llm_instance

    def test_analyze_companies_success(self, mocker):
        """Test batch analysis of multiple companies."""
        self._patch_pipeline(mocker, analysis="Analysis result")

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["CompanyA", "CompanyB"], max_workers=2)

        assert isinstance(result, BatchAnalysisResult)
        assert result.total_companies == 2
        assert result.successful == 2
        assert result.failed == 0
        assert len(result.results) == 2

    def test_analyze_companies_with_failures(self, mocker):
        """Test batch analysis handles partial failures."""

        def query_side_effect(company):
            # An empty patent list is the failure trigger for FailCorp.
            if company == "FailCorp":
                return Patents(patents=[])
            patent = Patent(
                patent_id=f"US-{company}",
                pdf_link=f"http://example.com/{company}.pdf",
            )
            return Patents(patents=[patent])

        # Patch the whole pipeline, not just the query: otherwise the real
        # SERP.save_patents / parse_patent_pdf would run for GoodCorp
        # (presumably a network download - NOTE(review): confirm), making
        # the outcome depend on the environment.
        self._patch_pipeline(mocker, query_side_effect=query_side_effect)

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["GoodCorp", "FailCorp"], max_workers=1)

        assert result.total_companies == 2
        # With GoodCorp fully mocked, exactly one company can fail.
        assert result.failed == 1
        assert result.successful == 1

        by_name = {r.company_name: r for r in result.results}
        assert by_name["FailCorp"].success is False
        assert by_name["GoodCorp"].success is True

    def test_analyze_companies_sequential(self, mocker):
        """Test sequential batch analysis."""
        self._patch_pipeline(mocker)

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies_sequential(["Corp1", "Corp2", "Corp3"])

        assert result.total_companies == 3
        assert len(result.results) == 3

    def test_analyze_companies_progress_callback(self, mocker):
        """Test that progress callback is invoked correctly."""
        self._patch_pipeline(mocker)

        callback = Mock()
        analyzer = CompanyAnalyzer()
        analyzer.analyze_companies(["A", "B"], max_workers=1, progress_callback=callback)

        # One notification per company in the batch.
        assert callback.call_count == 2

    def test_company_analysis_result_structure(self, mocker):
        """Test CompanyAnalysisResult has correct structure."""

        def query_side_effect(company):
            patent = Patent(patent_id="US123", pdf_link="http://example.com/test.pdf")
            return Patents(patents=[patent])

        self._patch_pipeline(
            mocker,
            analysis="Strong innovation",
            query_side_effect=query_side_effect,
        )

        analyzer = CompanyAnalyzer()
        result = analyzer.analyze_companies(["TestCorp"], max_workers=1)

        assert len(result.results) == 1
        company_result = result.results[0]
        assert company_result.company_name == "TestCorp"
        assert company_result.analysis == "Strong innovation"
        assert company_result.patent_count == 1
        assert company_result.success is True
        assert company_result.error is None
        assert company_result.timestamp is not None
|
||||
|
||||
Reference in New Issue
Block a user