From a91c3badabf1af8dd90158a1a5a5999a5a26b210 Mon Sep 17 00:00:00 2001 From: 0xWheatyz Date: Thu, 19 Feb 2026 18:57:10 -0500 Subject: [PATCH] feat: implement company performance estimation orchestration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created CompanyAnalyzer class that orchestrates the complete pipeline: 1. Retrieves patents via SERP API 2. Downloads and parses PDFs 3. Minimizes content (removes bloat) 4. Analyzes portfolio with LLM 5. Returns performance estimation Features: - Full company portfolio analysis - Single patent analysis support - Robust error handling (continues on partial failures) - Progress logging for user visibility Updated main.py with clean example usage demonstrating the high-level API. Added comprehensive test suite (7 tests) covering: - Full pipeline integration - Error handling at each stage - Single patent analysis - Edge cases (no patents, all failures) All 26 tests passing. This completes the core functionality for patent-based company performance estimation. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- SPARC/analyzer.py | 112 ++++++++++++++++++++++++++ main.py | 47 +++++++++-- tests/test_analyzer.py | 178 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 330 insertions(+), 7 deletions(-) create mode 100644 SPARC/analyzer.py create mode 100644 tests/test_analyzer.py diff --git a/SPARC/analyzer.py b/SPARC/analyzer.py new file mode 100644 index 0000000..68d110f --- /dev/null +++ b/SPARC/analyzer.py @@ -0,0 +1,112 @@ +"""High-level patent analysis orchestration. + +This module ties together patent retrieval, parsing, and LLM analysis +to provide company performance estimation based on patent portfolios. +""" + +from SPARC.serp_api import SERP +from SPARC.llm import LLMAnalyzer +from SPARC.types import Patent +from typing import List + + +class CompanyAnalyzer: + """Orchestrates end-to-end company performance analysis via patents.""" + + def __init__(self, anthropic_api_key: str | None = None): + """Initialize the company analyzer. + + Args: + anthropic_api_key: Optional Anthropic API key. If None, loads from config. + """ + self.llm_analyzer = LLMAnalyzer(api_key=anthropic_api_key) + + def analyze_company(self, company_name: str) -> str: + """Analyze a company's performance based on their patent portfolio. + + This is the main entry point that orchestrates the full pipeline: + 1. Retrieve patents from SERP API + 2. Download and parse each patent PDF + 3. Minimize patent content (remove bloat) + 4. Analyze portfolio with LLM + 5. Return performance estimation + + Args: + company_name: Name of the company to analyze + + Returns: + Comprehensive analysis of company's innovation and performance outlook + """ + print(f"Retrieving patents for {company_name}...") + patents = SERP.query(company_name) + + if not patents.patents: + return f"No patents found for {company_name}" + + print(f"Found {len(patents.patents)} patents. Processing...") + + # Download and parse each patent + processed_patents = [] + for idx, patent in enumerate(patents.patents, 1): + print(f"Processing patent {idx}/{len(patents.patents)}: {patent.patent_id}") + + try: + # Download PDF + patent = SERP.save_patents(patent) + + # Parse sections from PDF + sections = SERP.parse_patent_pdf(patent.pdf_path) + + # Minimize for LLM (remove bloat) + minimized_content = SERP.minimize_patent_for_llm(sections) + + processed_patents.append( + {"patent_id": patent.patent_id, "content": minimized_content} + ) + + except Exception as e: + print(f"Warning: Failed to process {patent.patent_id}: {e}") + continue + + if not processed_patents: + return f"Failed to process any patents for {company_name}" + + print(f"Analyzing portfolio with LLM...") + + # Analyze the full portfolio with LLM + analysis = self.llm_analyzer.analyze_patent_portfolio( + patents_data=processed_patents, company_name=company_name + ) + + return analysis + + def analyze_single_patent(self, patent_id: str, company_name: str) -> str: + """Analyze a single patent by ID. + + Useful for focused analysis of specific innovations. + + Args: + patent_id: Publication ID of the patent + company_name: Name of the company (for context) + + Returns: + Analysis of the specific patent's innovation quality + """ + # Note: This simplified version assumes the patent PDF is already downloaded + # A more complete implementation would support direct patent ID lookup + print(f"Analyzing patent {patent_id} for {company_name}...") + + patent_path = f"patents/{patent_id}.pdf" + + try: + sections = SERP.parse_patent_pdf(patent_path) + minimized_content = SERP.minimize_patent_for_llm(sections) + + analysis = self.llm_analyzer.analyze_patent_content( + patent_content=minimized_content, company_name=company_name + ) + + return analysis + + except Exception as e: + return f"Failed to analyze patent {patent_id}: {e}" diff --git a/main.py b/main.py index 04b47f0..82e87fc 100644 --- a/main.py +++ b/main.py @@ -1,10 +1,43 @@ -from SPARC.serp_api import SERP +"""SPARC - Semiconductor Patent & Analytics Report Core -patents = SERP.query("nvidia") +Example usage of the company performance analyzer. -for patent in patents.patents: - patent = SERP.save_patents(patent) - patent.summary = SERP.parse_patent_pdf(patent.pdf_path) - print(patent.summary) +Before running: +1. Create a .env file with: + API_KEY=your_serpapi_key + ANTHROPIC_API_KEY=your_anthropic_key -print(patents) +2. Run: python main.py +""" + +from SPARC.analyzer import CompanyAnalyzer + + +def main(): + """Analyze a company's performance based on their patent portfolio.""" + + # Initialize the analyzer (loads API keys from .env) + analyzer = CompanyAnalyzer() + + # Analyze a company - this will: + # 1. Retrieve patents from SERP API + # 2. Download and parse patent PDFs + # 3. Minimize content (remove bloat) + # 4. Analyze with Claude to estimate performance + company_name = "nvidia" + + print(f"\n{'=' * 70}") + print(f"SPARC Patent Analysis - {company_name.upper()}") + print(f"{'=' * 70}\n") + + analysis = analyzer.analyze_company(company_name) + + print(f"\n{'=' * 70}") + print("ANALYSIS RESULTS") + print(f"{'=' * 70}\n") + print(analysis) + print(f"\n{'=' * 70}\n") + + +if __name__ == "__main__": + main() diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py new file mode 100644 index 0000000..84ed701 --- /dev/null +++ b/tests/test_analyzer.py @@ -0,0 +1,178 @@ +"""Tests for the high-level company analyzer orchestration.""" + +import pytest +from unittest.mock import Mock, patch +from SPARC.analyzer import CompanyAnalyzer +from SPARC.types import Patent, Patents + + +class TestCompanyAnalyzer: + """Test the CompanyAnalyzer orchestration logic.""" + + def test_analyzer_initialization(self, mocker): + """Test analyzer initialization with API key.""" + mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer") + + analyzer = CompanyAnalyzer(anthropic_api_key="test-key") + + mock_llm.assert_called_once_with(api_key="test-key") + + def test_analyze_company_full_pipeline(self, mocker): + """Test complete company analysis pipeline.""" + # Mock all the dependencies + mock_query = mocker.patch("SPARC.analyzer.SERP.query") + mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents") + mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf") + mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm") + mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer") + + # Setup mock return values + test_patent = Patent( + patent_id="US123", pdf_link="http://example.com/test.pdf" + ) + mock_query.return_value = Patents(patents=[test_patent]) + + test_patent.pdf_path = "patents/US123.pdf" + mock_save.return_value = test_patent + + mock_parse.return_value = { + "abstract": "Test abstract", + "claims": "Test claims", + } + + mock_minimize.return_value = "Minimized content" + + mock_llm_instance = Mock() + mock_llm_instance.analyze_patent_portfolio.return_value = ( + "Strong innovation portfolio" + ) + mock_llm.return_value = mock_llm_instance + + # Run the analysis + analyzer = CompanyAnalyzer() + result = analyzer.analyze_company("TestCorp") + + # Verify the pipeline executed correctly + assert result == "Strong innovation portfolio" + mock_query.assert_called_once_with("TestCorp") + mock_save.assert_called_once() + mock_parse.assert_called_once_with("patents/US123.pdf") + mock_minimize.assert_called_once() + mock_llm_instance.analyze_patent_portfolio.assert_called_once() + + # Verify the data passed to LLM + llm_call_args = mock_llm_instance.analyze_patent_portfolio.call_args + patents_data = llm_call_args[1]["patents_data"] + assert len(patents_data) == 1 + assert patents_data[0]["patent_id"] == "US123" + assert patents_data[0]["content"] == "Minimized content" + + def test_analyze_company_no_patents_found(self, mocker): + """Test handling when no patents are found for a company.""" + mock_query = mocker.patch("SPARC.analyzer.SERP.query") + mock_query.return_value = Patents(patents=[]) + mocker.patch("SPARC.analyzer.LLMAnalyzer") + + analyzer = CompanyAnalyzer() + result = analyzer.analyze_company("UnknownCorp") + + assert result == "No patents found for UnknownCorp" + + def test_analyze_company_handles_processing_errors(self, mocker): + """Test that analysis continues even if some patents fail to process.""" + mock_query = mocker.patch("SPARC.analyzer.SERP.query") + mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents") + mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf") + mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm") + mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer") + + # Create two test patents + patent1 = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf") + patent2 = Patent(patent_id="US456", pdf_link="http://example.com/2.pdf") + mock_query.return_value = Patents(patents=[patent1, patent2]) + + # First patent processes successfully + patent1.pdf_path = "patents/US123.pdf" + + # Second patent raises an error + def save_side_effect(p): + if p.patent_id == "US123": + p.pdf_path = "patents/US123.pdf" + return p + else: + raise Exception("Download failed") + + mock_save.side_effect = save_side_effect + + mock_parse.return_value = {"abstract": "Test"} + mock_minimize.return_value = "Content" + + mock_llm_instance = Mock() + mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis result" + mock_llm.return_value = mock_llm_instance + + analyzer = CompanyAnalyzer() + result = analyzer.analyze_company("TestCorp") + + # Should still succeed with the one patent that worked + assert result == "Analysis result" + + # Verify only one patent was analyzed + llm_call_args = mock_llm_instance.analyze_patent_portfolio.call_args + patents_data = llm_call_args[1]["patents_data"] + assert len(patents_data) == 1 + assert patents_data[0]["patent_id"] == "US123" + + def test_analyze_company_all_patents_fail(self, mocker): + """Test handling when all patents fail to process.""" + mock_query = mocker.patch("SPARC.analyzer.SERP.query") + mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents") + mocker.patch("SPARC.analyzer.LLMAnalyzer") + + patent = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf") + mock_query.return_value = Patents(patents=[patent]) + + # Make processing fail + mock_save.side_effect = Exception("Processing error") + + analyzer = CompanyAnalyzer() + result = analyzer.analyze_company("TestCorp") + + assert result == "Failed to process any patents for TestCorp" + + def test_analyze_single_patent(self, mocker): + """Test single patent analysis.""" + mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf") + mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm") + mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer") + + mock_parse.return_value = {"abstract": "Test abstract"} + mock_minimize.return_value = "Minimized content" + + mock_llm_instance = Mock() + mock_llm_instance.analyze_patent_content.return_value = ( + "Innovative patent analysis" + ) + mock_llm.return_value = mock_llm_instance + + analyzer = CompanyAnalyzer() + result = analyzer.analyze_single_patent("US123", "TestCorp") + + assert result == "Innovative patent analysis" + mock_parse.assert_called_once_with("patents/US123.pdf") + mock_llm_instance.analyze_patent_content.assert_called_once_with( + patent_content="Minimized content", company_name="TestCorp" + ) + + def test_analyze_single_patent_error_handling(self, mocker): + """Test single patent analysis with processing error.""" + mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf") + mocker.patch("SPARC.analyzer.LLMAnalyzer") + + mock_parse.side_effect = FileNotFoundError("PDF not found") + + analyzer = CompanyAnalyzer() + result = analyzer.analyze_single_patent("US999", "TestCorp") + + assert "Failed to analyze patent US999" in result + assert "PDF not found" in result