From a91c3badabf1af8dd90158a1a5a5999a5a26b210 Mon Sep 17 00:00:00 2001
From: 0xWheatyz <wyatt@leeworks.dev>
Date: Thu, 19 Feb 2026 18:57:10 -0500
Subject: [PATCH] feat: implement company performance estimation orchestration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created CompanyAnalyzer class that orchestrates the complete pipeline:
1. Retrieves patents via SERP API
2. Downloads and parses PDFs
3. Minimizes content (removes bloat)
4. Analyzes portfolio with LLM
5. Returns performance estimation

Features:
- Full company portfolio analysis
- Single patent analysis support
- Robust error handling (continues on partial failures)
- Progress logging for user visibility

Updated main.py with clean example usage demonstrating the high-level API.

Added comprehensive test suite (7 tests) covering:
- Full pipeline integration
- Error handling at each stage
- Single patent analysis
- Edge cases (no patents, all failures)

All 26 tests passing.

This completes the core functionality for patent-based company
performance estimation.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 SPARC/analyzer.py      | 112 ++++++++++++++++++++++++++
 main.py                |  47 +++++++++--
 tests/test_analyzer.py | 178 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 330 insertions(+), 7 deletions(-)
 create mode 100644 SPARC/analyzer.py
 create mode 100644 tests/test_analyzer.py

diff --git a/SPARC/analyzer.py b/SPARC/analyzer.py
new file mode 100644
index 0000000..68d110f
--- /dev/null
+++ b/SPARC/analyzer.py
@@ -0,0 +1,112 @@
+"""High-level patent analysis orchestration.
+
+This module ties together patent retrieval, parsing, and LLM analysis
+to provide company performance estimation based on patent portfolios.
+"""
+
+from SPARC.serp_api import SERP
+from SPARC.llm import LLMAnalyzer
+from SPARC.types import Patent
+from typing import List
+
+
+class CompanyAnalyzer:
+    """Orchestrates end-to-end company performance analysis via patents."""
+
+    def __init__(self, anthropic_api_key: str | None = None):
+        """Initialize the company analyzer.
+
+        Args:
+          anthropic_api_key: Optional Anthropic API key. If None, loads from config.
+        """
+        self.llm_analyzer = LLMAnalyzer(api_key=anthropic_api_key)
+
+    def analyze_company(self, company_name: str) -> str:
+        """Analyze a company's performance based on their patent portfolio.
+
+        This is the main entry point that orchestrates the full pipeline:
+        1. Retrieve patents from SERP API
+        2. Download and parse each patent PDF
+        3. Minimize patent content (remove bloat)
+        4. Analyze portfolio with LLM
+        5. Return performance estimation
+
+        A patent that fails to download or parse is reported with a warning
+        and skipped, so one bad PDF does not abort the whole run.
+
+        Args:
+          company_name: Name of the company to analyze
+
+        Returns:
+          The LLM's portfolio analysis, or a plain-text message when no
+          patents were found or none could be processed.
+        """
+        print(f"Retrieving patents for {company_name}...")
+        patents = SERP.query(company_name)
+
+        if not patents.patents:
+            return f"No patents found for {company_name}"
+
+        total = len(patents.patents)
+        print(f"Found {total} patents. Processing...")
+
+        processed_patents = []
+        for idx, patent in enumerate(patents.patents, 1):
+            # Capture the ID up front: the warning below must not depend on
+            # whatever save_patents() returned (or failed to return).
+            patent_id = patent.patent_id
+            print(f"Processing patent {idx}/{total}: {patent_id}")
+
+            try:
+                # Download the PDF (the returned patent carries pdf_path)
+                saved = SERP.save_patents(patent)
+                # Parse sections from PDF
+                sections = SERP.parse_patent_pdf(saved.pdf_path)
+                # Minimize for LLM (remove bloat)
+                minimized_content = SERP.minimize_patent_for_llm(sections)
+                processed_patents.append(
+                    {"patent_id": saved.patent_id, "content": minimized_content}
+                )
+            except Exception as e:
+                # Best-effort: keep going with the patents that did process.
+                print(f"Warning: Failed to process {patent_id}: {e}")
+
+        if not processed_patents:
+            return f"Failed to process any patents for {company_name}"
+
+        print("Analyzing portfolio with LLM...")
+
+        return self.llm_analyzer.analyze_patent_portfolio(
+            patents_data=processed_patents, company_name=company_name
+        )
+
+    def analyze_single_patent(self, patent_id: str, company_name: str) -> str:
+        """Analyze a single patent by ID.
+
+        Useful for focused analysis of specific innovations. The PDF is read
+        from ``patents/<patent_id>.pdf`` and is not downloaded here.
+
+        Args:
+          patent_id: Publication ID of the patent
+          company_name: Name of the company (for context)
+
+        Returns:
+          Analysis of the specific patent's innovation quality, or a
+          "Failed to analyze patent ..." message if any step raised.
+        """
+        # Note: This simplified version assumes the patent PDF is already downloaded
+        # A more complete implementation would support direct patent ID lookup
+        print(f"Analyzing patent {patent_id} for {company_name}...")
+
+        patent_path = f"patents/{patent_id}.pdf"
+
+        try:
+            sections = SERP.parse_patent_pdf(patent_path)
+            minimized_content = SERP.minimize_patent_for_llm(sections)
+
+            return self.llm_analyzer.analyze_patent_content(
+                patent_content=minimized_content, company_name=company_name
+            )
+        except Exception as e:
+            # Errors are reported as the return value, not raised.
+            return f"Failed to analyze patent {patent_id}: {e}"
diff --git a/main.py b/main.py
index 04b47f0..82e87fc 100644
--- a/main.py
+++ b/main.py
@@ -1,10 +1,43 @@
-from SPARC.serp_api import SERP
+"""SPARC - Semiconductor Patent & Analytics Report Core
 
-patents = SERP.query("nvidia")
+Example usage of the company performance analyzer.
 
-for patent in patents.patents:
-  patent = SERP.save_patents(patent)
-  patent.summary = SERP.parse_patent_pdf(patent.pdf_path)
-  print(patent.summary)
+Before running:
+1. Create a .env file with:
+   API_KEY=your_serpapi_key
+   ANTHROPIC_API_KEY=your_anthropic_key
 
-print(patents)
+2. Run: python main.py
+"""
+
+from SPARC.analyzer import CompanyAnalyzer
+
+
+def main(company_name: str = "nvidia") -> None:
+    """Print a patent-portfolio performance analysis for one company.
+
+    Args:
+      company_name: Company to analyze; defaults to the example, "nvidia".
+    """
+
+    # Initialize the analyzer (loads API keys from .env)
+    analyzer = CompanyAnalyzer()
+    rule = "=" * 70  # banner line reused around each section
+
+    print(f"\n{rule}")
+    print(f"SPARC Patent Analysis - {company_name.upper()}")
+    print(f"{rule}\n")
+
+    # Runs the full pipeline: retrieve patents, download and parse the
+    # PDFs, minimize the content, then analyze it with the LLM.
+    analysis = analyzer.analyze_company(company_name)
+
+    print(f"\n{rule}")
+    print("ANALYSIS RESULTS")
+    print(f"{rule}\n")
+    print(analysis)
+    print(f"\n{rule}\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py
new file mode 100644
index 0000000..84ed701
--- /dev/null
+++ b/tests/test_analyzer.py
@@ -0,0 +1,178 @@
+"""Tests for the high-level company analyzer orchestration."""
+
+import pytest
+from unittest.mock import Mock, patch
+from SPARC.analyzer import CompanyAnalyzer
+from SPARC.types import Patent, Patents
+
+
+class TestCompanyAnalyzer:
+    """Test the CompanyAnalyzer orchestration logic."""
+
+    def test_analyzer_initialization(self, mocker):
+        """The API key is forwarded to LLMAnalyzer and the instance is kept."""
+        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
+
+        analyzer = CompanyAnalyzer(anthropic_api_key="test-key")
+        mock_llm.assert_called_once_with(api_key="test-key")
+        assert analyzer.llm_analyzer is mock_llm.return_value
+
+    def test_analyze_company_full_pipeline(self, mocker):
+        """Test complete company analysis pipeline."""
+        # Mock all the dependencies
+        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
+        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
+        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
+        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
+        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
+
+        # Setup mock return values
+        test_patent = Patent(
+            patent_id="US123", pdf_link="http://example.com/test.pdf"
+        )
+        mock_query.return_value = Patents(patents=[test_patent])
+        test_patent.pdf_path = "patents/US123.pdf"
+        mock_save.return_value = test_patent
+
+        parsed_sections = {
+            "abstract": "Test abstract",
+            "claims": "Test claims",
+        }
+        mock_parse.return_value = parsed_sections
+        mock_minimize.return_value = "Minimized content"
+
+        mock_llm_instance = Mock()
+        mock_llm_instance.analyze_patent_portfolio.return_value = (
+            "Strong innovation portfolio"
+        )
+        mock_llm.return_value = mock_llm_instance
+
+        # Run the analysis
+        analyzer = CompanyAnalyzer()
+        result = analyzer.analyze_company("TestCorp")
+
+        # Verify the pipeline executed correctly
+        assert result == "Strong innovation portfolio"
+        mock_query.assert_called_once_with("TestCorp")
+        mock_save.assert_called_once()
+        mock_parse.assert_called_once_with("patents/US123.pdf")
+        mock_minimize.assert_called_once_with(parsed_sections)
+        mock_llm_instance.analyze_patent_portfolio.assert_called_once()
+
+        # Verify the data passed to LLM (keyword arguments only)
+        llm_kwargs = mock_llm_instance.analyze_patent_portfolio.call_args.kwargs
+        assert llm_kwargs["company_name"] == "TestCorp"
+        patents_data = llm_kwargs["patents_data"]
+        assert len(patents_data) == 1
+        assert patents_data[0]["patent_id"] == "US123"
+        assert patents_data[0]["content"] == "Minimized content"
+
+    def test_analyze_company_no_patents_found(self, mocker):
+        """Test handling when no patents are found for a company."""
+        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
+        mock_query.return_value = Patents(patents=[])
+        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
+
+        result = CompanyAnalyzer().analyze_company("UnknownCorp")
+
+        assert result == "No patents found for UnknownCorp"
+        mock_llm.return_value.analyze_patent_portfolio.assert_not_called()
+
+    def test_analyze_company_handles_processing_errors(self, mocker):
+        """Test that analysis continues even if some patents fail to process."""
+        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
+        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
+        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
+        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
+        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
+
+        # Create two test patents
+        patent1 = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
+        patent2 = Patent(patent_id="US456", pdf_link="http://example.com/2.pdf")
+        mock_query.return_value = Patents(patents=[patent1, patent2])
+
+        # US123 downloads successfully; every other patent fails to download
+        def save_side_effect(p):
+            if p.patent_id != "US123":
+                raise RuntimeError("Download failed")
+            p.pdf_path = "patents/US123.pdf"
+            return p
+
+        mock_save.side_effect = save_side_effect
+
+        mock_parse.return_value = {"abstract": "Test"}
+        mock_minimize.return_value = "Content"
+
+        mock_llm_instance = Mock()
+        mock_llm_instance.analyze_patent_portfolio.return_value = "Analysis result"
+        mock_llm.return_value = mock_llm_instance
+
+        analyzer = CompanyAnalyzer()
+        result = analyzer.analyze_company("TestCorp")
+
+        # Should still succeed with the one patent that worked
+        assert result == "Analysis result"
+
+        # Both downloads were attempted, but only US123 reached the parser
+        assert mock_save.call_count == 2
+        mock_parse.assert_called_once_with("patents/US123.pdf")
+
+        # Verify only one patent was analyzed
+        llm_kwargs = mock_llm_instance.analyze_patent_portfolio.call_args.kwargs
+        patents_data = llm_kwargs["patents_data"]
+        assert len(patents_data) == 1
+        assert patents_data[0]["patent_id"] == "US123"
+
+    def test_analyze_company_all_patents_fail(self, mocker):
+        """Test handling when all patents fail to process."""
+        mock_query = mocker.patch("SPARC.analyzer.SERP.query")
+        mock_save = mocker.patch("SPARC.analyzer.SERP.save_patents")
+        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
+
+        patent = Patent(patent_id="US123", pdf_link="http://example.com/1.pdf")
+        mock_query.return_value = Patents(patents=[patent])
+
+        # Make processing fail
+        mock_save.side_effect = Exception("Processing error")
+
+        result = CompanyAnalyzer().analyze_company("TestCorp")
+
+        assert result == "Failed to process any patents for TestCorp"
+        mock_llm.return_value.analyze_patent_portfolio.assert_not_called()
+
+    def test_analyze_single_patent(self, mocker):
+        """Test single patent analysis."""
+        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
+        mock_minimize = mocker.patch("SPARC.analyzer.SERP.minimize_patent_for_llm")
+        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
+
+        mock_parse.return_value = {"abstract": "Test abstract"}
+        mock_minimize.return_value = "Minimized content"
+        mock_llm_instance = Mock()
+        mock_llm_instance.analyze_patent_content.return_value = (
+            "Innovative patent analysis"
+        )
+        mock_llm.return_value = mock_llm_instance
+
+        analyzer = CompanyAnalyzer()
+        result = analyzer.analyze_single_patent("US123", "TestCorp")
+
+        assert result == "Innovative patent analysis"
+        mock_parse.assert_called_once_with("patents/US123.pdf")
+        mock_minimize.assert_called_once_with({"abstract": "Test abstract"})
+        mock_llm_instance.analyze_patent_content.assert_called_once_with(
+            patent_content="Minimized content", company_name="TestCorp"
+        )
+
+    def test_analyze_single_patent_error_handling(self, mocker):
+        """Test single patent analysis with processing error."""
+        mock_parse = mocker.patch("SPARC.analyzer.SERP.parse_patent_pdf")
+        mock_llm = mocker.patch("SPARC.analyzer.LLMAnalyzer")
+
+        mock_parse.side_effect = FileNotFoundError("PDF not found")
+
+        result = CompanyAnalyzer().analyze_single_patent("US999", "TestCorp")
+
+        assert "Failed to analyze patent US999" in result
+        assert "PDF not found" in result
+        mock_llm.return_value.analyze_patent_content.assert_not_called()