feat: add LLM integration for patent analysis

Implemented LLMAnalyzer class using Anthropic's Claude API for:
- Single patent content analysis
- Portfolio-wide analysis across multiple patents
- Configurable API key management via environment variables

Key features:
- Uses Claude 3.5 Sonnet for high-quality analysis
- Structured prompts for innovation assessment
- Token limits optimized per use case (1024 for single, 2048 for portfolio)
- Analyzes: innovation quality, market potential, strategic direction

Updated config.py to support ANTHROPIC_API_KEY environment variable.

Added comprehensive test suite (6 tests) covering:
- Initialization from config and direct API key
- Single patent analysis
- Portfolio analysis
- Token limit validation

All 19 tests passing.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-02-19 18:55:35 -05:00 · 2026-02-19 18:55:35 -05:00 · d7cf80f02f
commit d7cf80f02f
parent 26a23c02ae
4 changed files with 227 additions and 1 deletions
--- a/SPARC/config.py
+++ b/SPARC/config.py
@@ -1,6 +1,14 @@
-# Handle all of the configurations and secrets
+"""Configuration and secrets management.
+
+Loads environment variables from .env file for API keys and other secrets.
+"""
 from dotenv import load_dotenv
 import os

 load_dotenv()
+
+# SerpAPI key for patent search (None when API_KEY is not set)
 api_key = os.getenv("API_KEY")
+
+# Anthropic API key for LLM analysis (None when ANTHROPIC_API_KEY is not set)
+anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
--- a/SPARC/llm.py
+++ b/SPARC/llm.py
@@ -0,0 +1,93 @@
+"""LLM integration for patent analysis using Anthropic's Claude."""
+
+from anthropic import Anthropic
+from SPARC import config
+from typing import Dict
+
+
+class LLMAnalyzer:
+    """Handles LLM-based analysis of patent content."""
+
+    def __init__(self, api_key: str | None = None):
+        """Initialize the LLM analyzer.
+
+        Args:
+          api_key: Anthropic API key. If falsy, config.anthropic_api_key is used.
+        """
+        self.client = Anthropic(api_key=api_key or config.anthropic_api_key)
+        self.model = "claude-3-5-sonnet-20241022"
+
+    def _complete(self, prompt: str, max_tokens: int) -> str:
+        """Send one user-role prompt and return the text of the model's reply."""
+        message = self.client.messages.create(
+            model=self.model,
+            max_tokens=max_tokens,
+            messages=[{"role": "user", "content": prompt}],
+        )
+        # Join every text block; indexing content[0] breaks on an empty reply.
+        return "".join(b.text for b in message.content if hasattr(b, "text"))
+
+    def analyze_patent_content(self, patent_content: str, company_name: str) -> str:
+        """Analyze one patent's content for innovation and performance signals.
+
+        Args:
+          patent_content: Minimized patent text (abstract, claims, summary)
+          company_name: Name of the company, interpolated into the prompt
+
+        Returns:
+          The model's analysis text (requested with a 1024-token limit)
+        """
+        prompt = f"""You are a patent analyst evaluating {company_name}'s innovation strategy.
+
+Analyze the following patent content and provide insights on:
+1. Innovation quality and novelty
+2. Technical complexity and defensibility
+3. Market potential and commercial viability
+4. Strategic positioning relative to industry trends
+
+Patent Content:
+{patent_content}
+
+Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""
+
+        return self._complete(prompt, max_tokens=1024)
+
+    def analyze_patent_portfolio(
+        self, patents_data: list[Dict[str, str]], company_name: str
+    ) -> str:
+        """Analyze multiple patents to estimate overall company performance.
+
+        Args:
+          patents_data: List of dicts, each containing 'patent_id' and 'content'
+          company_name: Name of the company being analyzed
+
+        Returns:
+          The model's portfolio analysis text (requested with a 2048-token limit)
+
+        Raises:
+          ValueError: If patents_data is empty; no API request is made
+        """
+        if not patents_data:
+            raise ValueError("patents_data must contain at least one patent")
+
+        combined_content = "\n\n---\n\n".join(
+            f"Patent {idx} ({patent['patent_id']}):\n{patent['content']}"
+            for idx, patent in enumerate(patents_data, 1)
+        )
+
+        prompt = f"""You are analyzing {company_name}'s patent portfolio to estimate their future performance and innovation trajectory.
+
+You have {len(patents_data)} recent patents to analyze. Evaluate the portfolio holistically:
+
+1. Innovation Trends: What technology areas are they focusing on?
+2. Strategic Direction: What does this reveal about their business strategy?
+3. Competitive Position: How defensible are these innovations?
+4. Market Outlook: What market opportunities do these patents target?
+5. Performance Forecast: Based on this innovation activity, what's your assessment of their likely performance?
+
+Patent Portfolio:
+{combined_content}
+
+Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""
+
+        return self._complete(prompt, max_tokens=2048)
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ pdfplumber
 requests
 pytest
 pytest-mock
+anthropic
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@@ -0,0 +1,124 @@
+"""Tests for LLM analysis functionality."""
+
+import pytest
+from unittest.mock import Mock, MagicMock
+from SPARC.llm import LLMAnalyzer
+
+
+class TestLLMAnalyzer:
+    """Test LLM analyzer initialization and API interaction."""
+
+    def test_analyzer_initialization_with_api_key(self, mocker):
+        """Test that analyzer initializes with provided API key."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+
+        analyzer = LLMAnalyzer(api_key="test-key-123")
+
+        mock_anthropic.assert_called_once_with(api_key="test-key-123")
+        assert analyzer.model == "claude-3-5-sonnet-20241022"
+
+    def test_analyzer_initialization_from_config(self, mocker):
+        """Test that analyzer loads API key from config when not provided."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+        mock_config = mocker.patch("SPARC.llm.config")
+        mock_config.anthropic_api_key = "config-key-456"
+
+        LLMAnalyzer()
+
+        mock_anthropic.assert_called_once_with(api_key="config-key-456")
+
+    def test_analyze_patent_content(self, mocker):
+        """Test single patent content analysis."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+        mock_client = Mock()
+        mock_anthropic.return_value = mock_client
+
+        # Fake API response: a content list holding one block with a .text attribute
+        mock_response = Mock()
+        mock_response.content = [Mock(text="Innovative GPU architecture.")]
+        mock_client.messages.create.return_value = mock_response
+
+        analyzer = LLMAnalyzer(api_key="test-key")
+        result = analyzer.analyze_patent_content(
+            patent_content="ABSTRACT: GPU with new cache design...",
+            company_name="NVIDIA",
+        )
+
+        assert result == "Innovative GPU architecture."
+        mock_client.messages.create.assert_called_once()
+
+        # Verify the prompt includes company name and content
+        call_args = mock_client.messages.create.call_args
+        prompt_text = call_args.kwargs["messages"][0]["content"]
+        assert "NVIDIA" in prompt_text
+        assert "GPU with new cache design" in prompt_text
+
+    def test_analyze_patent_portfolio(self, mocker):
+        """Test portfolio analysis with multiple patents."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+        mock_client = Mock()
+        mock_anthropic.return_value = mock_client
+
+        # Fake API response: a content list holding one block with a .text attribute
+        mock_response = Mock()
+        mock_response.content = [
+            Mock(text="Strong portfolio in AI and graphics.")
+        ]
+        mock_client.messages.create.return_value = mock_response
+
+        analyzer = LLMAnalyzer(api_key="test-key")
+        patents_data = [
+            {"patent_id": "US123", "content": "AI acceleration patent"},
+            {"patent_id": "US456", "content": "Graphics rendering patent"},
+        ]
+
+        result = analyzer.analyze_patent_portfolio(
+            patents_data=patents_data, company_name="NVIDIA"
+        )
+
+        assert result == "Strong portfolio in AI and graphics."
+        mock_client.messages.create.assert_called_once()
+
+        # Verify the prompt includes every patent's id and content
+        call_args = mock_client.messages.create.call_args
+        prompt_text = call_args.kwargs["messages"][0]["content"]
+        assert "US123" in prompt_text
+        assert "US456" in prompt_text
+        assert "AI acceleration patent" in prompt_text
+        assert "Graphics rendering patent" in prompt_text
+
+    def test_analyze_patent_portfolio_with_correct_token_limit(self, mocker):
+        """Test that portfolio analysis uses higher token limit."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+        mock_client = Mock()
+        mock_anthropic.return_value = mock_client
+
+        mock_response = Mock()
+        mock_response.content = [Mock(text="Analysis result.")]
+        mock_client.messages.create.return_value = mock_response
+
+        analyzer = LLMAnalyzer(api_key="test-key")
+        patents_data = [{"patent_id": "US123", "content": "Test content"}]
+
+        analyzer.analyze_patent_portfolio(patents_data, "TestCo")
+
+        call_args = mock_client.messages.create.call_args
+        # Portfolio analysis should use 2048 tokens
+        assert call_args.kwargs["max_tokens"] == 2048
+
+    def test_analyze_single_patent_with_correct_token_limit(self, mocker):
+        """Test that single patent analysis uses lower token limit."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+        mock_client = Mock()
+        mock_anthropic.return_value = mock_client
+
+        mock_response = Mock()
+        mock_response.content = [Mock(text="Analysis result.")]
+        mock_client.messages.create.return_value = mock_response
+
+        analyzer = LLMAnalyzer(api_key="test-key")
+        analyzer.analyze_patent_content("Test content", "TestCo")
+
+        call_args = mock_client.messages.create.call_args
+        # Single patent should use 1024 tokens
+        assert call_args.kwargs["max_tokens"] == 1024