From d7cf80f02fe8160815103af6eb9b1c3be3800383 Mon Sep 17 00:00:00 2001 From: 0xWheatyz Date: Thu, 19 Feb 2026 18:55:35 -0500 Subject: [PATCH] feat: add LLM integration for patent analysis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implemented LLMAnalyzer class using Anthropic's Claude API for: - Single patent content analysis - Portfolio-wide analysis across multiple patents - Configurable API key management via environment variables Key features: - Uses Claude 3.5 Sonnet for high-quality analysis - Structured prompts for innovation assessment - Token limits optimized per use case (1024 for single, 2048 for portfolio) - Analyzes: innovation quality, market potential, strategic direction Updated config.py to support ANTHROPIC_API_KEY environment variable. Added comprehensive test suite (6 tests) covering: - Initialization from config and direct API key - Single patent analysis - Portfolio analysis - Token limit validation All 19 tests passing. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- SPARC/config.py | 10 +++- SPARC/llm.py | 93 ++++++++++++++++++++++++++++++++++ requirements.txt | 1 + tests/test_llm.py | 124 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 227 insertions(+), 1 deletion(-) create mode 100644 SPARC/llm.py create mode 100644 tests/test_llm.py diff --git a/SPARC/config.py b/SPARC/config.py index 0d4f525..5bc089d 100644 --- a/SPARC/config.py +++ b/SPARC/config.py @@ -1,6 +1,14 @@ -# Handle all of the configurations and secrets +"""Configuration and secrets management. + +Loads environment variables from .env file for API keys and other secrets. 
+"""
 from dotenv import load_dotenv
 import os
 
 load_dotenv()
+
+# SerpAPI key for patent search
 api_key = os.getenv("API_KEY")
+
+# Anthropic API key for LLM analysis
+anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
diff --git a/SPARC/llm.py b/SPARC/llm.py
new file mode 100644
index 0000000..f81be19
--- /dev/null
+++ b/SPARC/llm.py
@@ -0,0 +1,121 @@
+"""LLM integration for patent analysis using Anthropic's Claude."""
+
+from anthropic import Anthropic
+from SPARC import config
+from typing import Dict
+
+# Default Claude model; pass ``model=`` to LLMAnalyzer to use a different one.
+DEFAULT_MODEL = "claude-3-5-sonnet-20241022"
+
+
+class LLMAnalyzer:
+    """Handles LLM-based analysis of patent content."""
+
+    def __init__(self, api_key: str | None = None, model: str = DEFAULT_MODEL):
+        """Initialize the LLM analyzer.
+
+        Args:
+            api_key: Anthropic API key. If None, will attempt to load from config.
+            model: Model identifier sent with every request.
+        """
+        self.client = Anthropic(api_key=api_key or config.anthropic_api_key)
+        self.model = model
+
+    def _complete(self, prompt: str, max_tokens: int) -> str:
+        """Send one user prompt and return the text of the reply.
+
+        Args:
+            prompt: Prompt text, sent as a single user message.
+            max_tokens: Value passed as the request's max_tokens.
+
+        Returns:
+            The text of every text-bearing content block, concatenated.
+
+        Raises:
+            RuntimeError: If the reply contains no text content.
+        """
+        message = self.client.messages.create(
+            model=self.model,
+            max_tokens=max_tokens,
+            messages=[{"role": "user", "content": prompt}],
+        )
+
+        # The reply is a list of content blocks. Indexing [0].text breaks on an
+        # empty list or a leading non-text block, so collect the text explicitly.
+        blocks = (getattr(block, "text", None) for block in message.content)
+        texts = [text for text in blocks if isinstance(text, str)]
+        if not texts:
+            raise RuntimeError("LLM response contained no text content")
+        return "".join(texts)
+
+    def analyze_patent_content(self, patent_content: str, company_name: str) -> str:
+        """Analyze patent content to estimate company innovation and performance.
+
+        Args:
+            patent_content: Minimized patent text (abstract, claims, summary)
+            company_name: Name of the company for context
+
+        Returns:
+            Analysis text describing innovation quality and potential impact
+
+        Raises:
+            RuntimeError: If the reply contains no text content.
+        """
+        prompt = f"""You are a patent analyst evaluating {company_name}'s innovation strategy.
+
+Analyze the following patent content and provide insights on:
+1. Innovation quality and novelty
+2. Technical complexity and defensibility
+3. Market potential and commercial viability
+4. Strategic positioning relative to industry trends
+
+Patent Content:
+{patent_content}
+
+Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""
+
+        return self._complete(prompt, max_tokens=1024)
+
+    def analyze_patent_portfolio(
+        self, patents_data: list[Dict[str, str]], company_name: str
+    ) -> str:
+        """Analyze multiple patents to estimate overall company performance.
+
+        Args:
+            patents_data: List of dicts, each containing 'patent_id' and 'content'
+            company_name: Name of the company being analyzed
+
+        Returns:
+            Comprehensive analysis of company's innovation trajectory and outlook
+
+        Raises:
+            ValueError: If patents_data is empty.
+            RuntimeError: If the reply contains no text content.
+        """
+        # An empty portfolio would still cost an API call and yield an analysis
+        # of nothing, so reject it before building the prompt.
+        if not patents_data:
+            raise ValueError("patents_data must contain at least one patent")
+
+        # Combine all patent summaries
+        combined_content = "\n\n---\n\n".join(
+            f"Patent {idx} ({patent['patent_id']}):\n{patent['content']}"
+            for idx, patent in enumerate(patents_data, 1)
+        )
+
+        prompt = f"""You are analyzing {company_name}'s patent portfolio to estimate their future performance and innovation trajectory.
+
+You have {len(patents_data)} recent patents to analyze. Evaluate the portfolio holistically:
+
+1. Innovation Trends: What technology areas are they focusing on?
+2. Strategic Direction: What does this reveal about their business strategy?
+3. Competitive Position: How defensible are these innovations?
+4. Market Outlook: What market opportunities do these patents target?
+5. Performance Forecast: Based on this innovation activity, what's your assessment of their likely performance?
+
+Patent Portfolio:
+{combined_content}
+
+Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""
+
+        return self._complete(prompt, max_tokens=2048)
diff --git a/requirements.txt b/requirements.txt
index 8e31464..6e6cc2e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ pdfplumber
 requests
 pytest
 pytest-mock
+anthropic
diff --git a/tests/test_llm.py b/tests/test_llm.py
new file mode 100644
index 0000000..9465661
--- /dev/null
+++ b/tests/test_llm.py
@@ -0,0 +1,124 @@
+"""Tests for LLM analysis functionality."""
+
+import pytest
+from unittest.mock import Mock, MagicMock
+from SPARC.llm import LLMAnalyzer
+
+
+class TestLLMAnalyzer:
+    """Test LLM analyzer initialization and API interaction."""
+
+    def test_analyzer_initialization_with_api_key(self, mocker):
+        """Test that analyzer initializes with provided API key."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+
+        analyzer = LLMAnalyzer(api_key="test-key-123")
+
+        mock_anthropic.assert_called_once_with(api_key="test-key-123")
+        assert analyzer.model == "claude-3-5-sonnet-20241022"
+
+    def test_analyzer_initialization_from_config(self, mocker):
+        """Test that analyzer loads API key from config when not provided."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+        mock_config = mocker.patch("SPARC.llm.config")
+        mock_config.anthropic_api_key = "config-key-456"
+
+        analyzer = LLMAnalyzer()
+
+        mock_anthropic.assert_called_once_with(api_key="config-key-456")
+
+    def test_analyze_patent_content(self, mocker):
+        """Test single patent content analysis."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+        mock_client = Mock()
+        mock_anthropic.return_value = mock_client
+
+        # Mock the API response
+        mock_response = Mock()
+        mock_response.content = [Mock(text="Innovative GPU 
architecture.")]
+        mock_client.messages.create.return_value = mock_response
+
+        analyzer = LLMAnalyzer(api_key="test-key")
+        result = analyzer.analyze_patent_content(
+            patent_content="ABSTRACT: GPU with new cache design...",
+            company_name="NVIDIA",
+        )
+
+        assert result == "Innovative GPU architecture."
+        mock_client.messages.create.assert_called_once()
+
+        # Verify the prompt (first message in the call's kwargs) has the company name and content
+        call_args = mock_client.messages.create.call_args
+        prompt_text = call_args[1]["messages"][0]["content"]
+        assert "NVIDIA" in prompt_text
+        assert "GPU with new cache design" in prompt_text
+
+    def test_analyze_patent_portfolio(self, mocker):
+        """Portfolio analysis returns the reply text and sends every patent in one request."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+        mock_client = Mock()
+        mock_anthropic.return_value = mock_client
+
+        # Stub the API reply: a content list whose only block carries the text
+        mock_response = Mock()
+        mock_response.content = [
+            Mock(text="Strong portfolio in AI and graphics.")
+        ]
+        mock_client.messages.create.return_value = mock_response
+
+        analyzer = LLMAnalyzer(api_key="test-key")
+        patents_data = [
+            {"patent_id": "US123", "content": "AI acceleration patent"},
+            {"patent_id": "US456", "content": "Graphics rendering patent"},
+        ]
+
+        result = analyzer.analyze_patent_portfolio(
+            patents_data=patents_data, company_name="NVIDIA"
+        )
+
+        assert result == "Strong portfolio in AI and graphics."
+        mock_client.messages.create.assert_called_once()
+
+        # Verify every patent ID and body appears in the prompt passed to create()
+        call_args = mock_client.messages.create.call_args
+        prompt_text = call_args[1]["messages"][0]["content"]
+        assert "US123" in prompt_text
+        assert "US456" in prompt_text
+        assert "AI acceleration patent" in prompt_text
+        assert "Graphics rendering patent" in prompt_text
+
+    def test_analyze_patent_portfolio_with_correct_token_limit(self, mocker):
+        """Portfolio analysis calls messages.create with max_tokens=2048."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+        mock_client = Mock()
+        mock_anthropic.return_value = mock_client
+
+        mock_response = Mock()
+        mock_response.content = [Mock(text="Analysis result.")]
+        mock_client.messages.create.return_value = mock_response
+
+        analyzer = LLMAnalyzer(api_key="test-key")
+        patents_data = [{"patent_id": "US123", "content": "Test content"}]
+
+        analyzer.analyze_patent_portfolio(patents_data, "TestCo")
+
+        call_args = mock_client.messages.create.call_args
+        # Portfolio analysis should use 2048 tokens (call_args[1] is the kwargs dict)
+        assert call_args[1]["max_tokens"] == 2048
+
+    def test_analyze_single_patent_with_correct_token_limit(self, mocker):
+        """Single-patent analysis calls messages.create with max_tokens=1024."""
+        mock_anthropic = mocker.patch("SPARC.llm.Anthropic")
+        mock_client = Mock()
+        mock_anthropic.return_value = mock_client
+
+        mock_response = Mock()
+        mock_response.content = [Mock(text="Analysis result.")]
+        mock_client.messages.create.return_value = mock_response
+
+        analyzer = LLMAnalyzer(api_key="test-key")
+        analyzer.analyze_patent_content("Test content", "TestCo")
+
+        call_args = mock_client.messages.create.call_args
+        # Single patent should use 1024 tokens (call_args[1] is the kwargs dict)
+        assert call_args[1]["max_tokens"] == 1024