feat: add LLM integration for patent analysis
Implemented LLMAnalyzer class using Anthropic's Claude API for:
- Single patent content analysis
- Portfolio-wide analysis across multiple patents
- Configurable API key management via environment variables

Key features:
- Uses Claude 3.5 Sonnet for high-quality analysis
- Structured prompts for innovation assessment
- Token limits optimized per use case (1024 for single, 2048 for portfolio)
- Analyzes: innovation quality, market potential, strategic direction

Updated config.py to support ANTHROPIC_API_KEY environment variable.

Added comprehensive test suite (6 tests) covering:
- Initialization from config and direct API key
- Single patent analysis
- Portfolio analysis
- Token limit validation

All 19 tests passing.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
26a23c02ae
commit
d7cf80f02f
@ -1,6 +1,14 @@
|
|||||||
# Handle all of the configurations and secrets
|
"""Configuration and secrets management.
|
||||||
|
|
||||||
|
Loads environment variables from .env file for API keys and other secrets.
|
||||||
|
"""
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import os
|
import os
|
||||||
|
|
||||||
# Load variables from the .env file into the environment before any key
# is looked up below.
load_dotenv()

# SerpAPI key for patent search
api_key = os.environ.get("API_KEY")

# Anthropic API key for LLM analysis
anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
|
||||||
|
|||||||
93
SPARC/llm.py
Normal file
93
SPARC/llm.py
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
"""LLM integration for patent analysis using Anthropic's Claude."""
|
||||||
|
|
||||||
|
from anthropic import Anthropic
|
||||||
|
from SPARC import config
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
|
||||||
|
class LLMAnalyzer:
|
||||||
|
"""Handles LLM-based analysis of patent content."""
|
||||||
|
|
||||||
|
def __init__(self, api_key: str | None = None):
|
||||||
|
"""Initialize the LLM analyzer.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
api_key: Anthropic API key. If None, will attempt to load from config.
|
||||||
|
"""
|
||||||
|
self.client = Anthropic(api_key=api_key or config.anthropic_api_key)
|
||||||
|
self.model = "claude-3-5-sonnet-20241022"
|
||||||
|
|
||||||
|
def analyze_patent_content(self, patent_content: str, company_name: str) -> str:
|
||||||
|
"""Analyze patent content to estimate company innovation and performance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
patent_content: Minimized patent text (abstract, claims, summary)
|
||||||
|
company_name: Name of the company for context
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Analysis text describing innovation quality and potential impact
|
||||||
|
"""
|
||||||
|
prompt = f"""You are a patent analyst evaluating {company_name}'s innovation strategy.
|
||||||
|
|
||||||
|
Analyze the following patent content and provide insights on:
|
||||||
|
1. Innovation quality and novelty
|
||||||
|
2. Technical complexity and defensibility
|
||||||
|
3. Market potential and commercial viability
|
||||||
|
4. Strategic positioning relative to industry trends
|
||||||
|
|
||||||
|
Patent Content:
|
||||||
|
{patent_content}
|
||||||
|
|
||||||
|
Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""
|
||||||
|
|
||||||
|
message = self.client.messages.create(
|
||||||
|
model=self.model,
|
||||||
|
max_tokens=1024,
|
||||||
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
)
|
||||||
|
|
||||||
|
return message.content[0].text
|
||||||
|
|
||||||
|
def analyze_patent_portfolio(
|
||||||
|
self, patents_data: list[Dict[str, str]], company_name: str
|
||||||
|
) -> str:
|
||||||
|
"""Analyze multiple patents to estimate overall company performance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
patents_data: List of dicts, each containing 'patent_id' and 'content'
|
||||||
|
company_name: Name of the company being analyzed
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Comprehensive analysis of company's innovation trajectory and outlook
|
||||||
|
"""
|
||||||
|
# Combine all patent summaries
|
||||||
|
portfolio_summary = []
|
||||||
|
for idx, patent in enumerate(patents_data, 1):
|
||||||
|
portfolio_summary.append(
|
||||||
|
f"Patent {idx} ({patent['patent_id']}):\n{patent['content']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
combined_content = "\n\n---\n\n".join(portfolio_summary)
|
||||||
|
|
||||||
|
prompt = f"""You are analyzing {company_name}'s patent portfolio to estimate their future performance and innovation trajectory.
|
||||||
|
|
||||||
|
You have {len(patents_data)} recent patents to analyze. Evaluate the portfolio holistically:
|
||||||
|
|
||||||
|
1. Innovation Trends: What technology areas are they focusing on?
|
||||||
|
2. Strategic Direction: What does this reveal about their business strategy?
|
||||||
|
3. Competitive Position: How defensible are these innovations?
|
||||||
|
4. Market Outlook: What market opportunities do these patents target?
|
||||||
|
5. Performance Forecast: Based on this innovation activity, what's your assessment of their likely performance?
|
||||||
|
|
||||||
|
Patent Portfolio:
|
||||||
|
{combined_content}
|
||||||
|
|
||||||
|
Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""
|
||||||
|
|
||||||
|
message = self.client.messages.create(
|
||||||
|
model=self.model,
|
||||||
|
max_tokens=2048,
|
||||||
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
)
|
||||||
|
|
||||||
|
return message.content[0].text
|
||||||
@ -4,3 +4,4 @@ pdfplumber
|
|||||||
requests
|
requests
|
||||||
pytest
|
pytest
|
||||||
pytest-mock
|
pytest-mock
|
||||||
|
anthropic
|
||||||
|
|||||||
124
tests/test_llm.py
Normal file
124
tests/test_llm.py
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
"""Tests for LLM analysis functionality."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import Mock, MagicMock
|
||||||
|
from SPARC.llm import LLMAnalyzer
|
||||||
|
|
||||||
|
|
||||||
|
class TestLLMAnalyzer:
    """Exercise LLMAnalyzer construction and its calls to the Anthropic client."""

    @staticmethod
    def _analyzer_with_reply(mocker, reply_text):
        """Build an analyzer whose patched client answers with ``reply_text``.

        Returns the analyzer together with the mocked ``messages.create`` so
        each test can inspect how the API was called.
        """
        fake_client = Mock()
        mocker.patch("SPARC.llm.Anthropic", return_value=fake_client)
        fake_client.messages.create.return_value = Mock(
            content=[Mock(text=reply_text)]
        )
        return LLMAnalyzer(api_key="test-key"), fake_client.messages.create

    def test_analyzer_initialization_with_api_key(self, mocker):
        """An explicitly supplied API key is handed to the client."""
        anthropic_cls = mocker.patch("SPARC.llm.Anthropic")

        analyzer = LLMAnalyzer(api_key="test-key-123")

        anthropic_cls.assert_called_once_with(api_key="test-key-123")
        assert analyzer.model == "claude-3-5-sonnet-20241022"

    def test_analyzer_initialization_from_config(self, mocker):
        """Without an explicit key, the one from config is used."""
        anthropic_cls = mocker.patch("SPARC.llm.Anthropic")
        patched_config = mocker.patch("SPARC.llm.config")
        patched_config.anthropic_api_key = "config-key-456"

        LLMAnalyzer()

        anthropic_cls.assert_called_once_with(api_key="config-key-456")

    def test_analyze_patent_content(self, mocker):
        """Single-patent analysis returns the reply and embeds its inputs."""
        analyzer, create = self._analyzer_with_reply(
            mocker, "Innovative GPU architecture."
        )

        result = analyzer.analyze_patent_content(
            patent_content="ABSTRACT: GPU with new cache design...",
            company_name="NVIDIA",
        )

        assert result == "Innovative GPU architecture."
        create.assert_called_once()

        # The prompt must carry both the company name and the patent text.
        prompt_text = create.call_args.kwargs["messages"][0]["content"]
        assert "NVIDIA" in prompt_text
        assert "GPU with new cache design" in prompt_text

    def test_analyze_patent_portfolio(self, mocker):
        """Portfolio analysis returns the reply and embeds every patent."""
        analyzer, create = self._analyzer_with_reply(
            mocker, "Strong portfolio in AI and graphics."
        )
        patents_data = [
            {"patent_id": "US123", "content": "AI acceleration patent"},
            {"patent_id": "US456", "content": "Graphics rendering patent"},
        ]

        result = analyzer.analyze_patent_portfolio(
            patents_data=patents_data, company_name="NVIDIA"
        )

        assert result == "Strong portfolio in AI and graphics."
        create.assert_called_once()

        # Every patent id and every patent body must appear in the prompt.
        prompt_text = create.call_args.kwargs["messages"][0]["content"]
        assert "US123" in prompt_text
        assert "US456" in prompt_text
        assert "AI acceleration patent" in prompt_text
        assert "Graphics rendering patent" in prompt_text

    def test_analyze_patent_portfolio_with_correct_token_limit(self, mocker):
        """Portfolio analysis requests the larger 2048-token budget."""
        analyzer, create = self._analyzer_with_reply(mocker, "Analysis result.")
        patents_data = [{"patent_id": "US123", "content": "Test content"}]

        analyzer.analyze_patent_portfolio(patents_data, "TestCo")

        assert create.call_args.kwargs["max_tokens"] == 2048

    def test_analyze_single_patent_with_correct_token_limit(self, mocker):
        """Single-patent analysis requests the smaller 1024-token budget."""
        analyzer, create = self._analyzer_with_reply(mocker, "Analysis result.")

        analyzer.analyze_patent_content("Test content", "TestCo")

        assert create.call_args.kwargs["max_tokens"] == 1024
|
||||||
Loading…
Reference in New Issue
Block a user