Files
SPARC/SPARC/llm.py
T
agent-company b000146585 feat: configurable LLM model, SERP cache TTL, structured logging, fix patent_id type
- Make LLM model configurable via MODEL env var, default anthropic/claude-3.5-sonnet (#12)
- Expose SERP cache TTL as SERP_CACHE_TTL_HOURS env var, default 24 hours (#13)
- Fix Patent.patent_id type annotation from int to str in types.py (#14)
- Replace all print() calls with structured logging in analyzer.py and llm.py (#11)
- Add LOG_LEVEL config with basicConfig setup in config.py
- Add model and serp_cache_ttl_hours to config.py

Closes leeworks-agents/SPARC#11
Closes leeworks-agents/SPARC#12
Closes leeworks-agents/SPARC#13
Closes leeworks-agents/SPARC#14

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 06:03:25 +00:00

243 lines
9.2 KiB
Python

"""LLM integration for patent analysis using OpenRouter."""
import logging
from typing import Dict
from openai import OpenAI
from SPARC import config
from SPARC.database import DatabaseClient
logger = logging.getLogger(__name__)
class LLMAnalyzer:
"""Handles LLM-based analysis of patent content."""
def __init__(self, api_key: str | None = None, test_mode: bool = False, use_cache: bool | None = None):
"""Initialize the LLM analyzer.
Args:
api_key: OpenRouter API key. If None, will attempt to load from config.
test_mode: If True, print prompts instead of making API calls
use_cache: If True, check database cache before making API calls.
If None, uses config.use_cache (default: True)
"""
self.test_mode = test_mode
self.use_cache = use_cache if use_cache is not None else config.use_cache
self.model = config.model
# Always initialize database client for storage and caching
self.db_client = DatabaseClient(config.database_url)
self.db_client.initialize_schema()
# Initialize OpenRouter client if API key is available
if (api_key or config.openrouter_api_key) and not test_mode:
self.client = OpenAI(
api_key=api_key or config.openrouter_api_key,
base_url="https://openrouter.ai/api/v1"
)
else:
self.client = None
def analyze_patent_content(self, patent_content: str, company_name: str) -> str:
"""Analyze patent content to estimate company innovation and performance.
Args:
patent_content: Minimized patent text (abstract, claims, summary)
company_name: Name of the company for context
Returns:
Analysis text describing innovation quality and potential impact
"""
prompt = f"""You are a patent analyst evaluating {company_name}'s innovation strategy.
Analyze the following patent content and provide insights on:
1. Innovation quality and novelty
2. Technical complexity and defensibility
3. Market potential and commercial viability
4. Strategic positioning relative to industry trends
Patent Content:
{patent_content}
Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""
if self.test_mode:
logger.debug("TEST MODE - Prompt that would be sent to LLM:\n%s", prompt)
return "[TEST MODE - No API call made]"
# Check cache first
if self.use_cache:
cached = self.db_client.get_cached_response(
prompt=prompt,
company_name=company_name,
analysis_type="single_patent"
)
if cached:
# Log the cache hit
self.db_client.store_message(
prompt=prompt,
response=cached["response"],
company_name=company_name,
analysis_type="single_patent",
model=self.model,
metadata={
"patent_content_length": len(patent_content),
"cache_hit": True,
"original_message_id": cached["id"]
},
is_cached=True
)
return cached["response"]
# Call API if no cache hit and client is available
if self.client:
response = self.client.chat.completions.create(
model=self.model,
max_tokens=1024,
messages=[{"role": "user", "content": prompt}],
)
response_text = response.choices[0].message.content
# Store in database for future cache lookups
self.db_client.store_message(
prompt=prompt,
response=response_text,
company_name=company_name,
analysis_type="single_patent",
model=self.model,
metadata={"patent_content_length": len(patent_content)},
token_usage={
"prompt_tokens": response.usage.prompt_tokens,
"completion_tokens": response.usage.completion_tokens,
"total_tokens": response.usage.total_tokens
} if hasattr(response, 'usage') else None
)
return response_text
# No API client available - store prompt for later processing
placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
self.db_client.store_message(
prompt=prompt,
response=placeholder,
company_name=company_name,
analysis_type="single_patent",
model=self.model,
metadata={"patent_content_length": len(patent_content), "pending": True}
)
return placeholder
def analyze_patent_portfolio(
self, patents_data: list[Dict[str, str]], company_name: str
) -> str:
"""Analyze multiple patents to estimate overall company performance.
Args:
patents_data: List of dicts, each containing 'patent_id' and 'content'
company_name: Name of the company being analyzed
Returns:
Comprehensive analysis of company's innovation trajectory and outlook
"""
# Combine all patent summaries
portfolio_summary = []
for idx, patent in enumerate(patents_data, 1):
portfolio_summary.append(
f"Patent {idx} ({patent['patent_id']}):\n{patent['content']}"
)
combined_content = "\n\n---\n\n".join(portfolio_summary)
prompt = f"""You are analyzing {company_name}'s patent portfolio to estimate their future performance and innovation trajectory.
You have {len(patents_data)} recent patents to analyze. Evaluate the portfolio holistically:
1. Innovation Trends: What technology areas are they focusing on?
2. Strategic Direction: What does this reveal about their business strategy?
3. Competitive Position: How defensible are these innovations?
4. Market Outlook: What market opportunities do these patents target?
5. Performance Forecast: Based on this innovation activity, what's your assessment of their likely performance?
Patent Portfolio:
{combined_content}
Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""
if self.test_mode:
logger.debug("TEST MODE - Portfolio prompt:\n%s", prompt)
return "[TEST MODE]"
metadata = {
"patent_count": len(patents_data),
"patent_ids": [p['patent_id'] for p in patents_data]
}
# Check cache first
if self.use_cache:
cached = self.db_client.get_cached_response(
prompt=prompt,
company_name=company_name,
analysis_type="portfolio"
)
if cached:
# Log the cache hit
self.db_client.store_message(
prompt=prompt,
response=cached["response"],
company_name=company_name,
analysis_type="portfolio",
model=self.model,
metadata={
**metadata,
"cache_hit": True,
"original_message_id": cached["id"]
},
is_cached=True
)
return cached["response"]
# Call API if no cache hit and client is available
if self.client:
try:
response = self.client.chat.completions.create(
model=self.model,
max_tokens=2048,
messages=[{"role": "user", "content": prompt}],
)
response_text = response.choices[0].message.content
# Store in database for future cache lookups
self.db_client.store_message(
prompt=prompt,
response=response_text,
company_name=company_name,
analysis_type="portfolio",
model=self.model,
metadata=metadata,
token_usage={
"prompt_tokens": response.usage.prompt_tokens,
"completion_tokens": response.usage.completion_tokens,
"total_tokens": response.usage.total_tokens
} if hasattr(response, 'usage') else None
)
return response_text
except AttributeError:
return prompt
# No API client available - store prompt for later processing
placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
self.db_client.store_message(
prompt=prompt,
response=placeholder,
company_name=company_name,
analysis_type="portfolio",
model=self.model,
metadata={**metadata, "pending": True}
)
return placeholder