forked from 0xWheatyz/SPARC
04f4d36307
Allow users to choose the LLM model on a per-analysis basis. The model field is optional in both single and batch analysis requests, defaulting to the server-configured MODEL env var. The model used is recorded in the analysis result and database.

- Add model parameter to LLMAnalyzer.analyze_patent_content and analyze_patent_portfolio
- Add model field to CompanyAnalysisResult and API response
- Add model field to BatchAnalysisRequest
- Add GET /models endpoint listing supported models and the default
- Store model in llm_messages metadata for attribution

Closes leeworks-agents/SPARC#37

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
249 lines
9.5 KiB
Python
249 lines
9.5 KiB
Python
"""LLM integration for patent analysis using OpenRouter."""
|
|
|
|
import logging
|
|
from typing import Dict
|
|
|
|
from openai import OpenAI
|
|
|
|
from SPARC import config
|
|
from SPARC.database import DatabaseClient
|
|
|
|
# Module-level logger named after this module; LLMAnalyzer uses it to dump
# prompts at DEBUG level when running in test mode.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class LLMAnalyzer:
|
|
"""Handles LLM-based analysis of patent content."""
|
|
|
|
def __init__(self, api_key: str | None = None, test_mode: bool = False, use_cache: bool | None = None):
|
|
"""Initialize the LLM analyzer.
|
|
|
|
Args:
|
|
api_key: OpenRouter API key. If None, will attempt to load from config.
|
|
test_mode: If True, print prompts instead of making API calls
|
|
use_cache: If True, check database cache before making API calls.
|
|
If None, uses config.use_cache (default: True)
|
|
"""
|
|
self.test_mode = test_mode
|
|
self.use_cache = use_cache if use_cache is not None else config.use_cache
|
|
self.model = config.model
|
|
|
|
# Always initialize database client for storage and caching
|
|
self.db_client = DatabaseClient(config.database_url)
|
|
self.db_client.initialize_schema()
|
|
|
|
# Initialize OpenRouter client if API key is available
|
|
if (api_key or config.openrouter_api_key) and not test_mode:
|
|
self.client = OpenAI(
|
|
api_key=api_key or config.openrouter_api_key,
|
|
base_url="https://openrouter.ai/api/v1"
|
|
)
|
|
else:
|
|
self.client = None
|
|
|
|
def analyze_patent_content(self, patent_content: str, company_name: str, model: str | None = None) -> str:
|
|
"""Analyze patent content to estimate company innovation and performance.
|
|
|
|
Args:
|
|
patent_content: Minimized patent text (abstract, claims, summary)
|
|
company_name: Name of the company for context
|
|
model: Optional model override (e.g. "openai/gpt-4o"). Defaults to config.
|
|
|
|
Returns:
|
|
Analysis text describing innovation quality and potential impact
|
|
"""
|
|
prompt = f"""You are a patent analyst evaluating {company_name}'s innovation strategy.
|
|
|
|
Analyze the following patent content and provide insights on:
|
|
1. Innovation quality and novelty
|
|
2. Technical complexity and defensibility
|
|
3. Market potential and commercial viability
|
|
4. Strategic positioning relative to industry trends
|
|
|
|
Patent Content:
|
|
{patent_content}
|
|
|
|
Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""
|
|
|
|
effective_model = model or self.model
|
|
|
|
if self.test_mode:
|
|
logger.debug("TEST MODE - Prompt that would be sent to LLM:\n%s", prompt)
|
|
return "[TEST MODE - No API call made]"
|
|
|
|
# Check cache first
|
|
if self.use_cache:
|
|
cached = self.db_client.get_cached_response(
|
|
prompt=prompt,
|
|
company_name=company_name,
|
|
analysis_type="single_patent"
|
|
)
|
|
if cached:
|
|
# Log the cache hit
|
|
self.db_client.store_message(
|
|
prompt=prompt,
|
|
response=cached["response"],
|
|
company_name=company_name,
|
|
analysis_type="single_patent",
|
|
model=effective_model,
|
|
metadata={
|
|
"patent_content_length": len(patent_content),
|
|
"cache_hit": True,
|
|
"original_message_id": cached["id"]
|
|
},
|
|
is_cached=True
|
|
)
|
|
return cached["response"]
|
|
|
|
# Call API if no cache hit and client is available
|
|
if self.client:
|
|
response = self.client.chat.completions.create(
|
|
model=effective_model,
|
|
max_tokens=1024,
|
|
messages=[{"role": "user", "content": prompt}],
|
|
)
|
|
response_text = response.choices[0].message.content
|
|
|
|
# Store in database for future cache lookups
|
|
self.db_client.store_message(
|
|
prompt=prompt,
|
|
response=response_text,
|
|
company_name=company_name,
|
|
analysis_type="single_patent",
|
|
model=effective_model,
|
|
metadata={"patent_content_length": len(patent_content)},
|
|
token_usage={
|
|
"prompt_tokens": response.usage.prompt_tokens,
|
|
"completion_tokens": response.usage.completion_tokens,
|
|
"total_tokens": response.usage.total_tokens
|
|
} if hasattr(response, 'usage') else None
|
|
)
|
|
|
|
return response_text
|
|
|
|
# No API client available - store prompt for later processing
|
|
placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
|
|
self.db_client.store_message(
|
|
prompt=prompt,
|
|
response=placeholder,
|
|
company_name=company_name,
|
|
analysis_type="single_patent",
|
|
model=effective_model,
|
|
metadata={"patent_content_length": len(patent_content), "pending": True}
|
|
)
|
|
return placeholder
|
|
|
|
def analyze_patent_portfolio(
|
|
self, patents_data: list[Dict[str, str]], company_name: str, model: str | None = None
|
|
) -> str:
|
|
"""Analyze multiple patents to estimate overall company performance.
|
|
|
|
Args:
|
|
patents_data: List of dicts, each containing 'patent_id' and 'content'
|
|
company_name: Name of the company being analyzed
|
|
|
|
Returns:
|
|
Comprehensive analysis of company's innovation trajectory and outlook
|
|
"""
|
|
# Combine all patent summaries
|
|
portfolio_summary = []
|
|
for idx, patent in enumerate(patents_data, 1):
|
|
portfolio_summary.append(
|
|
f"Patent {idx} ({patent['patent_id']}):\n{patent['content']}"
|
|
)
|
|
|
|
combined_content = "\n\n---\n\n".join(portfolio_summary)
|
|
|
|
prompt = f"""You are analyzing {company_name}'s patent portfolio to estimate their future performance and innovation trajectory.
|
|
|
|
You have {len(patents_data)} recent patents to analyze. Evaluate the portfolio holistically:
|
|
|
|
1. Innovation Trends: What technology areas are they focusing on?
|
|
2. Strategic Direction: What does this reveal about their business strategy?
|
|
3. Competitive Position: How defensible are these innovations?
|
|
4. Market Outlook: What market opportunities do these patents target?
|
|
5. Performance Forecast: Based on this innovation activity, what's your assessment of their likely performance?
|
|
|
|
Patent Portfolio:
|
|
{combined_content}
|
|
|
|
Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""
|
|
|
|
effective_model = model or self.model
|
|
|
|
if self.test_mode:
|
|
logger.debug("TEST MODE - Portfolio prompt:\n%s", prompt)
|
|
return "[TEST MODE]"
|
|
|
|
metadata = {
|
|
"patent_count": len(patents_data),
|
|
"patent_ids": [p['patent_id'] for p in patents_data],
|
|
"model": effective_model,
|
|
}
|
|
|
|
# Check cache first
|
|
if self.use_cache:
|
|
cached = self.db_client.get_cached_response(
|
|
prompt=prompt,
|
|
company_name=company_name,
|
|
analysis_type="portfolio"
|
|
)
|
|
if cached:
|
|
# Log the cache hit
|
|
self.db_client.store_message(
|
|
prompt=prompt,
|
|
response=cached["response"],
|
|
company_name=company_name,
|
|
analysis_type="portfolio",
|
|
model=effective_model,
|
|
metadata={
|
|
**metadata,
|
|
"cache_hit": True,
|
|
"original_message_id": cached["id"]
|
|
},
|
|
is_cached=True
|
|
)
|
|
return cached["response"]
|
|
|
|
# Call API if no cache hit and client is available
|
|
if self.client:
|
|
try:
|
|
response = self.client.chat.completions.create(
|
|
model=effective_model,
|
|
max_tokens=2048,
|
|
messages=[{"role": "user", "content": prompt}],
|
|
)
|
|
|
|
response_text = response.choices[0].message.content
|
|
|
|
# Store in database for future cache lookups
|
|
self.db_client.store_message(
|
|
prompt=prompt,
|
|
response=response_text,
|
|
company_name=company_name,
|
|
analysis_type="portfolio",
|
|
model=effective_model,
|
|
metadata=metadata,
|
|
token_usage={
|
|
"prompt_tokens": response.usage.prompt_tokens,
|
|
"completion_tokens": response.usage.completion_tokens,
|
|
"total_tokens": response.usage.total_tokens
|
|
} if hasattr(response, 'usage') else None
|
|
)
|
|
|
|
return response_text
|
|
except AttributeError:
|
|
return prompt
|
|
|
|
# No API client available - store prompt for later processing
|
|
placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
|
|
self.db_client.store_message(
|
|
prompt=prompt,
|
|
response=placeholder,
|
|
company_name=company_name,
|
|
analysis_type="portfolio",
|
|
model=effective_model,
|
|
metadata={**metadata, "pending": True}
|
|
)
|
|
return placeholder
|
|
|