Files
SPARC/SPARC/llm.py
T
agent-company 80c7ddbf8e ci: add pytest and ruff linting to CI, fix all lint errors
- Add test job to build.yaml that runs pytest and ruff before building images
- Add standalone test.yaml workflow for PRs
- Add ruff.toml with E/F/I rules configured
- Fix all ruff lint errors: sort imports, remove unused imports, fix re-exports
- Build jobs now depend on test job passing (needs: test)

Closes leeworks-agents/SPARC#18
Closes leeworks-agents/SPARC#19

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 04:19:45 +00:00

244 lines
9.2 KiB
Python

"""LLM integration for patent analysis using OpenRouter."""
from typing import Dict
from openai import OpenAI
from SPARC import config
from SPARC.database import DatabaseClient
class LLMAnalyzer:
"""Handles LLM-based analysis of patent content."""
def __init__(self, api_key: str | None = None, test_mode: bool = False, use_cache: bool | None = None):
"""Initialize the LLM analyzer.
Args:
api_key: OpenRouter API key. If None, will attempt to load from config.
test_mode: If True, print prompts instead of making API calls
use_cache: If True, check database cache before making API calls.
If None, uses config.use_cache (default: True)
"""
self.test_mode = test_mode
self.use_cache = use_cache if use_cache is not None else config.use_cache
self.model = "anthropic/claude-3.5-sonnet"
# Always initialize database client for storage and caching
self.db_client = DatabaseClient(config.database_url)
self.db_client.initialize_schema()
# Initialize OpenRouter client if API key is available
if (api_key or config.openrouter_api_key) and not test_mode:
self.client = OpenAI(
api_key=api_key or config.openrouter_api_key,
base_url="https://openrouter.ai/api/v1"
)
else:
self.client = None
def analyze_patent_content(self, patent_content: str, company_name: str) -> str:
"""Analyze patent content to estimate company innovation and performance.
Args:
patent_content: Minimized patent text (abstract, claims, summary)
company_name: Name of the company for context
Returns:
Analysis text describing innovation quality and potential impact
"""
prompt = f"""You are a patent analyst evaluating {company_name}'s innovation strategy.
Analyze the following patent content and provide insights on:
1. Innovation quality and novelty
2. Technical complexity and defensibility
3. Market potential and commercial viability
4. Strategic positioning relative to industry trends
Patent Content:
{patent_content}
Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""
if self.test_mode:
print("=" * 80)
print("TEST MODE - Prompt that would be sent to LLM:")
print("=" * 80)
print(prompt)
print("=" * 80)
return "[TEST MODE - No API call made]"
# Check cache first
if self.use_cache:
cached = self.db_client.get_cached_response(
prompt=prompt,
company_name=company_name,
analysis_type="single_patent"
)
if cached:
# Log the cache hit
self.db_client.store_message(
prompt=prompt,
response=cached["response"],
company_name=company_name,
analysis_type="single_patent",
model=self.model,
metadata={
"patent_content_length": len(patent_content),
"cache_hit": True,
"original_message_id": cached["id"]
},
is_cached=True
)
return cached["response"]
# Call API if no cache hit and client is available
if self.client:
response = self.client.chat.completions.create(
model=self.model,
max_tokens=1024,
messages=[{"role": "user", "content": prompt}],
)
response_text = response.choices[0].message.content
# Store in database for future cache lookups
self.db_client.store_message(
prompt=prompt,
response=response_text,
company_name=company_name,
analysis_type="single_patent",
model=self.model,
metadata={"patent_content_length": len(patent_content)},
token_usage={
"prompt_tokens": response.usage.prompt_tokens,
"completion_tokens": response.usage.completion_tokens,
"total_tokens": response.usage.total_tokens
} if hasattr(response, 'usage') else None
)
return response_text
# No API client available - store prompt for later processing
placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
self.db_client.store_message(
prompt=prompt,
response=placeholder,
company_name=company_name,
analysis_type="single_patent",
model=self.model,
metadata={"patent_content_length": len(patent_content), "pending": True}
)
return placeholder
def analyze_patent_portfolio(
self, patents_data: list[Dict[str, str]], company_name: str
) -> str:
"""Analyze multiple patents to estimate overall company performance.
Args:
patents_data: List of dicts, each containing 'patent_id' and 'content'
company_name: Name of the company being analyzed
Returns:
Comprehensive analysis of company's innovation trajectory and outlook
"""
# Combine all patent summaries
portfolio_summary = []
for idx, patent in enumerate(patents_data, 1):
portfolio_summary.append(
f"Patent {idx} ({patent['patent_id']}):\n{patent['content']}"
)
combined_content = "\n\n---\n\n".join(portfolio_summary)
prompt = f"""You are analyzing {company_name}'s patent portfolio to estimate their future performance and innovation trajectory.
You have {len(patents_data)} recent patents to analyze. Evaluate the portfolio holistically:
1. Innovation Trends: What technology areas are they focusing on?
2. Strategic Direction: What does this reveal about their business strategy?
3. Competitive Position: How defensible are these innovations?
4. Market Outlook: What market opportunities do these patents target?
5. Performance Forecast: Based on this innovation activity, what's your assessment of their likely performance?
Patent Portfolio:
{combined_content}
Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""
if self.test_mode:
print(prompt)
return "[TEST MODE]"
metadata = {
"patent_count": len(patents_data),
"patent_ids": [p['patent_id'] for p in patents_data]
}
# Check cache first
if self.use_cache:
cached = self.db_client.get_cached_response(
prompt=prompt,
company_name=company_name,
analysis_type="portfolio"
)
if cached:
# Log the cache hit
self.db_client.store_message(
prompt=prompt,
response=cached["response"],
company_name=company_name,
analysis_type="portfolio",
model=self.model,
metadata={
**metadata,
"cache_hit": True,
"original_message_id": cached["id"]
},
is_cached=True
)
return cached["response"]
# Call API if no cache hit and client is available
if self.client:
try:
response = self.client.chat.completions.create(
model=self.model,
max_tokens=2048,
messages=[{"role": "user", "content": prompt}],
)
response_text = response.choices[0].message.content
# Store in database for future cache lookups
self.db_client.store_message(
prompt=prompt,
response=response_text,
company_name=company_name,
analysis_type="portfolio",
model=self.model,
metadata=metadata,
token_usage={
"prompt_tokens": response.usage.prompt_tokens,
"completion_tokens": response.usage.completion_tokens,
"total_tokens": response.usage.total_tokens
} if hasattr(response, 'usage') else None
)
return response_text
except AttributeError:
return prompt
# No API client available - store prompt for later processing
placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
self.db_client.store_message(
prompt=prompt,
response=placeholder,
company_name=company_name,
analysis_type="portfolio",
model=self.model,
metadata={**metadata, "pending": True}
)
return placeholder