forked from 0xWheatyz/SPARC
af4114969a
Replace direct Anthropic API integration with OpenRouter to enable more flexible LLM provider access while maintaining Claude 3.5 Sonnet. Changes: - Replace anthropic package with openai in requirements.txt - Update config to use OPENROUTER_API_KEY instead of ANTHROPIC_API_KEY - Migrate LLMAnalyzer from Anthropic client to OpenAI client with OpenRouter base URL (https://openrouter.ai/api/v1) - Update model identifier to OpenRouter format: anthropic/claude-3.5-sonnet - Convert API calls from messages.create() to chat.completions.create() - Update response parsing to match OpenAI format - Rename API key parameter in CompanyAnalyzer from anthropic_api_key to openrouter_api_key - Update all tests to mock OpenAI client instead of Anthropic - Fix client initialization to accept direct API key parameter 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
113 lines
3.8 KiB
Python
113 lines
3.8 KiB
Python
"""High-level patent analysis orchestration.

This module ties together patent retrieval, parsing, and LLM analysis
to provide company performance estimation based on patent portfolios.
"""
|
|
|
|
from SPARC.serp_api import SERP
|
|
from SPARC.llm import LLMAnalyzer
|
|
from SPARC.types import Patent
|
|
from typing import List
|
|
|
|
|
|
class CompanyAnalyzer:
    """Run the patent-based company analysis pipeline from end to end.

    Patent lookup, PDF download, and text extraction are delegated to
    ``SERP``; the written assessment is produced by an ``LLMAnalyzer``.
    """

    def __init__(self, openrouter_api_key: str | None = None):
        """Create the analyzer together with its LLM backend.

        Args:
            openrouter_api_key: OpenRouter API key handed straight to
                ``LLMAnalyzer``. When None, ``LLMAnalyzer`` is expected to
                fall back to the configured key (verify against
                ``LLMAnalyzer``).
        """
        self.llm_analyzer = LLMAnalyzer(api_key=openrouter_api_key)

    def analyze_company(self, company_name: str) -> str:
        """Estimate a company's performance from its patent portfolio.

        Pipeline:
            1. Query SERP for the company's patents.
            2. For each patent: download the PDF, parse its sections, and
               reduce them to a compact form for the LLM.
            3. Send every successfully processed patent to the LLM in a
               single portfolio-level request.

        Progress is reported on stdout. A patent whose download, parsing,
        or minimization raises is reported with a warning and skipped, so
        one bad document does not abort the whole run.

        Args:
            company_name: Name of the company to analyze.

        Returns:
            The LLM's portfolio analysis, or a plain message when no
            patents were found or none of them could be processed.
        """
        print(f"Retrieving patents for {company_name}...")
        search_result = SERP.query(company_name)
        found = search_result.patents

        # Guard: nothing to analyze.
        if not found:
            return f"No patents found for {company_name}"

        total = len(found)
        print(f"Found {total} patents. Processing...")

        portfolio = []
        for position, record in enumerate(found, 1):
            print(f"Processing patent {position}/{total}: {record.patent_id}")

            try:
                # Download first; the returned patent carries the PDF path.
                record = SERP.save_patents(record)
                parsed_sections = SERP.parse_patent_pdf(record.pdf_path)
                # Strip bloat so the LLM only sees the useful content.
                compact_text = SERP.minimize_patent_for_llm(parsed_sections)
                portfolio.append(
                    {"patent_id": record.patent_id, "content": compact_text}
                )
            except Exception as exc:
                # Best effort: report the failure and move to the next patent.
                print(f"Warning: Failed to process {record.patent_id}: {exc}")

        # Guard: every patent failed somewhere in the pipeline.
        if not portfolio:
            return f"Failed to process any patents for {company_name}"

        print(f"Analyzing portfolio with LLM...")

        return self.llm_analyzer.analyze_patent_portfolio(
            patents_data=portfolio, company_name=company_name
        )

    def analyze_single_patent(self, patent_id: str, company_name: str) -> str:
        """Analyze one patent identified by its publication ID.

        This simplified version does not download anything: it reads the
        PDF from ``patents/<patent_id>.pdf`` and therefore assumes that
        file is already present. A fuller implementation would support
        looking the patent up directly by ID.

        Args:
            patent_id: Publication ID of the patent.
            company_name: Name of the company, passed to the LLM as context.

        Returns:
            The LLM's analysis of the patent, or a failure message if
            parsing, minimization, or the LLM call raises.
        """
        print(f"Analyzing patent {patent_id} for {company_name}...")

        pdf_location = f"patents/{patent_id}.pdf"

        try:
            parsed_sections = SERP.parse_patent_pdf(pdf_location)
            compact_text = SERP.minimize_patent_for_llm(parsed_sections)
            # The LLM call stays inside the try so its errors are reported
            # through the same failure message.
            return self.llm_analyzer.analyze_patent_content(
                patent_content=compact_text, company_name=company_name
            )
        except Exception as exc:
            return f"Failed to analyze patent {patent_id}: {exc}"
|