"""High-level patent analysis orchestration. This module ties together patent retrieval, parsing, and LLM analysis to provide company performance estimation based on patent portfolios. """ from SPARC.serp_api import SERP from SPARC.llm import LLMAnalyzer from SPARC.types import Patent from typing import List class CompanyAnalyzer: """Orchestrates end-to-end company performance analysis via patents.""" def __init__(self, openrouter_api_key: str | None = None): """Initialize the company analyzer. Args: openrouter_api_key: Optional OpenRouter API key. If None, loads from config. """ self.llm_analyzer = LLMAnalyzer(api_key=openrouter_api_key) def analyze_company(self, company_name: str) -> str: """Analyze a company's performance based on their patent portfolio. This is the main entry point that orchestrates the full pipeline: 1. Retrieve patents from SERP API 2. Download and parse each patent PDF 3. Minimize patent content (remove bloat) 4. Analyze portfolio with LLM 5. Return performance estimation Args: company_name: Name of the company to analyze Returns: Comprehensive analysis of company's innovation and performance outlook """ print(f"Retrieving patents for {company_name}...") patents = SERP.query(company_name) if not patents.patents: return f"No patents found for {company_name}" print(f"Found {len(patents.patents)} patents. Processing...") # Download and parse each patent processed_patents = [] for idx, patent in enumerate(patents.patents, 1): print(f"Processing patent {idx}/{len(patents.patents)}: {patent.patent_id}") try: # Download PDF patent = SERP.save_patents(patent) # Parse sections from PDF sections = SERP.parse_patent_pdf(patent.pdf_path) # Minimize for LLM (remove bloat) minimized_content = SERP.minimize_patent_for_llm(sections) processed_patents.append( {"patent_id": patent.patent_id, "content": minimized_content} ) except Exception as e: print(f"Warning: Failed to process {patent.patent_id}: {e}") continue if not processed_patents: return f"Failed to process any patents for {company_name}" print(f"Analyzing portfolio with LLM...") # Analyze the full portfolio with LLM analysis = self.llm_analyzer.analyze_patent_portfolio( patents_data=processed_patents, company_name=company_name ) return analysis def analyze_single_patent(self, patent_id: str, company_name: str) -> str: """Analyze a single patent by ID. Useful for focused analysis of specific innovations. Args: patent_id: Publication ID of the patent company_name: Name of the company (for context) Returns: Analysis of the specific patent's innovation quality """ # Note: This simplified version assumes the patent PDF is already downloaded # A more complete implementation would support direct patent ID lookup print(f"Analyzing patent {patent_id} for {company_name}...") patent_path = f"patents/{patent_id}.pdf" try: sections = SERP.parse_patent_pdf(patent_path) minimized_content = SERP.minimize_patent_for_llm(sections) analysis = self.llm_analyzer.analyze_patent_content( patent_content=minimized_content, company_name=company_name ) return analysis except Exception as e: return f"Failed to analyze patent {patent_id}: {e}"