# SPARC/SPARC/llm.py

"""LLM integration for patent analysis using OpenRouter."""

from typing import Dict

from openai import OpenAI

from SPARC import config
from SPARC.database import DatabaseClient


class LLMAnalyzer:
    """Handles LLM-based analysis of patent content."""

    def __init__(self, api_key: str | None = None, test_mode: bool = False, use_cache: bool | None = None):
        """Initialize the LLM analyzer.

        Args:
          api_key: OpenRouter API key. If None, will attempt to load from config.
          test_mode: If True, print prompts instead of making API calls
          use_cache: If True, check database cache before making API calls.
                    If None, uses config.use_cache (default: True)
        """
        self.test_mode = test_mode
        self.use_cache = use_cache if use_cache is not None else config.use_cache
        self.model = "anthropic/claude-3.5-sonnet"

        # Always initialize database client for storage and caching
        self.db_client = DatabaseClient(config.database_url)
        self.db_client.initialize_schema()

        # Initialize OpenRouter client if API key is available
        if (api_key or config.openrouter_api_key) and not test_mode:
            self.client = OpenAI(
                api_key=api_key or config.openrouter_api_key,
                base_url="https://openrouter.ai/api/v1"
            )
        else:
            self.client = None

    def analyze_patent_content(self, patent_content: str, company_name: str) -> str:
        """Analyze patent content to estimate company innovation and performance.

        Args:
          patent_content: Minimized patent text (abstract, claims, summary)
          company_name: Name of the company for context

        Returns:
          Analysis text describing innovation quality and potential impact
        """
        prompt = f"""You are a patent analyst evaluating {company_name}'s innovation strategy.

Analyze the following patent content and provide insights on:
1. Innovation quality and novelty
2. Technical complexity and defensibility
3. Market potential and commercial viability
4. Strategic positioning relative to industry trends

Patent Content:
{patent_content}

Provide a concise analysis (2-3 paragraphs) focusing on what this patent reveals about the company's technical direction and competitive advantage."""

        if self.test_mode:
            print("=" * 80)
            print("TEST MODE - Prompt that would be sent to LLM:")
            print("=" * 80)
            print(prompt)
            print("=" * 80)
            return "[TEST MODE - No API call made]"

        # Check cache first
        if self.use_cache:
            cached = self.db_client.get_cached_response(
                prompt=prompt,
                company_name=company_name,
                analysis_type="single_patent"
            )
            if cached:
                # Log the cache hit
                self.db_client.store_message(
                    prompt=prompt,
                    response=cached["response"],
                    company_name=company_name,
                    analysis_type="single_patent",
                    model=self.model,
                    metadata={
                        "patent_content_length": len(patent_content),
                        "cache_hit": True,
                        "original_message_id": cached["id"]
                    },
                    is_cached=True
                )
                return cached["response"]

        # Call API if no cache hit and client is available
        if self.client:
            response = self.client.chat.completions.create(
                model=self.model,
                max_tokens=1024,
                messages=[{"role": "user", "content": prompt}],
            )
            response_text = response.choices[0].message.content

            # Store in database for future cache lookups
            self.db_client.store_message(
                prompt=prompt,
                response=response_text,
                company_name=company_name,
                analysis_type="single_patent",
                model=self.model,
                metadata={"patent_content_length": len(patent_content)},
                token_usage={
                    "prompt_tokens": response.usage.prompt_tokens,
                    "completion_tokens": response.usage.completion_tokens,
                    "total_tokens": response.usage.total_tokens
                } if hasattr(response, 'usage') else None
            )

            return response_text

        # No API client available - store prompt for later processing
        placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
        self.db_client.store_message(
            prompt=prompt,
            response=placeholder,
            company_name=company_name,
            analysis_type="single_patent",
            model=self.model,
            metadata={"patent_content_length": len(patent_content), "pending": True}
        )
        return placeholder

    def analyze_patent_portfolio(
        self, patents_data: list[Dict[str, str]], company_name: str
    ) -> str:
        """Analyze multiple patents to estimate overall company performance.

        Args:
          patents_data: List of dicts, each containing 'patent_id' and 'content'
          company_name: Name of the company being analyzed

        Returns:
          Comprehensive analysis of company's innovation trajectory and outlook
        """
        # Combine all patent summaries
        portfolio_summary = []
        for idx, patent in enumerate(patents_data, 1):
            portfolio_summary.append(
                f"Patent {idx} ({patent['patent_id']}):\n{patent['content']}"
            )

        combined_content = "\n\n---\n\n".join(portfolio_summary)

        prompt = f"""You are analyzing {company_name}'s patent portfolio to estimate their future performance and innovation trajectory.

You have {len(patents_data)} recent patents to analyze. Evaluate the portfolio holistically:

1. Innovation Trends: What technology areas are they focusing on?
2. Strategic Direction: What does this reveal about their business strategy?
3. Competitive Position: How defensible are these innovations?
4. Market Outlook: What market opportunities do these patents target?
5. Performance Forecast: Based on this innovation activity, what's your assessment of their likely performance?

Patent Portfolio:
{combined_content}

Provide a comprehensive analysis (4-5 paragraphs) with a final verdict on the company's innovation strength and performance outlook."""

        if self.test_mode:
            print(prompt)
            return "[TEST MODE]"

        metadata = {
            "patent_count": len(patents_data),
            "patent_ids": [p['patent_id'] for p in patents_data]
        }

        # Check cache first
        if self.use_cache:
            cached = self.db_client.get_cached_response(
                prompt=prompt,
                company_name=company_name,
                analysis_type="portfolio"
            )
            if cached:
                # Log the cache hit
                self.db_client.store_message(
                    prompt=prompt,
                    response=cached["response"],
                    company_name=company_name,
                    analysis_type="portfolio",
                    model=self.model,
                    metadata={
                        **metadata,
                        "cache_hit": True,
                        "original_message_id": cached["id"]
                    },
                    is_cached=True
                )
                return cached["response"]

        # Call API if no cache hit and client is available
        if self.client:
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    max_tokens=2048,
                    messages=[{"role": "user", "content": prompt}],
                )

                response_text = response.choices[0].message.content

                # Store in database for future cache lookups
                self.db_client.store_message(
                    prompt=prompt,
                    response=response_text,
                    company_name=company_name,
                    analysis_type="portfolio",
                    model=self.model,
                    metadata=metadata,
                    token_usage={
                        "prompt_tokens": response.usage.prompt_tokens,
                        "completion_tokens": response.usage.completion_tokens,
                        "total_tokens": response.usage.total_tokens
                    } if hasattr(response, 'usage') else None
                )

                return response_text
            except AttributeError:
                return prompt

        # No API client available - store prompt for later processing
        placeholder = "[NO API] Prompt stored in database. Configure OPENROUTER_API_KEY to enable analysis."
        self.db_client.store_message(
            prompt=prompt,
            response=placeholder,
            company_name=company_name,
            analysis_type="portfolio",
            model=self.model,
            metadata={**metadata, "pending": True}
        )
        return placeholder