# SPARC/tests/test_llm.py

"""Tests for LLM analysis functionality."""

from unittest.mock import Mock

import pytest

from SPARC.llm import LLMAnalyzer


class TestLLMAnalyzer:
    """Test LLM analyzer initialization, API interaction, and caching.

    Every test runs with ``SPARC.llm.DatabaseClient`` patched by the autouse
    ``mock_database`` fixture. Tests that reach the chat-completions call also
    patch ``SPARC.llm.OpenAI`` so the analyzer talks to a ``Mock`` client.
    """

    @pytest.fixture(autouse=True)
    def mock_database(self, mocker):
        """Mock the database client for all tests.

        Returns:
            The ``Mock`` returned by the patched ``DatabaseClient``
            constructor. It reports a cache miss by default and returns ``1``
            from ``store_message``; individual tests may override either.
        """
        db_client = Mock()
        db_client.get_cached_response.return_value = None  # No cache hit by default
        db_client.store_message.return_value = 1
        mocker.patch("SPARC.llm.DatabaseClient", return_value=db_client)
        return db_client

    # ------------------------------------------------------------------
    # Private helpers (not collected by pytest: names do not start with
    # ``test``).
    # ------------------------------------------------------------------

    @staticmethod
    def _patch_openai_client(mocker):
        """Patch ``SPARC.llm.OpenAI`` and return the client mock it yields."""
        client = Mock()
        mocker.patch("SPARC.llm.OpenAI", return_value=client)
        return client

    @staticmethod
    def _completion(content, prompt_tokens=100, completion_tokens=50):
        """Build a mock chat-completion response.

        Args:
            content: Text exposed as ``choices[0].message.content``.
            prompt_tokens: Value exposed as ``usage.prompt_tokens``.
            completion_tokens: Value exposed as ``usage.completion_tokens``.

        Returns:
            A ``Mock`` with ``choices`` and ``usage`` populated;
            ``usage.total_tokens`` is the sum of the two token counts.
        """
        response = Mock()
        response.choices = [Mock(message=Mock(content=content))]
        response.usage = Mock(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
        return response

    @staticmethod
    def _create_kwargs(client):
        """Return the keyword arguments of the last ``chat.completions.create`` call."""
        # ``call_args`` is an (args, kwargs) pair; index 1 is the kwargs dict.
        return client.chat.completions.create.call_args[1]

    # ------------------------------------------------------------------
    # Initialization
    # ------------------------------------------------------------------

    def test_analyzer_initialization_with_api_key(self, mocker):
        """Test that analyzer initializes with provided API key."""
        mock_openai = mocker.patch("SPARC.llm.OpenAI")

        analyzer = LLMAnalyzer(api_key="test-key-123")

        mock_openai.assert_called_once_with(
            api_key="test-key-123",
            base_url="https://openrouter.ai/api/v1",
        )
        assert analyzer.model == "anthropic/claude-3.5-sonnet"

    def test_analyzer_initialization_from_config(self, mocker):
        """Test that analyzer loads API key from config when not provided."""
        mock_openai = mocker.patch("SPARC.llm.OpenAI")
        mock_config = mocker.patch("SPARC.llm.config")
        mock_config.openrouter_api_key = "config-key-456"
        mock_config.use_cache = True
        mock_config.database_url = "postgresql://localhost/test"

        # Only the constructor's side effect is checked; the instance itself
        # is not needed.
        LLMAnalyzer()

        mock_openai.assert_called_once_with(
            api_key="config-key-456",
            base_url="https://openrouter.ai/api/v1",
        )

    def test_database_always_initialized(self, mocker, mock_database):
        """Test that database client is always initialized."""
        mocker.patch("SPARC.llm.OpenAI")

        analyzer = LLMAnalyzer(api_key="test-key")

        assert analyzer.db_client is not None

    # ------------------------------------------------------------------
    # Single-patent analysis
    # ------------------------------------------------------------------

    def test_analyze_patent_content(self, mocker, mock_database):
        """Test single patent content analysis."""
        client = self._patch_openai_client(mocker)
        client.chat.completions.create.return_value = self._completion(
            "Innovative GPU architecture."
        )

        analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
        result = analyzer.analyze_patent_content(
            patent_content="ABSTRACT: GPU with new cache design...",
            company_name="NVIDIA",
        )

        assert result == "Innovative GPU architecture."
        client.chat.completions.create.assert_called_once()

        # Verify the prompt includes company name and content
        prompt_text = self._create_kwargs(client)["messages"][0]["content"]
        assert "NVIDIA" in prompt_text
        assert "GPU with new cache design" in prompt_text

        # Verify message was stored in database
        mock_database.store_message.assert_called_once()

    def test_analyze_patent_content_cache_hit(self, mocker, mock_database):
        """Test that cached responses are returned without API call."""
        client = self._patch_openai_client(mocker)

        # Set up cache hit
        mock_database.get_cached_response.return_value = {
            "id": 1,
            "response": "Cached analysis result",
        }

        analyzer = LLMAnalyzer(api_key="test-key", use_cache=True)
        result = analyzer.analyze_patent_content(
            patent_content="ABSTRACT: GPU with new cache design...",
            company_name="NVIDIA",
        )

        assert result == "Cached analysis result"
        # API should NOT be called on cache hit
        client.chat.completions.create.assert_not_called()

    def test_analyze_single_patent_with_correct_token_limit(self, mocker, mock_database):
        """Test that single patent analysis uses lower token limit."""
        client = self._patch_openai_client(mocker)
        client.chat.completions.create.return_value = self._completion("Analysis result.")

        analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
        analyzer.analyze_patent_content("Test content", "TestCo")

        # Single patent should use 1024 tokens
        assert self._create_kwargs(client)["max_tokens"] == 1024

    def test_no_api_key_stores_placeholder(self, mocker, mock_database):
        """Test that without API key, a placeholder is stored."""
        mock_config = mocker.patch("SPARC.llm.config")
        # A patched module auto-creates attributes as truthy mock objects, so
        # leaving this unset would look like a configured key. Set it to None
        # explicitly so the "no API key" path is the one being exercised.
        mock_config.openrouter_api_key = None

        analyzer = LLMAnalyzer(use_cache=False)
        result = analyzer.analyze_patent_content("Test content", "TestCo")

        assert "[NO API]" in result
        mock_database.store_message.assert_called_once()

    # ------------------------------------------------------------------
    # Portfolio analysis
    # ------------------------------------------------------------------

    def test_analyze_patent_portfolio(self, mocker, mock_database):
        """Test portfolio analysis with multiple patents."""
        client = self._patch_openai_client(mocker)
        client.chat.completions.create.return_value = self._completion(
            "Strong portfolio in AI and graphics.",
            prompt_tokens=200,
            completion_tokens=100,
        )

        analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
        patents_data = [
            {"patent_id": "US123", "content": "AI acceleration patent"},
            {"patent_id": "US456", "content": "Graphics rendering patent"},
        ]

        result = analyzer.analyze_patent_portfolio(
            patents_data=patents_data, company_name="NVIDIA"
        )

        assert result == "Strong portfolio in AI and graphics."
        client.chat.completions.create.assert_called_once()

        # Verify the prompt includes all patents
        prompt_text = self._create_kwargs(client)["messages"][0]["content"]
        for expected in (
            "US123",
            "US456",
            "AI acceleration patent",
            "Graphics rendering patent",
        ):
            assert expected in prompt_text

    def test_analyze_patent_portfolio_with_correct_token_limit(self, mocker, mock_database):
        """Test that portfolio analysis uses higher token limit."""
        client = self._patch_openai_client(mocker)
        client.chat.completions.create.return_value = self._completion("Analysis result.")

        analyzer = LLMAnalyzer(api_key="test-key", use_cache=False)
        patents_data = [{"patent_id": "US123", "content": "Test content"}]

        analyzer.analyze_patent_portfolio(patents_data, "TestCo")

        # Portfolio analysis should use 2048 tokens
        assert self._create_kwargs(client)["max_tokens"] == 2048